if_mxge.c revision 170733
18097Sjkh/****************************************************************************** 28097Sjkh 38097SjkhCopyright (c) 2006, Myricom Inc. 48097SjkhAll rights reserved. 58097Sjkh 68097SjkhRedistribution and use in source and binary forms, with or without 714670Sjkhmodification, are permitted provided that the following conditions are met: 88097Sjkh 98097Sjkh 1. Redistributions of source code must retain the above copyright notice, 108097Sjkh this list of conditions and the following disclaimer. 118097Sjkh 128097Sjkh 2. Redistributions in binary form must reproduce the above copyright 138097Sjkh notice, this list of conditions and the following disclaimer in the 148097Sjkh documentation and/or other materials provided with the distribution. 158097Sjkh 168881Srgrimes 3. Neither the name of the Myricom Inc, nor the names of its 178881Srgrimes contributors may be used to endorse or promote products derived from 188097Sjkh this software without specific prior written permission. 198097Sjkh 208097SjkhTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 218097SjkhAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 228097SjkhIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 238097SjkhARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 248097SjkhLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 258097SjkhCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 268097SjkhSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 278097SjkhINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 288097SjkhCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 298097SjkhARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 308097SjkhPOSSIBILITY OF SUCH DAMAGE. 318097Sjkh 328097Sjkh***************************************************************************/ 338097Sjkh 348097Sjkh#include <sys/cdefs.h> 358097Sjkh__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 170733 2007-06-14 19:35:03Z gallatin $"); 368097Sjkh 378097Sjkh#include <sys/param.h> 388097Sjkh#include <sys/systm.h> 398097Sjkh#include <sys/linker.h> 408097Sjkh#include <sys/firmware.h> 418097Sjkh#include <sys/endian.h> 428097Sjkh#include <sys/sockio.h> 438097Sjkh#include <sys/mbuf.h> 448097Sjkh#include <sys/malloc.h> 458097Sjkh#include <sys/kdb.h> 468097Sjkh#include <sys/kernel.h> 478097Sjkh#include <sys/lock.h> 488097Sjkh#include <sys/module.h> 498097Sjkh#include <sys/memrange.h> 508097Sjkh#include <sys/socket.h> 518097Sjkh#include <sys/sysctl.h> 5212661Speter#include <sys/sx.h> 5312661Speter 548281Sjkh#include <net/if.h> 558405Sjkh#include <net/if_arp.h> 5612661Speter#include <net/ethernet.h> 578097Sjkh#include <net/if_dl.h> 588208Sjkh#include <net/if_media.h> 598208Sjkh 608174Sjkh#include <net/bpf.h> 618174Sjkh 628174Sjkh#include <net/if_types.h> 638174Sjkh#include <net/if_vlan_var.h> 648174Sjkh#include <net/zlib.h> 6514321Sjkh 6614321Sjkh#include <netinet/in_systm.h> 6714321Sjkh#include <netinet/in.h> 688208Sjkh#include <netinet/ip.h> 698208Sjkh#include <netinet/tcp.h> 708208Sjkh 718208Sjkh#include <machine/bus.h> 728208Sjkh#include <machine/in_cksum.h> 7312661Speter#include <machine/resource.h> 7412661Speter#include <sys/bus.h> 758549Sjkh#include <sys/rman.h> 7612661Speter 778208Sjkh#include <dev/pci/pcireg.h> 7812661Speter#include <dev/pci/pcivar.h> 7912661Speter 8012661Speter#include <vm/vm.h> /* for pmap_mapdev() */ 8112661Speter#include <vm/pmap.h> 828705Sjkh 838705Sjkh#if defined(__i386) || defined(__amd64) 848705Sjkh#include <machine/specialreg.h> 858705Sjkh#endif 868705Sjkh 878705Sjkh#include <dev/mxge/mxge_mcp.h> 888705Sjkh#include <dev/mxge/mcp_gen_header.h> 898705Sjkh#include <dev/mxge/if_mxge_var.h> 9012661Speter 918208Sjkh/* tunable params */ 9212661Speterstatic int mxge_nvidia_ecrc_enable = 1; 938549Sjkhstatic int mxge_force_firmware = 0; 948549Sjkhstatic int mxge_intr_coal_delay = 30; 9512661Speterstatic int mxge_deassert_wait = 1; 9612661Speterstatic int mxge_flow_control = 1; 978709Sjkhstatic int mxge_verbose = 0; 988549Sjkhstatic int mxge_lro_cnt = 8; 998549Sjkhstatic int mxge_ticks; 10012661Speterstatic char *mxge_fw_unaligned = "mxge_ethp_z8e"; 10112661Speterstatic char *mxge_fw_aligned = "mxge_eth_z8e"; 10212661Speter 10312661Speterstatic int mxge_probe(device_t dev); 10412661Speterstatic int mxge_attach(device_t dev); 10512661Speterstatic int mxge_detach(device_t dev); 10612661Speterstatic int mxge_shutdown(device_t dev); 10712661Speterstatic void mxge_intr(void *arg); 10812661Speter 10912661Speterstatic device_method_t mxge_methods[] = 11012661Speter{ 11112661Speter /* Device interface */ 11212661Speter DEVMETHOD(device_probe, mxge_probe), 11312661Speter DEVMETHOD(device_attach, mxge_attach), 11412661Speter DEVMETHOD(device_detach, mxge_detach), 11512661Speter DEVMETHOD(device_shutdown, mxge_shutdown), 11612661Speter {0, 0} 11712661Speter}; 11812661Speter 11912661Speterstatic driver_t mxge_driver = 12012661Speter{ 12112661Speter "mxge", 12212661Speter mxge_methods, 12312661Speter sizeof(mxge_softc_t), 12412661Speter}; 12512661Speter 12612661Speterstatic devclass_t mxge_devclass; 12712661Speter 12812661Speter/* Declare ourselves to be a child of the PCI bus.*/ 12912661SpeterDRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 13012661SpeterMODULE_DEPEND(mxge, firmware, 1, 1, 1); 13112661Speter 13212661Speterstatic int mxge_load_firmware(mxge_softc_t *sc); 13312661Speterstatic int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 13412661Speterstatic int mxge_close(mxge_softc_t *sc); 13512661Speterstatic int mxge_open(mxge_softc_t *sc); 13612661Speterstatic void mxge_tick(void *arg); 13712661Speter 13812661Speterstatic int 13912661Spetermxge_probe(device_t dev) 14012661Speter{ 14112661Speter if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 14212661Speter (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) { 14312661Speter device_set_desc(dev, "Myri10G-PCIE-8A"); 1449202Srgrimes return 0; 14512661Speter } 1469202Srgrimes return ENXIO; 14712661Speter} 14812661Speter 1498549Sjkhstatic void 1508208Sjkhmxge_enable_wc(mxge_softc_t *sc) 1518097Sjkh{ 1528549Sjkh struct mem_range_desc mrdesc; 1538549Sjkh vm_paddr_t pa; 1548097Sjkh vm_offset_t len; 1558097Sjkh int err, action; 1568174Sjkh 1578174Sjkh len = rman_get_size(sc->mem_res); 1588174Sjkh#if defined(__i386) || defined(__amd64) 1598174Sjkh err = pmap_change_attr((vm_offset_t) sc->sram, 1608174Sjkh len, PAT_WRITE_COMBINING); 1618405Sjkh if (err == 0) 1629202Srgrimes return; 1638174Sjkh else 1648174Sjkh device_printf(sc->dev, "pmap_change_attr failed, %d\n", 1658208Sjkh err); 1668097Sjkh#endif 1678097Sjkh pa = rman_get_start(sc->mem_res); 1688097Sjkh mrdesc.mr_base = pa; 1698174Sjkh mrdesc.mr_len = len; 1708174Sjkh mrdesc.mr_flags = MDF_WRITECOMBINE; 1718174Sjkh action = MEMRANGE_SET_UPDATE; 1728174Sjkh strcpy((char *)&mrdesc.mr_owner, "mxge"); 17312661Speter err = mem_range_attr_set(&mrdesc, &action); 1748208Sjkh if (err != 0) { 1759202Srgrimes device_printf(sc->dev, 1768097Sjkh "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 1778097Sjkh (unsigned long)pa, (unsigned long)len, err); 1788097Sjkh } else { 1798174Sjkh sc->wc = 1; 1808174Sjkh } 1818174Sjkh} 1828174Sjkh 1838174Sjkh 1848174Sjkh/* callback to get our DMA address */ 1858097Sjkhstatic void 1868097Sjkhmxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 1878097Sjkh int error) 1888097Sjkh{ 1898097Sjkh if (error == 0) { 1908208Sjkh *(bus_addr_t *) arg = segs->ds_addr; 1918208Sjkh } 1928097Sjkh} 1938097Sjkh 19412661Speterstatic int 1958792Sjkhmxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 19612661Speter bus_size_t alignment) 19712661Speter{ 1988792Sjkh int err; 1998792Sjkh device_t dev = sc->dev; 20012661Speter 20112661Speter /* allocate DMAable memory tags */ 2028792Sjkh err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2038792Sjkh alignment, /* alignment */ 2048208Sjkh 4096, /* boundary */ 2058363Sjkh BUS_SPACE_MAXADDR, /* low */ 2068208Sjkh BUS_SPACE_MAXADDR, /* high */ 2078208Sjkh NULL, NULL, /* filter */ 2088756Sjkh bytes, /* maxsize */ 2098208Sjkh 1, /* num segs */ 2108208Sjkh 4096, /* maxsegsize */ 2118208Sjkh BUS_DMA_COHERENT, /* flags */ 2128642Sjkh NULL, NULL, /* lock */ 2138837Sjkh &dma->dmat); /* tag */ 2148837Sjkh if (err != 0) { 2158363Sjkh device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 2168208Sjkh return err; 2178097Sjkh } 2188208Sjkh 2198208Sjkh /* allocate DMAable memory & map */ 2208208Sjkh err = bus_dmamem_alloc(dma->dmat, &dma->addr, 2218556Sjkh (BUS_DMA_WAITOK | BUS_DMA_COHERENT 2228636Sjkh | BUS_DMA_ZERO), &dma->map); 2238208Sjkh if (err != 0) { 2248549Sjkh device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 2259202Srgrimes goto abort_with_dmat; 22614321Sjkh } 2279202Srgrimes 2289202Srgrimes /* load the memory */ 2298556Sjkh err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 2309202Srgrimes mxge_dmamap_callback, 2318208Sjkh (void *)&dma->bus_addr, 0); 2328208Sjkh if (err != 0) { 2338307Sjkh device_printf(dev, "couldn't load map (err = %d)\n", err); 2348307Sjkh goto abort_with_mem; 2358307Sjkh } 2368307Sjkh return 0; 2378307Sjkh 2388549Sjkhabort_with_mem: 2398549Sjkh bus_dmamem_free(dma->dmat, dma->addr, dma->map); 2408307Sjkhabort_with_dmat: 2418208Sjkh (void)bus_dma_tag_destroy(dma->dmat); 2428336Sjkh return err; 2438336Sjkh} 2448336Sjkh 2458307Sjkh 2468307Sjkhstatic void 2478307Sjkhmxge_dma_free(mxge_dma_t *dma) 2488336Sjkh{ 2498307Sjkh bus_dmamap_unload(dma->dmat, dma->map); 2508307Sjkh bus_dmamem_free(dma->dmat, dma->addr, dma->map); 25112661Speter (void)bus_dma_tag_destroy(dma->dmat); 25212661Speter} 25312661Speter 25412661Speter/* 25512661Speter * The eeprom strings on the lanaiX have the format 25612661Speter * SN=x\0 25712661Speter * MAC=x:x:x:x:x:x\0 25812661Speter * PC=text\0 25912661Speter */ 26012661Speter 26112661Speterstatic int 26212661Spetermxge_parse_strings(mxge_softc_t *sc) 26312661Speter{ 26412661Speter#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 26512661Speter 26612661Speter char *ptr, *limit; 26712661Speter int i, found_mac; 26812661Speter 26912661Speter ptr = sc->eeprom_strings; 27012661Speter limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 27112661Speter found_mac = 0; 27212661Speter while (ptr < limit && *ptr != '\0') { 27312661Speter if (memcmp(ptr, "MAC=", 4) == 0) { 27412661Speter ptr += 1; 27512661Speter sc->mac_addr_string = ptr; 27612661Speter for (i = 0; i < 6; i++) { 27712661Speter ptr += 3; 27812661Speter if ((ptr + 2) > limit) 27912661Speter goto abort; 28012661Speter sc->mac_addr[i] = strtoul(ptr, NULL, 16); 28114670Sjkh found_mac = 1; 28212661Speter } 28312661Speter } else if (memcmp(ptr, "PC=", 3) == 0) { 28412661Speter ptr += 3; 28512661Speter strncpy(sc->product_code_string, ptr, 2868549Sjkh sizeof (sc->product_code_string) - 1); 2878307Sjkh } else if (memcmp(ptr, "SN=", 3) == 0) { 28812661Speter ptr += 3; 2898810Sjkh strncpy(sc->serial_number_string, ptr, 29012661Speter sizeof (sc->serial_number_string) - 1); 2918549Sjkh } 29212661Speter MXGE_NEXT_STRING(ptr); 29312661Speter } 29412661Speter 29512661Speter if (found_mac) 2968810Sjkh return 0; 2978810Sjkh 2988810Sjkh abort: 2998810Sjkh device_printf(sc->dev, "failed to parse eeprom_strings\n"); 3008810Sjkh 3018810Sjkh return ENXIO; 3028810Sjkh} 3038810Sjkh 3048208Sjkh#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 3058576Sjkhstatic void 3068881Srgrimesmxge_enable_nvidia_ecrc(mxge_softc_t *sc) 3078735Sjkh{ 3088576Sjkh uint32_t val; 3098576Sjkh unsigned long base, off; 3108576Sjkh char *va, *cfgptr; 3118576Sjkh device_t pdev, mcp55; 3128636Sjkh uint16_t vendor_id, device_id, word; 3138576Sjkh uintptr_t bus, slot, func, ivend, idev; 3149202Srgrimes uint32_t *ptr32; 3158576Sjkh 3168576Sjkh 3178576Sjkh if (!mxge_nvidia_ecrc_enable) 3188576Sjkh return; 3199202Srgrimes 3208097Sjkh pdev = device_get_parent(device_get_parent(sc->dev)); 3219202Srgrimes if (pdev == NULL) { 3228576Sjkh device_printf(sc->dev, "could not find parent?\n"); 3238660Sjkh return; 3248715Sjkh } 3258576Sjkh vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 32610882Speter device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 3278576Sjkh 3288576Sjkh if (vendor_id != 0x10de) 3298576Sjkh return; 33012661Speter 3318576Sjkh base = 0; 3328677Sjkh 3338576Sjkh if (device_id == 0x005d) { 3348576Sjkh /* ck804, base address is magic */ 3358677Sjkh base = 0xe0000000UL; 3368677Sjkh } else if (device_id >= 0x0374 && device_id <= 0x378) { 3378810Sjkh /* mcp55, base address stored in chipset */ 3388722Sjkh mcp55 = pci_find_bsf(0, 0, 0); 3398810Sjkh if (mcp55 && 3408810Sjkh 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 3418810Sjkh 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 3428810Sjkh word = pci_read_config(mcp55, 0x90, 2); 34310882Speter base = ((unsigned long)word & 0x7ffeU) << 25; 3448576Sjkh } 34512661Speter } 3469202Srgrimes if (!base) 3478576Sjkh return; 3488576Sjkh 3498576Sjkh /* XXXX 3508576Sjkh Test below is commented because it is believed that doing 3518576Sjkh config read/write beyond 0xff will access the config space 3528576Sjkh for the next larger function. Uncomment this and remove 3538576Sjkh the hacky pmap_mapdev() way of accessing config space when 35412661Speter FreeBSD grows support for extended pcie config space access 3558576Sjkh */ 3568576Sjkh#if 0 3578208Sjkh /* See if we can, by some miracle, access the extended 3588107Sjkh config space */ 35912661Speter val = pci_read_config(pdev, 0x178, 4); 36012661Speter if (val != 0xffffffff) { 36112661Speter val |= 0x40; 36212661Speter pci_write_config(pdev, 0x178, val, 4); 36312661Speter return; 36412661Speter } 3658792Sjkh#endif 36612661Speter /* Rather than using normal pci config space writes, we must 36712661Speter * map the Nvidia config space ourselves. This is because on 36812661Speter * opteron/nvidia class machine the 0xe000000 mapping is 3698792Sjkh * handled by the nvidia chipset, that means the internal PCI 3708792Sjkh * device (the on-chip northbridge), or the amd-8131 bridge 3718792Sjkh * and things behind them are not visible by this method. 37214321Sjkh */ 3738792Sjkh 3748792Sjkh BUS_READ_IVAR(device_get_parent(pdev), pdev, 3758347Sjkh PCI_IVAR_BUS, &bus); 3768347Sjkh BUS_READ_IVAR(device_get_parent(pdev), pdev, 3778347Sjkh PCI_IVAR_SLOT, &slot); 3788347Sjkh BUS_READ_IVAR(device_get_parent(pdev), pdev, 3798549Sjkh PCI_IVAR_FUNCTION, &func); 3808549Sjkh BUS_READ_IVAR(device_get_parent(pdev), pdev, 3818347Sjkh PCI_IVAR_VENDOR, &ivend); 3828705Sjkh BUS_READ_IVAR(device_get_parent(pdev), pdev, 38312661Speter PCI_IVAR_DEVICE, &idev); 3848722Sjkh 3858722Sjkh off = base 3868722Sjkh + 0x00100000UL * (unsigned long)bus 3878722Sjkh + 0x00001000UL * (unsigned long)(func 3888722Sjkh + 8 * slot); 3899202Srgrimes 3909202Srgrimes /* map it into the kernel */ 3918705Sjkh va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 3929202Srgrimes 3939202Srgrimes 3949202Srgrimes if (va == NULL) { 3958405Sjkh device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 3968405Sjkh return; 39712661Speter } 39812661Speter /* get a pointer to the config space mapped into the kernel */ 3998405Sjkh cfgptr = va + (off & PAGE_MASK); 4008351Sjkh 4018549Sjkh /* make sure that we can really access it */ 4028556Sjkh vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 4038556Sjkh device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 4048636Sjkh if (! (vendor_id == ivend && device_id == idev)) { 4058641Sjkh device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 4068641Sjkh vendor_id, device_id); 40714321Sjkh pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 4088722Sjkh return; 4098722Sjkh } 4109202Srgrimes 41114321Sjkh ptr32 = (uint32_t*)(cfgptr + 0x178); 4129202Srgrimes val = *ptr32; 4139202Srgrimes 4148097Sjkh if (val == 0xffffffff) { 4158351Sjkh device_printf(sc->dev, "extended mapping failed\n"); 4168556Sjkh pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 41710882Speter return; 4188097Sjkh } 4198278Sjkh *ptr32 = val | 0x40; 4208751Sjkh pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 42112661Speter if (mxge_verbose) 4228278Sjkh device_printf(sc->dev, 4238278Sjkh "Enabled ECRC on upstream Nvidia bridge " 4249202Srgrimes "at %d:%d:%d\n", 4258278Sjkh (int)bus, (int)slot, (int)func); 4268278Sjkh return; 4278278Sjkh} 4288278Sjkh#else 4299202Srgrimesstatic void 4308405Sjkhmxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev) 4319202Srgrimes{ 43210882Speter device_printf(sc->dev, 4338278Sjkh "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 4348107Sjkh return; 4359202Srgrimes} 4369202Srgrimes#endif 4379202Srgrimes 4389202Srgrimes 4399202Srgrimesstatic int 4408097Sjkhmxge_dma_test(mxge_softc_t *sc, int test_type) 44112661Speter{ 44212661Speter mxge_cmd_t cmd; 44312661Speter bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 44412661Speter int status; 4458792Sjkh uint32_t len; 4468792Sjkh char *test = " "; 44714321Sjkh 4488792Sjkh 4498792Sjkh /* Run a small DMA test. 4508792Sjkh * The magic multipliers to the length tell the firmware 4518792Sjkh * to do DMA read, write, or read+write tests. The 4528792Sjkh * results are returned in cmd.data0. The upper 16 45314321Sjkh * bits of the return is the number of transfers completed. 4548792Sjkh * The lower 16 bits is the time in 0.5us ticks that the 4558792Sjkh * transfers took to complete. 4568792Sjkh */ 4578792Sjkh 4588792Sjkh len = sc->tx.boundary; 45914321Sjkh 4608792Sjkh cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 4618792Sjkh cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 4628351Sjkh cmd.data2 = len * 0x10000; 4638351Sjkh status = mxge_send_cmd(sc, test_type, &cmd); 4648351Sjkh if (status != 0) { 46512661Speter test = "read"; 46612661Speter goto abort; 46712661Speter } 46812661Speter sc->read_dma = ((cmd.data0>>16) * len * 2) / 46912661Speter (cmd.data0 & 0xffff); 47012661Speter cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 47112661Speter cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 47212661Speter cmd.data2 = len * 0x1; 47314670Sjkh status = mxge_send_cmd(sc, test_type, &cmd); 47412661Speter if (status != 0) { 4758351Sjkh test = "write"; 4768556Sjkh goto abort; 47710882Speter } 47812661Speter sc->write_dma = ((cmd.data0>>16) * len * 2) / 47912661Speter (cmd.data0 & 0xffff); 48012661Speter 48112661Speter cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 48212661Speter cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 48312661Speter cmd.data2 = len * 0x10001; 48412661Speter status = mxge_send_cmd(sc, test_type, &cmd); 48512661Speter if (status != 0) { 48612661Speter test = "read/write"; 4878351Sjkh goto abort; 48812661Speter } 48912661Speter sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 49012661Speter (cmd.data0 & 0xffff); 49112661Speter 49212661Speterabort: 4938351Sjkh if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 4948549Sjkh device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 4958576Sjkh test, status); 49610882Speter 4978549Sjkh return status; 49812661Speter} 49912661Speter 50012661Speter/* 5018351Sjkh * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 5028405Sjkh * when the PCI-E Completion packets are aligned on an 8-byte 5038405Sjkh * boundary. Some PCI-E chip sets always align Completion packets; on 5048405Sjkh * the ones that do not, the alignment can be enforced by enabling 5058405Sjkh * ECRC generation (if supported). 5068405Sjkh * 5078405Sjkh * When PCI-E Completion packets are not aligned, it is actually more 5088405Sjkh * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 5098601Sjkh * 5108601Sjkh * If the driver can neither enable ECRC nor verify that it has 5118629Sjkh * already been enabled, then it must use a firmware image which works 5128629Sjkh * around unaligned completion packets (ethp_z8e.dat), and it should 5138629Sjkh * also ensure that it never gives the device a Read-DMA which is 5148351Sjkh * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 5158351Sjkh * enabled, then the driver should use the aligned (eth_z8e.dat) 51612661Speter * firmware image, and set tx.boundary to 4KB. 5178351Sjkh */ 5188351Sjkh 5198351Sjkhstatic int 5208351Sjkhmxge_firmware_probe(mxge_softc_t *sc) 5218351Sjkh{ 52210882Speter device_t dev = sc->dev; 52310882Speter int reg, status; 5248837Sjkh uint16_t pectl; 5258837Sjkh 52612661Speter sc->tx.boundary = 4096; 52712661Speter /* 52812661Speter * Verify the max read request size was set to 4KB 52912661Speter * before trying the test with 4KB. 5308837Sjkh */ 5318837Sjkh if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 5328799Sphk pectl = pci_read_config(dev, reg + 0x8, 2); 5338556Sjkh if ((pectl & (5 << 12)) != (5 << 12)) { 5348576Sjkh device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 5358107Sjkh pectl); 5368097Sjkh sc->tx.boundary = 2048; 5378097Sjkh } 53814670Sjkh } 5398097Sjkh 54012661Speter /* 5418097Sjkh * load the optimized firmware (which assumes aligned PCIe 5428097Sjkh * completions) in order to see if it works on this host. 54312661Speter */ 54412661Speter sc->fw_name = mxge_fw_aligned; 5458097Sjkh status = mxge_load_firmware(sc); 5468208Sjkh if (status != 0) { 5478363Sjkh return status; 5488097Sjkh } 5498174Sjkh 5508174Sjkh /* 5518097Sjkh * Enable ECRC if possible 5528556Sjkh */ 5538556Sjkh mxge_enable_nvidia_ecrc(sc); 5548097Sjkh 5558107Sjkh /* 5568837Sjkh * Run a DMA test which watches for unaligned completions and 5578097Sjkh * aborts on the first one seen. 5588262Sjkh */ 5598097Sjkh 5608347Sjkh status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 5618097Sjkh if (status == 0) 5628097Sjkh return 0; /* keep the aligned firmware */ 5638208Sjkh 5648313Sjkh if (status != E2BIG) 5658705Sjkh device_printf(dev, "DMA test failed: %d\n", status); 5668208Sjkh if (status == ENOSYS) 5678262Sjkh device_printf(dev, "Falling back to ethp! " 56812661Speter "Please install up to date fw\n"); 56912661Speter return status; 5708097Sjkh} 5718792Sjkh 5728792Sjkhstatic int 5738792Sjkhmxge_select_firmware(mxge_softc_t *sc) 5748792Sjkh{ 5758837Sjkh int aligned = 0; 5768837Sjkh 57714321Sjkh 5788837Sjkh if (mxge_force_firmware != 0) { 5798837Sjkh if (mxge_force_firmware == 1) 58012661Speter aligned = 1; 58112661Speter else 58212661Speter aligned = 0; 58312661Speter if (mxge_verbose) 58412661Speter device_printf(sc->dev, 58512661Speter "Assuming %s completions (forced)\n", 58612661Speter aligned ? "aligned" : "unaligned"); 5878351Sjkh goto abort; 5888351Sjkh } 5898351Sjkh 59012661Speter /* if the PCIe link width is 4 or less, we can use the aligned 5918351Sjkh firmware and skip any checks */ 59212661Speter if (sc->link_width != 0 && sc->link_width <= 4) { 5938351Sjkh device_printf(sc->dev, 5948351Sjkh "PCIe x%d Link, expect reduced performance\n", 5958351Sjkh sc->link_width); 59612661Speter aligned = 1; 5978351Sjkh goto abort; 59812661Speter } 5998351Sjkh 60012661Speter if (0 == mxge_firmware_probe(sc)) 60112661Speter return 0; 6028107Sjkh 6038792Sjkhabort: 60412661Speter if (aligned) { 6058792Sjkh sc->fw_name = mxge_fw_aligned; 60614321Sjkh sc->tx.boundary = 4096; 6078792Sjkh } else { 6088792Sjkh sc->fw_name = mxge_fw_unaligned; 6098556Sjkh sc->tx.boundary = 2048; 6108768Sjkh } 6119202Srgrimes return (mxge_load_firmware(sc)); 61212661Speter} 6139202Srgrimes 6148556Sjkhunion qualhack 6158351Sjkh{ 6168351Sjkh const char *ro_char; 6178208Sjkh char *rw_char; 6188792Sjkh}; 6198837Sjkh 6208792Sjkhstatic int 62114321Sjkhmxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 6228792Sjkh{ 62312661Speter 6248208Sjkh 6258208Sjkh if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 62612661Speter device_printf(sc->dev, "Bad firmware type: 0x%x\n", 62712661Speter be32toh(hdr->mcp_type)); 62812661Speter return EIO; 6298208Sjkh } 6308281Sjkh 6318549Sjkh /* save firmware version for sysctl */ 6328281Sjkh strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 6338097Sjkh if (mxge_verbose) 6348097Sjkh device_printf(sc->dev, "firmware id: %s\n", hdr->version); 635 636 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 637 &sc->fw_ver_minor, &sc->fw_ver_tiny); 638 639 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 640 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 641 device_printf(sc->dev, "Found firmware version %s\n", 642 sc->fw_version); 643 device_printf(sc->dev, "Driver needs %d.%d\n", 644 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 645 return EINVAL; 646 } 647 return 0; 648 649} 650 651static int 652mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 653{ 654 const struct firmware *fw; 655 const mcp_gen_header_t *hdr; 656 unsigned hdr_offset; 657 const char *fw_data; 658 union qualhack hack; 659 int status; 660 unsigned int i; 661 char dummy; 662 663 664 fw = firmware_get(sc->fw_name); 665 666 if (fw == NULL) { 667 device_printf(sc->dev, "Could not find firmware image %s\n", 668 sc->fw_name); 669 return ENOENT; 670 } 671 if (fw->datasize > *limit || 672 fw->datasize < MCP_HEADER_PTR_OFFSET + 4) { 673 device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n", 674 sc->fw_name, (int)fw->datasize, (int) *limit); 675 status = ENOSPC; 676 goto abort_with_fw; 677 } 678 *limit = fw->datasize; 679 680 /* check id */ 681 fw_data = (const char *)fw->data; 682 hdr_offset = htobe32(*(const uint32_t *) 683 (fw_data + MCP_HEADER_PTR_OFFSET)); 684 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) { 685 device_printf(sc->dev, "Bad firmware file"); 686 status = EIO; 687 goto abort_with_fw; 688 } 689 hdr = (const void*)(fw_data + hdr_offset); 690 691 status = mxge_validate_firmware(sc, hdr); 692 if (status != 0) 693 goto abort_with_fw; 694 695 hack.ro_char = fw_data; 696 /* Copy the inflated firmware to NIC SRAM. */ 697 for (i = 0; i < *limit; i += 256) { 698 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 699 hack.rw_char + i, 700 min(256U, (unsigned)(*limit - i))); 701 mb(); 702 dummy = *sc->sram; 703 mb(); 704 } 705 706 status = 0; 707abort_with_fw: 708 firmware_put(fw, FIRMWARE_UNLOAD); 709 return status; 710} 711 712/* 713 * Enable or disable periodic RDMAs from the host to make certain 714 * chipsets resend dropped PCIe messages 715 */ 716 717static void 718mxge_dummy_rdma(mxge_softc_t *sc, int enable) 719{ 720 char buf_bytes[72]; 721 volatile uint32_t *confirm; 722 volatile char *submit; 723 uint32_t *buf, dma_low, dma_high; 724 int i; 725 726 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 727 728 /* clear confirmation addr */ 729 confirm = (volatile uint32_t *)sc->cmd; 730 *confirm = 0; 731 mb(); 732 733 /* send an rdma command to the PCIe engine, and wait for the 734 response in the confirmation address. The firmware should 735 write a -1 there to indicate it is alive and well 736 */ 737 738 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 739 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 740 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 741 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 742 buf[2] = htobe32(0xffffffff); /* confirm data */ 743 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 744 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 745 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 746 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 747 buf[5] = htobe32(enable); /* enable? */ 748 749 750 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 751 752 mxge_pio_copy(submit, buf, 64); 753 mb(); 754 DELAY(1000); 755 mb(); 756 i = 0; 757 while (*confirm != 0xffffffff && i < 20) { 758 DELAY(1000); 759 i++; 760 } 761 if (*confirm != 0xffffffff) { 762 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 763 (enable ? "enable" : "disable"), confirm, 764 *confirm); 765 } 766 return; 767} 768 769static int 770mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 771{ 772 mcp_cmd_t *buf; 773 char buf_bytes[sizeof(*buf) + 8]; 774 volatile mcp_cmd_response_t *response = sc->cmd; 775 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 776 uint32_t dma_low, dma_high; 777 int err, sleep_total = 0; 778 779 /* ensure buf is aligned to 8 bytes */ 780 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 781 782 buf->data0 = htobe32(data->data0); 783 buf->data1 = htobe32(data->data1); 784 buf->data2 = htobe32(data->data2); 785 buf->cmd = htobe32(cmd); 786 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 787 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 788 789 buf->response_addr.low = htobe32(dma_low); 790 buf->response_addr.high = htobe32(dma_high); 791 mtx_lock(&sc->cmd_mtx); 792 response->result = 0xffffffff; 793 mb(); 794 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 795 796 /* wait up to 20ms */ 797 err = EAGAIN; 798 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 799 bus_dmamap_sync(sc->cmd_dma.dmat, 800 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 801 mb(); 802 switch (be32toh(response->result)) { 803 case 0: 804 data->data0 = be32toh(response->data); 805 err = 0; 806 break; 807 case 0xffffffff: 808 DELAY(1000); 809 break; 810 case MXGEFW_CMD_UNKNOWN: 811 err = ENOSYS; 812 break; 813 case MXGEFW_CMD_ERROR_UNALIGNED: 814 err = E2BIG; 815 break; 816 default: 817 device_printf(sc->dev, 818 "mxge: command %d " 819 "failed, result = %d\n", 820 cmd, be32toh(response->result)); 821 err = ENXIO; 822 break; 823 } 824 if (err != EAGAIN) 825 break; 826 } 827 if (err == EAGAIN) 828 device_printf(sc->dev, "mxge: command %d timed out" 829 "result = %d\n", 830 cmd, be32toh(response->result)); 831 mtx_unlock(&sc->cmd_mtx); 832 return err; 833} 834 835static int 836mxge_adopt_running_firmware(mxge_softc_t *sc) 837{ 838 struct mcp_gen_header *hdr; 839 const size_t bytes = sizeof (struct mcp_gen_header); 840 size_t hdr_offset; 841 int status; 842 843 /* find running firmware header */ 844 hdr_offset = htobe32(*(volatile uint32_t *) 845 (sc->sram + MCP_HEADER_PTR_OFFSET)); 846 847 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 848 device_printf(sc->dev, 849 "Running firmware has bad header offset (%d)\n", 850 (int)hdr_offset); 851 return EIO; 852 } 853 854 /* copy header of running firmware from SRAM to host memory to 855 * validate firmware */ 856 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 857 if (hdr == NULL) { 858 device_printf(sc->dev, "could not malloc firmware hdr\n"); 859 return ENOMEM; 860 } 861 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 862 rman_get_bushandle(sc->mem_res), 863 hdr_offset, (char *)hdr, bytes); 864 status = mxge_validate_firmware(sc, hdr); 865 free(hdr, M_DEVBUF); 866 867 /* 868 * check to see if adopted firmware has bug where adopting 869 * it will cause broadcasts to be filtered unless the NIC 870 * is kept in ALLMULTI mode 871 */ 872 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 873 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 874 sc->adopted_rx_filter_bug = 1; 875 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 876 "working around rx filter bug\n", 877 sc->fw_ver_major, sc->fw_ver_minor, 878 sc->fw_ver_tiny); 879 } 880 881 return status; 882} 883 884 885static int 886mxge_load_firmware(mxge_softc_t *sc) 887{ 888 volatile uint32_t *confirm; 889 volatile char *submit; 890 char buf_bytes[72]; 891 uint32_t *buf, size, dma_low, dma_high; 892 int status, i; 893 894 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 895 896 size = sc->sram_size; 897 status = mxge_load_firmware_helper(sc, &size); 898 if (status) { 899 /* Try to use the currently running firmware, if 900 it is new enough */ 901 status = mxge_adopt_running_firmware(sc); 902 if (status) { 903 device_printf(sc->dev, 904 "failed to adopt running firmware\n"); 905 return status; 906 } 907 device_printf(sc->dev, 908 "Successfully adopted running firmware\n"); 909 if (sc->tx.boundary == 4096) { 910 device_printf(sc->dev, 911 "Using firmware currently running on NIC" 912 ". For optimal\n"); 913 device_printf(sc->dev, 914 "performance consider loading optimized " 915 "firmware\n"); 916 } 917 sc->fw_name = mxge_fw_unaligned; 918 sc->tx.boundary = 2048; 919 return 0; 920 } 921 /* clear confirmation addr */ 922 confirm = (volatile uint32_t *)sc->cmd; 923 *confirm = 0; 924 mb(); 925 /* send a reload command to the bootstrap MCP, and wait for the 926 response in the confirmation address. The firmware should 927 write a -1 there to indicate it is alive and well 928 */ 929 930 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 931 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 932 933 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 934 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 935 buf[2] = htobe32(0xffffffff); /* confirm data */ 936 937 /* FIX: All newest firmware should un-protect the bottom of 938 the sram before handoff. However, the very first interfaces 939 do not. Therefore the handoff copy must skip the first 8 bytes 940 */ 941 /* where the code starts*/ 942 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 943 buf[4] = htobe32(size - 8); /* length of code */ 944 buf[5] = htobe32(8); /* where to copy to */ 945 buf[6] = htobe32(0); /* where to jump to */ 946 947 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 948 mxge_pio_copy(submit, buf, 64); 949 mb(); 950 DELAY(1000); 951 mb(); 952 i = 0; 953 while (*confirm != 0xffffffff && i < 20) { 954 DELAY(1000*10); 955 i++; 956 bus_dmamap_sync(sc->cmd_dma.dmat, 957 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 958 } 959 if (*confirm != 0xffffffff) { 960 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 961 confirm, *confirm); 962 963 return ENXIO; 964 } 965 return 0; 966} 967 968static int 969mxge_update_mac_address(mxge_softc_t *sc) 970{ 971 mxge_cmd_t cmd; 972 uint8_t *addr = sc->mac_addr; 973 int status; 974 975 976 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 977 | (addr[2] << 8) | addr[3]); 978 979 cmd.data1 = ((addr[4] << 8) | (addr[5])); 980 981 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 982 return status; 983} 984 985static int 986mxge_change_pause(mxge_softc_t *sc, int pause) 987{ 988 mxge_cmd_t cmd; 989 int status; 990 991 if (pause) 992 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 993 &cmd); 994 else 995 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 996 &cmd); 997 998 if (status) { 999 device_printf(sc->dev, "Failed to set flow control mode\n"); 1000 return ENXIO; 1001 } 1002 sc->pause = pause; 1003 return 0; 1004} 1005 1006static void 1007mxge_change_promisc(mxge_softc_t *sc, int promisc) 1008{ 1009 mxge_cmd_t cmd; 1010 int status; 1011 1012 if (promisc) 1013 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1014 &cmd); 1015 else 1016 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1017 &cmd); 1018 1019 if (status) { 1020 device_printf(sc->dev, "Failed to set promisc mode\n"); 1021 } 1022} 1023 1024static void 1025mxge_set_multicast_list(mxge_softc_t *sc) 1026{ 1027 mxge_cmd_t cmd; 1028 struct ifmultiaddr *ifma; 1029 struct ifnet *ifp = sc->ifp; 1030 int err; 1031 1032 /* This firmware is known to not support multicast */ 1033 if (!sc->fw_multicast_support) 1034 return; 1035 1036 /* Disable multicast filtering while we play with the lists*/ 1037 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1038 if (err != 0) { 1039 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1040 " error status: %d\n", err); 1041 return; 1042 } 1043 1044 if (sc->adopted_rx_filter_bug) 1045 return; 1046 1047 if (ifp->if_flags & IFF_ALLMULTI) 1048 /* request to disable multicast filtering, so quit here */ 1049 return; 1050 1051 /* Flush all the filters */ 1052 1053 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1054 if (err != 0) { 1055 device_printf(sc->dev, 1056 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1057 ", error status: %d\n", err); 1058 return; 1059 } 1060 1061 /* Walk the multicast list, and add each address */ 1062 1063 IF_ADDR_LOCK(ifp); 1064 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1065 if (ifma->ifma_addr->sa_family != AF_LINK) 1066 continue; 1067 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1068 &cmd.data0, 4); 1069 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1070 &cmd.data1, 2); 1071 cmd.data0 = htonl(cmd.data0); 1072 cmd.data1 = htonl(cmd.data1); 1073 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1074 if (err != 0) { 1075 device_printf(sc->dev, "Failed " 1076 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1077 "%d\t", err); 1078 /* abort, leaving multicast filtering off */ 1079 IF_ADDR_UNLOCK(ifp); 1080 return; 1081 } 1082 } 1083 IF_ADDR_UNLOCK(ifp); 1084 /* Enable multicast filtering */ 1085 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1086 if (err != 0) { 1087 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1088 ", error status: %d\n", err); 1089 } 1090} 1091 1092static int 1093mxge_max_mtu(mxge_softc_t *sc) 1094{ 1095 mxge_cmd_t cmd; 1096 int status; 1097 1098 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1099 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1100 1101 /* try to set nbufs to see if it we can 1102 use virtually contiguous jumbos */ 1103 cmd.data0 = 0; 1104 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1105 &cmd); 1106 if (status == 0) 1107 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1108 1109 /* otherwise, we're limited to MJUMPAGESIZE */ 1110 return MJUMPAGESIZE - MXGEFW_PAD; 1111} 1112 1113static int 1114mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1115{ 1116 1117 mxge_cmd_t cmd; 1118 size_t bytes; 1119 int status; 1120 1121 /* try to send a reset command to the card to see if it 1122 is alive */ 1123 memset(&cmd, 0, sizeof (cmd)); 1124 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1125 if (status != 0) { 1126 device_printf(sc->dev, "failed reset\n"); 1127 return ENXIO; 1128 } 1129 1130 mxge_dummy_rdma(sc, 1); 1131 1132 if (interrupts_setup) { 1133 /* Now exchange information about interrupts */ 1134 bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); 1135 memset(sc->rx_done.entry, 0, bytes); 1136 cmd.data0 = (uint32_t)bytes; 1137 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1138 cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr); 1139 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr); 1140 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); 1141 } 1142 1143 status |= mxge_send_cmd(sc, 1144 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1145 1146 1147 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1148 1149 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1150 sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1151 1152 1153 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1154 &cmd); 1155 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1156 if (status != 0) { 1157 device_printf(sc->dev, "failed set interrupt parameters\n"); 1158 return status; 1159 } 1160 1161 1162 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1163 1164 1165 /* run a DMA benchmark */ 1166 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1167 1168 /* reset mcp/driver shared state back to 0 */ 1169 sc->rx_done.idx = 0; 1170 sc->rx_done.cnt = 0; 1171 sc->tx.req = 0; 1172 sc->tx.done = 0; 1173 sc->tx.pkt_done = 0; 1174 sc->tx.wake = 0; 1175 sc->tx_defrag = 0; 1176 sc->tx.stall = 0; 1177 sc->rx_big.cnt = 0; 1178 sc->rx_small.cnt = 0; 1179 sc->rdma_tags_available = 15; 1180 sc->fw_stats->valid = 0; 1181 sc->fw_stats->send_done_count = 0; 1182 sc->lro_bad_csum = 0; 1183 sc->lro_queued = 0; 1184 sc->lro_flushed = 0; 1185 status = mxge_update_mac_address(sc); 1186 mxge_change_promisc(sc, 0); 1187 mxge_change_pause(sc, sc->pause); 1188 mxge_set_multicast_list(sc); 1189 return status; 1190} 1191 1192static int 1193mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1194{ 1195 mxge_softc_t *sc; 1196 unsigned int intr_coal_delay; 1197 int err; 1198 1199 sc = arg1; 1200 intr_coal_delay = sc->intr_coal_delay; 1201 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1202 if (err != 0) { 1203 return err; 1204 } 1205 if (intr_coal_delay == sc->intr_coal_delay) 1206 return 0; 1207 1208 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1209 return EINVAL; 1210 1211 mtx_lock(&sc->driver_mtx); 1212 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1213 sc->intr_coal_delay = intr_coal_delay; 1214 1215 mtx_unlock(&sc->driver_mtx); 1216 return err; 1217} 1218 1219static int 1220mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1221{ 1222 mxge_softc_t *sc; 1223 unsigned int enabled; 1224 int err; 1225 1226 sc = arg1; 1227 enabled = sc->pause; 1228 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1229 if (err != 0) { 1230 return err; 1231 } 1232 if (enabled == sc->pause) 1233 return 0; 1234 1235 mtx_lock(&sc->driver_mtx); 1236 err = mxge_change_pause(sc, enabled); 1237 mtx_unlock(&sc->driver_mtx); 1238 return err; 1239} 1240 1241static int 1242mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1243{ 1244 struct ifnet *ifp; 1245 int err; 1246 1247 ifp = sc->ifp; 1248 if (lro_cnt == 0) 1249 ifp->if_capenable &= ~IFCAP_LRO; 1250 else 1251 ifp->if_capenable |= IFCAP_LRO; 1252 sc->lro_cnt = lro_cnt; 1253 callout_stop(&sc->co_hdl); 1254 mxge_close(sc); 1255 err = mxge_open(sc); 1256 if (err == 0) 1257 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 1258 return err; 1259} 1260 1261static int 1262mxge_change_lro(SYSCTL_HANDLER_ARGS) 1263{ 1264 mxge_softc_t *sc; 1265 unsigned int lro_cnt; 1266 int err; 1267 1268 sc = arg1; 1269 lro_cnt = sc->lro_cnt; 1270 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1271 if (err != 0) 1272 return err; 1273 1274 if (lro_cnt == sc->lro_cnt) 1275 return 0; 1276 1277 if (lro_cnt > 128) 1278 return EINVAL; 1279 1280 mtx_lock(&sc->driver_mtx); 1281 err = mxge_change_lro_locked(sc, lro_cnt); 1282 mtx_unlock(&sc->driver_mtx); 1283 return err; 1284} 1285 1286static int 1287mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1288{ 1289 int err; 1290 1291 if (arg1 == NULL) 1292 return EFAULT; 1293 arg2 = be32toh(*(int *)arg1); 1294 arg1 = NULL; 1295 err = sysctl_handle_int(oidp, arg1, arg2, req); 1296 1297 return err; 1298} 1299 1300static void 1301mxge_add_sysctls(mxge_softc_t *sc) 1302{ 1303 struct sysctl_ctx_list *ctx; 1304 struct sysctl_oid_list *children; 1305 mcp_irq_data_t *fw; 1306 1307 ctx = device_get_sysctl_ctx(sc->dev); 1308 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1309 fw = sc->fw_stats; 1310 1311 /* random information */ 1312 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1313 "firmware_version", 1314 CTLFLAG_RD, &sc->fw_version, 1315 0, "firmware version"); 1316 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1317 "serial_number", 1318 CTLFLAG_RD, &sc->serial_number_string, 1319 0, "serial number"); 1320 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1321 "product_code", 1322 CTLFLAG_RD, &sc->product_code_string, 1323 0, "product_code"); 1324 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1325 "pcie_link_width", 1326 CTLFLAG_RD, &sc->link_width, 1327 0, "tx_boundary"); 1328 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1329 "tx_boundary", 1330 CTLFLAG_RD, &sc->tx.boundary, 1331 0, "tx_boundary"); 1332 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1333 "write_combine", 1334 CTLFLAG_RD, &sc->wc, 1335 0, "write combining PIO?"); 1336 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1337 "read_dma_MBs", 1338 CTLFLAG_RD, &sc->read_dma, 1339 0, "DMA Read speed in MB/s"); 1340 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1341 "write_dma_MBs", 1342 CTLFLAG_RD, &sc->write_dma, 1343 0, "DMA Write speed in MB/s"); 1344 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1345 "read_write_dma_MBs", 1346 CTLFLAG_RD, &sc->read_write_dma, 1347 0, "DMA concurrent Read/Write speed in MB/s"); 1348 1349 1350 /* performance related tunables */ 1351 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1352 "intr_coal_delay", 1353 CTLTYPE_INT|CTLFLAG_RW, sc, 1354 0, mxge_change_intr_coal, 1355 "I", "interrupt coalescing delay in usecs"); 1356 1357 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1358 "flow_control_enabled", 1359 CTLTYPE_INT|CTLFLAG_RW, sc, 1360 0, mxge_change_flow_control, 1361 "I", "interrupt coalescing delay in usecs"); 1362 1363 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1364 "deassert_wait", 1365 CTLFLAG_RW, &mxge_deassert_wait, 1366 0, "Wait for IRQ line to go low in ihandler"); 1367 1368 /* stats block from firmware is in network byte order. 1369 Need to swap it */ 1370 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1371 "link_up", 1372 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1373 0, mxge_handle_be32, 1374 "I", "link up"); 1375 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1376 "rdma_tags_available", 1377 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1378 0, mxge_handle_be32, 1379 "I", "rdma_tags_available"); 1380 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1381 "dropped_bad_crc32", 1382 CTLTYPE_INT|CTLFLAG_RD, 1383 &fw->dropped_bad_crc32, 1384 0, mxge_handle_be32, 1385 "I", "dropped_bad_crc32"); 1386 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1387 "dropped_bad_phy", 1388 CTLTYPE_INT|CTLFLAG_RD, 1389 &fw->dropped_bad_phy, 1390 0, mxge_handle_be32, 1391 "I", "dropped_bad_phy"); 1392 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1393 "dropped_link_error_or_filtered", 1394 CTLTYPE_INT|CTLFLAG_RD, 1395 &fw->dropped_link_error_or_filtered, 1396 0, mxge_handle_be32, 1397 "I", "dropped_link_error_or_filtered"); 1398 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1399 "dropped_link_overflow", 1400 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1401 0, mxge_handle_be32, 1402 "I", "dropped_link_overflow"); 1403 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1404 "dropped_multicast_filtered", 1405 CTLTYPE_INT|CTLFLAG_RD, 1406 &fw->dropped_multicast_filtered, 1407 0, mxge_handle_be32, 1408 "I", "dropped_multicast_filtered"); 1409 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1410 "dropped_no_big_buffer", 1411 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1412 0, mxge_handle_be32, 1413 "I", "dropped_no_big_buffer"); 1414 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1415 "dropped_no_small_buffer", 1416 CTLTYPE_INT|CTLFLAG_RD, 1417 &fw->dropped_no_small_buffer, 1418 0, mxge_handle_be32, 1419 "I", "dropped_no_small_buffer"); 1420 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1421 "dropped_overrun", 1422 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1423 0, mxge_handle_be32, 1424 "I", "dropped_overrun"); 1425 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1426 "dropped_pause", 1427 CTLTYPE_INT|CTLFLAG_RD, 1428 &fw->dropped_pause, 1429 0, mxge_handle_be32, 1430 "I", "dropped_pause"); 1431 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1432 "dropped_runt", 1433 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1434 0, mxge_handle_be32, 1435 "I", "dropped_runt"); 1436 1437 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1438 "dropped_unicast_filtered", 1439 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1440 0, mxge_handle_be32, 1441 "I", "dropped_unicast_filtered"); 1442 1443 /* host counters exported for debugging */ 1444 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1445 "rx_small_cnt", 1446 CTLFLAG_RD, &sc->rx_small.cnt, 1447 0, "rx_small_cnt"); 1448 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1449 "rx_big_cnt", 1450 CTLFLAG_RD, &sc->rx_big.cnt, 1451 0, "rx_small_cnt"); 1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1453 "tx_req", 1454 CTLFLAG_RD, &sc->tx.req, 1455 0, "tx_req"); 1456 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1457 "tx_done", 1458 CTLFLAG_RD, &sc->tx.done, 1459 0, "tx_done"); 1460 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1461 "tx_pkt_done", 1462 CTLFLAG_RD, &sc->tx.pkt_done, 1463 0, "tx_done"); 1464 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1465 "tx_stall", 1466 CTLFLAG_RD, &sc->tx.stall, 1467 0, "tx_stall"); 1468 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1469 "tx_wake", 1470 CTLFLAG_RD, &sc->tx.wake, 1471 0, "tx_wake"); 1472 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1473 "tx_defrag", 1474 CTLFLAG_RD, &sc->tx_defrag, 1475 0, "tx_defrag"); 1476 1477 /* verbose printing? */ 1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1479 "verbose", 1480 CTLFLAG_RW, &mxge_verbose, 1481 0, "verbose printing"); 1482 1483 /* lro */ 1484 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1485 "lro_cnt", 1486 CTLTYPE_INT|CTLFLAG_RW, sc, 1487 0, mxge_change_lro, 1488 "I", "number of lro merge queues"); 1489 1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1491 "lro_flushed", CTLFLAG_RD, &sc->lro_flushed, 1492 0, "number of lro merge queues flushed"); 1493 1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1495 "lro_queued", CTLFLAG_RD, &sc->lro_queued, 1496 0, "number of frames appended to lro merge queues"); 1497 1498} 1499 1500/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1501 backwards one at a time and handle ring wraps */ 1502 1503static inline void 1504mxge_submit_req_backwards(mxge_tx_buf_t *tx, 1505 mcp_kreq_ether_send_t *src, int cnt) 1506{ 1507 int idx, starting_slot; 1508 starting_slot = tx->req; 1509 while (cnt > 1) { 1510 cnt--; 1511 idx = (starting_slot + cnt) & tx->mask; 1512 mxge_pio_copy(&tx->lanai[idx], 1513 &src[cnt], sizeof(*src)); 1514 mb(); 1515 } 1516} 1517 1518/* 1519 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1520 * at most 32 bytes at a time, so as to avoid involving the software 1521 * pio handler in the nic. We re-write the first segment's flags 1522 * to mark them valid only after writing the entire chain 1523 */ 1524 1525static inline void 1526mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, 1527 int cnt) 1528{ 1529 int idx, i; 1530 uint32_t *src_ints; 1531 volatile uint32_t *dst_ints; 1532 mcp_kreq_ether_send_t *srcp; 1533 volatile mcp_kreq_ether_send_t *dstp, *dst; 1534 uint8_t last_flags; 1535 1536 idx = tx->req & tx->mask; 1537 1538 last_flags = src->flags; 1539 src->flags = 0; 1540 mb(); 1541 dst = dstp = &tx->lanai[idx]; 1542 srcp = src; 1543 1544 if ((idx + cnt) < tx->mask) { 1545 for (i = 0; i < (cnt - 1); i += 2) { 1546 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1547 mb(); /* force write every 32 bytes */ 1548 srcp += 2; 1549 dstp += 2; 1550 } 1551 } else { 1552 /* submit all but the first request, and ensure 1553 that it is submitted below */ 1554 mxge_submit_req_backwards(tx, src, cnt); 1555 i = 0; 1556 } 1557 if (i < cnt) { 1558 /* submit the first request */ 1559 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1560 mb(); /* barrier before setting valid flag */ 1561 } 1562 1563 /* re-write the last 32-bits with the valid flags */ 1564 src->flags = last_flags; 1565 src_ints = (uint32_t *)src; 1566 src_ints+=3; 1567 dst_ints = (volatile uint32_t *)dst; 1568 dst_ints+=3; 1569 *dst_ints = *src_ints; 1570 tx->req += cnt; 1571 mb(); 1572} 1573 1574static void 1575mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, 1576 int ip_off) 1577{ 1578 mxge_tx_buf_t *tx; 1579 mcp_kreq_ether_send_t *req; 1580 bus_dma_segment_t *seg; 1581 struct ip *ip; 1582 struct tcphdr *tcp; 1583 uint32_t low, high_swapped; 1584 int len, seglen, cum_len, cum_len_next; 1585 int next_is_first, chop, cnt, rdma_count, small; 1586 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1587 uint8_t flags, flags_next; 1588 static int once; 1589 1590 mss = m->m_pkthdr.tso_segsz; 1591 1592 /* negative cum_len signifies to the 1593 * send loop that we are still in the 1594 * header portion of the TSO packet. 1595 */ 1596 1597 /* ensure we have the ethernet, IP and TCP 1598 header together in the first mbuf, copy 1599 it to a scratch buffer if not */ 1600 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1601 m_copydata(m, 0, ip_off + sizeof (*ip), 1602 sc->scratch); 1603 ip = (struct ip *)(sc->scratch + ip_off); 1604 } else { 1605 ip = (struct ip *)(mtod(m, char *) + ip_off); 1606 } 1607 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1608 + sizeof (*tcp))) { 1609 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1610 + sizeof (*tcp), sc->scratch); 1611 ip = (struct ip *)(mtod(m, char *) + ip_off); 1612 } 1613 1614 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1615 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1616 1617 /* TSO implies checksum offload on this hardware */ 1618 cksum_offset = ip_off + (ip->ip_hl << 2); 1619 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1620 1621 1622 /* for TSO, pseudo_hdr_offset holds mss. 1623 * The firmware figures out where to put 1624 * the checksum by parsing the header. */ 1625 pseudo_hdr_offset = htobe16(mss); 1626 1627 tx = &sc->tx; 1628 req = tx->req_list; 1629 seg = tx->seg_list; 1630 cnt = 0; 1631 rdma_count = 0; 1632 /* "rdma_count" is the number of RDMAs belonging to the 1633 * current packet BEFORE the current send request. For 1634 * non-TSO packets, this is equal to "count". 1635 * For TSO packets, rdma_count needs to be reset 1636 * to 0 after a segment cut. 1637 * 1638 * The rdma_count field of the send request is 1639 * the number of RDMAs of the packet starting at 1640 * that request. For TSO send requests with one ore more cuts 1641 * in the middle, this is the number of RDMAs starting 1642 * after the last cut in the request. All previous 1643 * segments before the last cut implicitly have 1 RDMA. 1644 * 1645 * Since the number of RDMAs is not known beforehand, 1646 * it must be filled-in retroactively - after each 1647 * segmentation cut or at the end of the entire packet. 1648 */ 1649 1650 while (busdma_seg_cnt) { 1651 /* Break the busdma segment up into pieces*/ 1652 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1653 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1654 len = seg->ds_len; 1655 1656 while (len) { 1657 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1658 seglen = len; 1659 cum_len_next = cum_len + seglen; 1660 (req-rdma_count)->rdma_count = rdma_count + 1; 1661 if (__predict_true(cum_len >= 0)) { 1662 /* payload */ 1663 chop = (cum_len_next > mss); 1664 cum_len_next = cum_len_next % mss; 1665 next_is_first = (cum_len_next == 0); 1666 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1667 flags_next |= next_is_first * 1668 MXGEFW_FLAGS_FIRST; 1669 rdma_count |= -(chop | next_is_first); 1670 rdma_count += chop & !next_is_first; 1671 } else if (cum_len_next >= 0) { 1672 /* header ends */ 1673 rdma_count = -1; 1674 cum_len_next = 0; 1675 seglen = -cum_len; 1676 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1677 flags_next = MXGEFW_FLAGS_TSO_PLD | 1678 MXGEFW_FLAGS_FIRST | 1679 (small * MXGEFW_FLAGS_SMALL); 1680 } 1681 1682 req->addr_high = high_swapped; 1683 req->addr_low = htobe32(low); 1684 req->pseudo_hdr_offset = pseudo_hdr_offset; 1685 req->pad = 0; 1686 req->rdma_count = 1; 1687 req->length = htobe16(seglen); 1688 req->cksum_offset = cksum_offset; 1689 req->flags = flags | ((cum_len & 1) * 1690 MXGEFW_FLAGS_ALIGN_ODD); 1691 low += seglen; 1692 len -= seglen; 1693 cum_len = cum_len_next; 1694 flags = flags_next; 1695 req++; 1696 cnt++; 1697 rdma_count++; 1698 if (__predict_false(cksum_offset > seglen)) 1699 cksum_offset -= seglen; 1700 else 1701 cksum_offset = 0; 1702 if (__predict_false(cnt > tx->max_desc)) 1703 goto drop; 1704 } 1705 busdma_seg_cnt--; 1706 seg++; 1707 } 1708 (req-rdma_count)->rdma_count = rdma_count; 1709 1710 do { 1711 req--; 1712 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1713 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1714 1715 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1716 mxge_submit_req(tx, tx->req_list, cnt); 1717 return; 1718 1719drop: 1720 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1721 m_freem(m); 1722 sc->ifp->if_oerrors++; 1723 if (!once) { 1724 printf("tx->max_desc exceeded via TSO!\n"); 1725 printf("mss = %d, %ld, %d!\n", mss, 1726 (long)seg - (long)tx->seg_list, tx->max_desc); 1727 once = 1; 1728 } 1729 return; 1730 1731} 1732 1733/* 1734 * We reproduce the software vlan tag insertion from 1735 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1736 * vlan tag insertion. We need to advertise this in order to have the 1737 * vlan interface respect our csum offload flags. 1738 */ 1739static struct mbuf * 1740mxge_vlan_tag_insert(struct mbuf *m) 1741{ 1742 struct ether_vlan_header *evl; 1743 1744 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1745 if (__predict_false(m == NULL)) 1746 return NULL; 1747 if (m->m_len < sizeof(*evl)) { 1748 m = m_pullup(m, sizeof(*evl)); 1749 if (__predict_false(m == NULL)) 1750 return NULL; 1751 } 1752 /* 1753 * Transform the Ethernet header into an Ethernet header 1754 * with 802.1Q encapsulation. 1755 */ 1756 evl = mtod(m, struct ether_vlan_header *); 1757 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1758 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1759 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1760 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1761 m->m_flags &= ~M_VLANTAG; 1762 return m; 1763} 1764 1765static void 1766mxge_encap(mxge_softc_t *sc, struct mbuf *m) 1767{ 1768 mcp_kreq_ether_send_t *req; 1769 bus_dma_segment_t *seg; 1770 struct mbuf *m_tmp; 1771 struct ifnet *ifp; 1772 mxge_tx_buf_t *tx; 1773 struct ip *ip; 1774 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1775 uint16_t pseudo_hdr_offset; 1776 uint8_t flags, cksum_offset; 1777 1778 1779 1780 ifp = sc->ifp; 1781 tx = &sc->tx; 1782 1783 ip_off = sizeof (struct ether_header); 1784 if (m->m_flags & M_VLANTAG) { 1785 m = mxge_vlan_tag_insert(m); 1786 if (__predict_false(m == NULL)) 1787 goto drop; 1788 ip_off += ETHER_VLAN_ENCAP_LEN; 1789 } 1790 1791 /* (try to) map the frame for DMA */ 1792 idx = tx->req & tx->mask; 1793 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1794 m, tx->seg_list, &cnt, 1795 BUS_DMA_NOWAIT); 1796 if (__predict_false(err == EFBIG)) { 1797 /* Too many segments in the chain. Try 1798 to defrag */ 1799 m_tmp = m_defrag(m, M_NOWAIT); 1800 if (m_tmp == NULL) { 1801 goto drop; 1802 } 1803 sc->tx_defrag++; 1804 m = m_tmp; 1805 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1806 tx->info[idx].map, 1807 m, tx->seg_list, &cnt, 1808 BUS_DMA_NOWAIT); 1809 } 1810 if (__predict_false(err != 0)) { 1811 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1812 " packet len = %d\n", err, m->m_pkthdr.len); 1813 goto drop; 1814 } 1815 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1816 BUS_DMASYNC_PREWRITE); 1817 tx->info[idx].m = m; 1818 1819 1820 /* TSO is different enough, we handle it in another routine */ 1821 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1822 mxge_encap_tso(sc, m, cnt, ip_off); 1823 return; 1824 } 1825 1826 req = tx->req_list; 1827 cksum_offset = 0; 1828 pseudo_hdr_offset = 0; 1829 flags = MXGEFW_FLAGS_NO_TSO; 1830 1831 /* checksum offloading? */ 1832 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 1833 /* ensure ip header is in first mbuf, copy 1834 it to a scratch buffer if not */ 1835 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1836 m_copydata(m, 0, ip_off + sizeof (*ip), 1837 sc->scratch); 1838 ip = (struct ip *)(sc->scratch + ip_off); 1839 } else { 1840 ip = (struct ip *)(mtod(m, char *) + ip_off); 1841 } 1842 cksum_offset = ip_off + (ip->ip_hl << 2); 1843 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1844 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1845 req->cksum_offset = cksum_offset; 1846 flags |= MXGEFW_FLAGS_CKSUM; 1847 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1848 } else { 1849 odd_flag = 0; 1850 } 1851 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1852 flags |= MXGEFW_FLAGS_SMALL; 1853 1854 /* convert segments into a request list */ 1855 cum_len = 0; 1856 seg = tx->seg_list; 1857 req->flags = MXGEFW_FLAGS_FIRST; 1858 for (i = 0; i < cnt; i++) { 1859 req->addr_low = 1860 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1861 req->addr_high = 1862 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1863 req->length = htobe16(seg->ds_len); 1864 req->cksum_offset = cksum_offset; 1865 if (cksum_offset > seg->ds_len) 1866 cksum_offset -= seg->ds_len; 1867 else 1868 cksum_offset = 0; 1869 req->pseudo_hdr_offset = pseudo_hdr_offset; 1870 req->pad = 0; /* complete solid 16-byte block */ 1871 req->rdma_count = 1; 1872 req->flags |= flags | ((cum_len & 1) * odd_flag); 1873 cum_len += seg->ds_len; 1874 seg++; 1875 req++; 1876 req->flags = 0; 1877 } 1878 req--; 1879 /* pad runts to 60 bytes */ 1880 if (cum_len < 60) { 1881 req++; 1882 req->addr_low = 1883 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 1884 req->addr_high = 1885 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 1886 req->length = htobe16(60 - cum_len); 1887 req->cksum_offset = 0; 1888 req->pseudo_hdr_offset = pseudo_hdr_offset; 1889 req->pad = 0; /* complete solid 16-byte block */ 1890 req->rdma_count = 1; 1891 req->flags |= flags | ((cum_len & 1) * odd_flag); 1892 cnt++; 1893 } 1894 1895 tx->req_list[0].rdma_count = cnt; 1896#if 0 1897 /* print what the firmware will see */ 1898 for (i = 0; i < cnt; i++) { 1899 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 1900 "cso:%d, flags:0x%x, rdma:%d\n", 1901 i, (int)ntohl(tx->req_list[i].addr_high), 1902 (int)ntohl(tx->req_list[i].addr_low), 1903 (int)ntohs(tx->req_list[i].length), 1904 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1905 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1906 tx->req_list[i].rdma_count); 1907 } 1908 printf("--------------\n"); 1909#endif 1910 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1911 mxge_submit_req(tx, tx->req_list, cnt); 1912 return; 1913 1914drop: 1915 m_freem(m); 1916 ifp->if_oerrors++; 1917 return; 1918} 1919 1920 1921 1922 1923static inline void 1924mxge_start_locked(mxge_softc_t *sc) 1925{ 1926 struct mbuf *m; 1927 struct ifnet *ifp; 1928 mxge_tx_buf_t *tx; 1929 1930 ifp = sc->ifp; 1931 tx = &sc->tx; 1932 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 1933 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 1934 if (m == NULL) { 1935 return; 1936 } 1937 /* let BPF see it */ 1938 BPF_MTAP(ifp, m); 1939 1940 /* give it to the nic */ 1941 mxge_encap(sc, m); 1942 } 1943 /* ran out of transmit slots */ 1944 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 1945 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 1946 tx->stall++; 1947 } 1948} 1949 1950static void 1951mxge_start(struct ifnet *ifp) 1952{ 1953 mxge_softc_t *sc = ifp->if_softc; 1954 1955 1956 mtx_lock(&sc->tx_mtx); 1957 mxge_start_locked(sc); 1958 mtx_unlock(&sc->tx_mtx); 1959} 1960 1961/* 1962 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 1963 * at most 32 bytes at a time, so as to avoid involving the software 1964 * pio handler in the nic. We re-write the first segment's low 1965 * DMA address to mark it valid only after we write the entire chunk 1966 * in a burst 1967 */ 1968static inline void 1969mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 1970 mcp_kreq_ether_recv_t *src) 1971{ 1972 uint32_t low; 1973 1974 low = src->addr_low; 1975 src->addr_low = 0xffffffff; 1976 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 1977 mb(); 1978 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 1979 mb(); 1980 src->addr_low = low; 1981 dst->addr_low = low; 1982 mb(); 1983} 1984 1985static int 1986mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx) 1987{ 1988 bus_dma_segment_t seg; 1989 struct mbuf *m; 1990 mxge_rx_buf_t *rx = &sc->rx_small; 1991 int cnt, err; 1992 1993 m = m_gethdr(M_DONTWAIT, MT_DATA); 1994 if (m == NULL) { 1995 rx->alloc_fail++; 1996 err = ENOBUFS; 1997 goto done; 1998 } 1999 m->m_len = MHLEN; 2000 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2001 &seg, &cnt, BUS_DMA_NOWAIT); 2002 if (err != 0) { 2003 m_free(m); 2004 goto done; 2005 } 2006 rx->info[idx].m = m; 2007 rx->shadow[idx].addr_low = 2008 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2009 rx->shadow[idx].addr_high = 2010 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2011 2012done: 2013 if ((idx & 7) == 7) 2014 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2015 return err; 2016} 2017 2018static int 2019mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx) 2020{ 2021 bus_dma_segment_t seg[3]; 2022 struct mbuf *m; 2023 mxge_rx_buf_t *rx = &sc->rx_big; 2024 int cnt, err, i; 2025 2026 if (rx->cl_size == MCLBYTES) 2027 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2028 else 2029 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2030 if (m == NULL) { 2031 rx->alloc_fail++; 2032 err = ENOBUFS; 2033 goto done; 2034 } 2035 m->m_len = rx->cl_size; 2036 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2037 seg, &cnt, BUS_DMA_NOWAIT); 2038 if (err != 0) { 2039 m_free(m); 2040 goto done; 2041 } 2042 rx->info[idx].m = m; 2043 2044 for (i = 0; i < cnt; i++) { 2045 rx->shadow[idx + i].addr_low = 2046 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2047 rx->shadow[idx + i].addr_high = 2048 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2049 } 2050 2051 2052done: 2053 for (i = 0; i < rx->nbufs; i++) { 2054 if ((idx & 7) == 7) { 2055 mxge_submit_8rx(&rx->lanai[idx - 7], 2056 &rx->shadow[idx - 7]); 2057 } 2058 idx++; 2059 } 2060 return err; 2061} 2062 2063/* 2064 * Myri10GE hardware checksums are not valid if the sender 2065 * padded the frame with non-zero padding. This is because 2066 * the firmware just does a simple 16-bit 1s complement 2067 * checksum across the entire frame, excluding the first 14 2068 * bytes. It is best to simply to check the checksum and 2069 * tell the stack about it only if the checksum is good 2070 */ 2071 2072static inline uint16_t 2073mxge_rx_csum(struct mbuf *m, int csum) 2074{ 2075 struct ether_header *eh; 2076 struct ip *ip; 2077 uint16_t c; 2078 2079 eh = mtod(m, struct ether_header *); 2080 2081 /* only deal with IPv4 TCP & UDP for now */ 2082 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2083 return 1; 2084 ip = (struct ip *)(eh + 1); 2085 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2086 ip->ip_p != IPPROTO_UDP)) 2087 return 1; 2088 2089 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2090 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2091 - (ip->ip_hl << 2) + ip->ip_p)); 2092 c ^= 0xffff; 2093 return (c); 2094} 2095 2096static void 2097mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2098{ 2099 struct ether_vlan_header *evl; 2100 struct ether_header *eh; 2101 uint32_t partial; 2102 2103 evl = mtod(m, struct ether_vlan_header *); 2104 eh = mtod(m, struct ether_header *); 2105 2106 /* 2107 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2108 * after what the firmware thought was the end of the ethernet 2109 * header. 2110 */ 2111 2112 /* put checksum into host byte order */ 2113 *csum = ntohs(*csum); 2114 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2115 (*csum) += ~partial; 2116 (*csum) += ((*csum) < ~partial); 2117 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2118 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2119 2120 /* restore checksum to network byte order; 2121 later consumers expect this */ 2122 *csum = htons(*csum); 2123 2124 /* save the tag */ 2125 m->m_flags |= M_VLANTAG; 2126 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2127 2128 /* 2129 * Remove the 802.1q header by copying the Ethernet 2130 * addresses over it and adjusting the beginning of 2131 * the data in the mbuf. The encapsulated Ethernet 2132 * type field is already in place. 2133 */ 2134 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2135 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2136 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2137} 2138 2139 2140static inline void 2141mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2142{ 2143 struct ifnet *ifp; 2144 struct mbuf *m; 2145 struct ether_header *eh; 2146 mxge_rx_buf_t *rx; 2147 bus_dmamap_t old_map; 2148 int idx; 2149 uint16_t tcpudp_csum; 2150 2151 ifp = sc->ifp; 2152 rx = &sc->rx_big; 2153 idx = rx->cnt & rx->mask; 2154 rx->cnt += rx->nbufs; 2155 /* save a pointer to the received mbuf */ 2156 m = rx->info[idx].m; 2157 /* try to replace the received mbuf */ 2158 if (mxge_get_buf_big(sc, rx->extra_map, idx)) { 2159 /* drop the frame -- the old mbuf is re-cycled */ 2160 ifp->if_ierrors++; 2161 return; 2162 } 2163 2164 /* unmap the received buffer */ 2165 old_map = rx->info[idx].map; 2166 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2167 bus_dmamap_unload(rx->dmat, old_map); 2168 2169 /* swap the bus_dmamap_t's */ 2170 rx->info[idx].map = rx->extra_map; 2171 rx->extra_map = old_map; 2172 2173 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2174 * aligned */ 2175 m->m_data += MXGEFW_PAD; 2176 2177 m->m_pkthdr.rcvif = ifp; 2178 m->m_len = m->m_pkthdr.len = len; 2179 ifp->if_ipackets++; 2180 eh = mtod(m, struct ether_header *); 2181 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2182 mxge_vlan_tag_remove(m, &csum); 2183 } 2184 /* if the checksum is valid, mark it in the mbuf header */ 2185 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2186 if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) 2187 return; 2188 /* otherwise, it was a UDP frame, or a TCP frame which 2189 we could not do LRO on. Tell the stack that the 2190 checksum is good */ 2191 m->m_pkthdr.csum_data = 0xffff; 2192 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2193 } 2194 /* pass the frame up the stack */ 2195 (*ifp->if_input)(ifp, m); 2196} 2197 2198static inline void 2199mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2200{ 2201 struct ifnet *ifp; 2202 struct ether_header *eh; 2203 struct mbuf *m; 2204 mxge_rx_buf_t *rx; 2205 bus_dmamap_t old_map; 2206 int idx; 2207 uint16_t tcpudp_csum; 2208 2209 ifp = sc->ifp; 2210 rx = &sc->rx_small; 2211 idx = rx->cnt & rx->mask; 2212 rx->cnt++; 2213 /* save a pointer to the received mbuf */ 2214 m = rx->info[idx].m; 2215 /* try to replace the received mbuf */ 2216 if (mxge_get_buf_small(sc, rx->extra_map, idx)) { 2217 /* drop the frame -- the old mbuf is re-cycled */ 2218 ifp->if_ierrors++; 2219 return; 2220 } 2221 2222 /* unmap the received buffer */ 2223 old_map = rx->info[idx].map; 2224 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2225 bus_dmamap_unload(rx->dmat, old_map); 2226 2227 /* swap the bus_dmamap_t's */ 2228 rx->info[idx].map = rx->extra_map; 2229 rx->extra_map = old_map; 2230 2231 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2232 * aligned */ 2233 m->m_data += MXGEFW_PAD; 2234 2235 m->m_pkthdr.rcvif = ifp; 2236 m->m_len = m->m_pkthdr.len = len; 2237 ifp->if_ipackets++; 2238 eh = mtod(m, struct ether_header *); 2239 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2240 mxge_vlan_tag_remove(m, &csum); 2241 } 2242 /* if the checksum is valid, mark it in the mbuf header */ 2243 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2244 if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) 2245 return; 2246 /* otherwise, it was a UDP frame, or a TCP frame which 2247 we could not do LRO on. Tell the stack that the 2248 checksum is good */ 2249 m->m_pkthdr.csum_data = 0xffff; 2250 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2251 } 2252 2253 /* pass the frame up the stack */ 2254 (*ifp->if_input)(ifp, m); 2255} 2256 2257static inline void 2258mxge_clean_rx_done(mxge_softc_t *sc) 2259{ 2260 mxge_rx_done_t *rx_done = &sc->rx_done; 2261 struct lro_entry *lro; 2262 int limit = 0; 2263 uint16_t length; 2264 uint16_t checksum; 2265 2266 2267 while (rx_done->entry[rx_done->idx].length != 0) { 2268 length = ntohs(rx_done->entry[rx_done->idx].length); 2269 rx_done->entry[rx_done->idx].length = 0; 2270 checksum = rx_done->entry[rx_done->idx].checksum; 2271 if (length <= (MHLEN - MXGEFW_PAD)) 2272 mxge_rx_done_small(sc, length, checksum); 2273 else 2274 mxge_rx_done_big(sc, length, checksum); 2275 rx_done->cnt++; 2276 rx_done->idx = rx_done->cnt & rx_done->mask; 2277 2278 /* limit potential for livelock */ 2279 if (__predict_false(++limit > rx_done->mask / 2)) 2280 break; 2281 } 2282 while(!SLIST_EMPTY(&sc->lro_active)) { 2283 lro = SLIST_FIRST(&sc->lro_active); 2284 SLIST_REMOVE_HEAD(&sc->lro_active, next); 2285 mxge_lro_flush(sc, lro); 2286 } 2287} 2288 2289 2290static inline void 2291mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) 2292{ 2293 struct ifnet *ifp; 2294 mxge_tx_buf_t *tx; 2295 struct mbuf *m; 2296 bus_dmamap_t map; 2297 int idx; 2298 2299 tx = &sc->tx; 2300 ifp = sc->ifp; 2301 while (tx->pkt_done != mcp_idx) { 2302 idx = tx->done & tx->mask; 2303 tx->done++; 2304 m = tx->info[idx].m; 2305 /* mbuf and DMA map only attached to the first 2306 segment per-mbuf */ 2307 if (m != NULL) { 2308 ifp->if_opackets++; 2309 tx->info[idx].m = NULL; 2310 map = tx->info[idx].map; 2311 bus_dmamap_unload(tx->dmat, map); 2312 m_freem(m); 2313 } 2314 if (tx->info[idx].flag) { 2315 tx->info[idx].flag = 0; 2316 tx->pkt_done++; 2317 } 2318 } 2319 2320 /* If we have space, clear IFF_OACTIVE to tell the stack that 2321 its OK to send packets */ 2322 2323 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2324 tx->req - tx->done < (tx->mask + 1)/4) { 2325 mtx_lock(&sc->tx_mtx); 2326 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2327 sc->tx.wake++; 2328 mxge_start_locked(sc); 2329 mtx_unlock(&sc->tx_mtx); 2330 } 2331} 2332 2333static void 2334mxge_intr(void *arg) 2335{ 2336 mxge_softc_t *sc = arg; 2337 mcp_irq_data_t *stats = sc->fw_stats; 2338 mxge_tx_buf_t *tx = &sc->tx; 2339 mxge_rx_done_t *rx_done = &sc->rx_done; 2340 uint32_t send_done_count; 2341 uint8_t valid; 2342 2343 2344 /* make sure the DMA has finished */ 2345 if (!stats->valid) { 2346 return; 2347 } 2348 valid = stats->valid; 2349 2350 if (!sc->msi_enabled) { 2351 /* lower legacy IRQ */ 2352 *sc->irq_deassert = 0; 2353 if (!mxge_deassert_wait) 2354 /* don't wait for conf. that irq is low */ 2355 stats->valid = 0; 2356 } else { 2357 stats->valid = 0; 2358 } 2359 2360 /* loop while waiting for legacy irq deassertion */ 2361 do { 2362 /* check for transmit completes and receives */ 2363 send_done_count = be32toh(stats->send_done_count); 2364 while ((send_done_count != tx->pkt_done) || 2365 (rx_done->entry[rx_done->idx].length != 0)) { 2366 mxge_tx_done(sc, (int)send_done_count); 2367 mxge_clean_rx_done(sc); 2368 send_done_count = be32toh(stats->send_done_count); 2369 } 2370 } while (*((volatile uint8_t *) &stats->valid)); 2371 2372 if (__predict_false(stats->stats_updated)) { 2373 if (sc->link_state != stats->link_up) { 2374 sc->link_state = stats->link_up; 2375 if (sc->link_state) { 2376 if_link_state_change(sc->ifp, LINK_STATE_UP); 2377 if (mxge_verbose) 2378 device_printf(sc->dev, "link up\n"); 2379 } else { 2380 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2381 if (mxge_verbose) 2382 device_printf(sc->dev, "link down\n"); 2383 } 2384 } 2385 if (sc->rdma_tags_available != 2386 be32toh(sc->fw_stats->rdma_tags_available)) { 2387 sc->rdma_tags_available = 2388 be32toh(sc->fw_stats->rdma_tags_available); 2389 device_printf(sc->dev, "RDMA timed out! %d tags " 2390 "left\n", sc->rdma_tags_available); 2391 } 2392 sc->down_cnt += stats->link_down; 2393 } 2394 2395 /* check to see if we have rx token to pass back */ 2396 if (valid & 0x1) 2397 *sc->irq_claim = be32toh(3); 2398 *(sc->irq_claim + 1) = be32toh(3); 2399} 2400 2401static void 2402mxge_init(void *arg) 2403{ 2404} 2405 2406 2407 2408static void 2409mxge_free_mbufs(mxge_softc_t *sc) 2410{ 2411 int i; 2412 2413 for (i = 0; i <= sc->rx_big.mask; i++) { 2414 if (sc->rx_big.info[i].m == NULL) 2415 continue; 2416 bus_dmamap_unload(sc->rx_big.dmat, 2417 sc->rx_big.info[i].map); 2418 m_freem(sc->rx_big.info[i].m); 2419 sc->rx_big.info[i].m = NULL; 2420 } 2421 2422 for (i = 0; i <= sc->rx_small.mask; i++) { 2423 if (sc->rx_small.info[i].m == NULL) 2424 continue; 2425 bus_dmamap_unload(sc->rx_small.dmat, 2426 sc->rx_small.info[i].map); 2427 m_freem(sc->rx_small.info[i].m); 2428 sc->rx_small.info[i].m = NULL; 2429 } 2430 2431 for (i = 0; i <= sc->tx.mask; i++) { 2432 sc->tx.info[i].flag = 0; 2433 if (sc->tx.info[i].m == NULL) 2434 continue; 2435 bus_dmamap_unload(sc->tx.dmat, 2436 sc->tx.info[i].map); 2437 m_freem(sc->tx.info[i].m); 2438 sc->tx.info[i].m = NULL; 2439 } 2440} 2441 2442static void 2443mxge_free_rings(mxge_softc_t *sc) 2444{ 2445 int i; 2446 2447 if (sc->rx_done.entry != NULL) 2448 mxge_dma_free(&sc->rx_done.dma); 2449 sc->rx_done.entry = NULL; 2450 if (sc->tx.req_bytes != NULL) 2451 free(sc->tx.req_bytes, M_DEVBUF); 2452 if (sc->tx.seg_list != NULL) 2453 free(sc->tx.seg_list, M_DEVBUF); 2454 if (sc->rx_small.shadow != NULL) 2455 free(sc->rx_small.shadow, M_DEVBUF); 2456 if (sc->rx_big.shadow != NULL) 2457 free(sc->rx_big.shadow, M_DEVBUF); 2458 if (sc->tx.info != NULL) { 2459 if (sc->tx.dmat != NULL) { 2460 for (i = 0; i <= sc->tx.mask; i++) { 2461 bus_dmamap_destroy(sc->tx.dmat, 2462 sc->tx.info[i].map); 2463 } 2464 bus_dma_tag_destroy(sc->tx.dmat); 2465 } 2466 free(sc->tx.info, M_DEVBUF); 2467 } 2468 if (sc->rx_small.info != NULL) { 2469 if (sc->rx_small.dmat != NULL) { 2470 for (i = 0; i <= sc->rx_small.mask; i++) { 2471 bus_dmamap_destroy(sc->rx_small.dmat, 2472 sc->rx_small.info[i].map); 2473 } 2474 bus_dmamap_destroy(sc->rx_small.dmat, 2475 sc->rx_small.extra_map); 2476 bus_dma_tag_destroy(sc->rx_small.dmat); 2477 } 2478 free(sc->rx_small.info, M_DEVBUF); 2479 } 2480 if (sc->rx_big.info != NULL) { 2481 if (sc->rx_big.dmat != NULL) { 2482 for (i = 0; i <= sc->rx_big.mask; i++) { 2483 bus_dmamap_destroy(sc->rx_big.dmat, 2484 sc->rx_big.info[i].map); 2485 } 2486 bus_dmamap_destroy(sc->rx_big.dmat, 2487 sc->rx_big.extra_map); 2488 bus_dma_tag_destroy(sc->rx_big.dmat); 2489 } 2490 free(sc->rx_big.info, M_DEVBUF); 2491 } 2492} 2493 2494static int 2495mxge_alloc_rings(mxge_softc_t *sc) 2496{ 2497 mxge_cmd_t cmd; 2498 int tx_ring_size, rx_ring_size; 2499 int tx_ring_entries, rx_ring_entries; 2500 int i, err; 2501 unsigned long bytes; 2502 2503 /* get ring sizes */ 2504 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 2505 tx_ring_size = cmd.data0; 2506 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 2507 if (err != 0) { 2508 device_printf(sc->dev, "Cannot determine ring sizes\n"); 2509 goto abort_with_nothing; 2510 } 2511 2512 rx_ring_size = cmd.data0; 2513 2514 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 2515 rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t); 2516 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 2517 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 2518 IFQ_SET_READY(&sc->ifp->if_snd); 2519 2520 sc->tx.mask = tx_ring_entries - 1; 2521 sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 2522 sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1; 2523 sc->rx_done.mask = (2 * rx_ring_entries) - 1; 2524 2525 err = ENOMEM; 2526 2527 /* allocate interrupt queues */ 2528 bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); 2529 err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096); 2530 if (err != 0) 2531 goto abort_with_nothing; 2532 sc->rx_done.entry = sc->rx_done.dma.addr; 2533 bzero(sc->rx_done.entry, bytes); 2534 2535 /* allocate the tx request copy block */ 2536 bytes = 8 + 2537 sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4); 2538 sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 2539 if (sc->tx.req_bytes == NULL) 2540 goto abort_with_alloc; 2541 /* ensure req_list entries are aligned to 8 bytes */ 2542 sc->tx.req_list = (mcp_kreq_ether_send_t *) 2543 ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL); 2544 2545 /* allocate the tx busdma segment list */ 2546 bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc; 2547 sc->tx.seg_list = (bus_dma_segment_t *) 2548 malloc(bytes, M_DEVBUF, M_WAITOK); 2549 if (sc->tx.seg_list == NULL) 2550 goto abort_with_alloc; 2551 2552 /* allocate the rx shadow rings */ 2553 bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow); 2554 sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2555 if (sc->rx_small.shadow == NULL) 2556 goto abort_with_alloc; 2557 2558 bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow); 2559 sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2560 if (sc->rx_big.shadow == NULL) 2561 goto abort_with_alloc; 2562 2563 /* allocate the host info rings */ 2564 bytes = tx_ring_entries * sizeof (*sc->tx.info); 2565 sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2566 if (sc->tx.info == NULL) 2567 goto abort_with_alloc; 2568 2569 bytes = rx_ring_entries * sizeof (*sc->rx_small.info); 2570 sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2571 if (sc->rx_small.info == NULL) 2572 goto abort_with_alloc; 2573 2574 bytes = rx_ring_entries * sizeof (*sc->rx_big.info); 2575 sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2576 if (sc->rx_big.info == NULL) 2577 goto abort_with_alloc; 2578 2579 /* allocate the busdma resources */ 2580 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2581 1, /* alignment */ 2582 sc->tx.boundary, /* boundary */ 2583 BUS_SPACE_MAXADDR, /* low */ 2584 BUS_SPACE_MAXADDR, /* high */ 2585 NULL, NULL, /* filter */ 2586 65536 + 256, /* maxsize */ 2587 sc->tx.max_desc - 2, /* num segs */ 2588 sc->tx.boundary, /* maxsegsize */ 2589 BUS_DMA_ALLOCNOW, /* flags */ 2590 NULL, NULL, /* lock */ 2591 &sc->tx.dmat); /* tag */ 2592 2593 if (err != 0) { 2594 device_printf(sc->dev, "Err %d allocating tx dmat\n", 2595 err); 2596 goto abort_with_alloc; 2597 } 2598 2599 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2600 1, /* alignment */ 2601 4096, /* boundary */ 2602 BUS_SPACE_MAXADDR, /* low */ 2603 BUS_SPACE_MAXADDR, /* high */ 2604 NULL, NULL, /* filter */ 2605 MHLEN, /* maxsize */ 2606 1, /* num segs */ 2607 MHLEN, /* maxsegsize */ 2608 BUS_DMA_ALLOCNOW, /* flags */ 2609 NULL, NULL, /* lock */ 2610 &sc->rx_small.dmat); /* tag */ 2611 if (err != 0) { 2612 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2613 err); 2614 goto abort_with_alloc; 2615 } 2616 2617 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2618 1, /* alignment */ 2619 4096, /* boundary */ 2620 BUS_SPACE_MAXADDR, /* low */ 2621 BUS_SPACE_MAXADDR, /* high */ 2622 NULL, NULL, /* filter */ 2623 3*4096, /* maxsize */ 2624 3, /* num segs */ 2625 4096, /* maxsegsize */ 2626 BUS_DMA_ALLOCNOW, /* flags */ 2627 NULL, NULL, /* lock */ 2628 &sc->rx_big.dmat); /* tag */ 2629 if (err != 0) { 2630 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2631 err); 2632 goto abort_with_alloc; 2633 } 2634 2635 /* now use these tags to setup dmamaps for each slot 2636 in each ring */ 2637 for (i = 0; i <= sc->tx.mask; i++) { 2638 err = bus_dmamap_create(sc->tx.dmat, 0, 2639 &sc->tx.info[i].map); 2640 if (err != 0) { 2641 device_printf(sc->dev, "Err %d tx dmamap\n", 2642 err); 2643 goto abort_with_alloc; 2644 } 2645 } 2646 for (i = 0; i <= sc->rx_small.mask; i++) { 2647 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2648 &sc->rx_small.info[i].map); 2649 if (err != 0) { 2650 device_printf(sc->dev, "Err %d rx_small dmamap\n", 2651 err); 2652 goto abort_with_alloc; 2653 } 2654 } 2655 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2656 &sc->rx_small.extra_map); 2657 if (err != 0) { 2658 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 2659 err); 2660 goto abort_with_alloc; 2661 } 2662 2663 for (i = 0; i <= sc->rx_big.mask; i++) { 2664 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2665 &sc->rx_big.info[i].map); 2666 if (err != 0) { 2667 device_printf(sc->dev, "Err %d rx_big dmamap\n", 2668 err); 2669 goto abort_with_alloc; 2670 } 2671 } 2672 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2673 &sc->rx_big.extra_map); 2674 if (err != 0) { 2675 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 2676 err); 2677 goto abort_with_alloc; 2678 } 2679 return 0; 2680 2681abort_with_alloc: 2682 mxge_free_rings(sc); 2683 2684abort_with_nothing: 2685 return err; 2686} 2687 2688static void 2689mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 2690{ 2691 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 2692 2693 if (bufsize < MCLBYTES) { 2694 /* easy, everything fits in a single buffer */ 2695 *big_buf_size = MCLBYTES; 2696 *cl_size = MCLBYTES; 2697 *nbufs = 1; 2698 return; 2699 } 2700 2701 if (bufsize < MJUMPAGESIZE) { 2702 /* still easy, everything still fits in a single buffer */ 2703 *big_buf_size = MJUMPAGESIZE; 2704 *cl_size = MJUMPAGESIZE; 2705 *nbufs = 1; 2706 return; 2707 } 2708 /* now we need to use virtually contiguous buffers */ 2709 *cl_size = MJUM9BYTES; 2710 *big_buf_size = 4096; 2711 *nbufs = mtu / 4096 + 1; 2712 /* needs to be a power of two, so round up */ 2713 if (*nbufs == 3) 2714 *nbufs = 4; 2715} 2716 2717static int 2718mxge_open(mxge_softc_t *sc) 2719{ 2720 mxge_cmd_t cmd; 2721 int i, err, big_bytes; 2722 bus_dmamap_t map; 2723 bus_addr_t bus; 2724 struct lro_entry *lro_entry; 2725 2726 SLIST_INIT(&sc->lro_free); 2727 SLIST_INIT(&sc->lro_active); 2728 2729 for (i = 0; i < sc->lro_cnt; i++) { 2730 lro_entry = (struct lro_entry *) 2731 malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO); 2732 if (lro_entry == NULL) { 2733 sc->lro_cnt = i; 2734 break; 2735 } 2736 SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next); 2737 } 2738 2739 /* Copy the MAC address in case it was overridden */ 2740 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 2741 2742 err = mxge_reset(sc, 1); 2743 if (err != 0) { 2744 device_printf(sc->dev, "failed to reset\n"); 2745 return EIO; 2746 } 2747 2748 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, 2749 &sc->rx_big.cl_size, &sc->rx_big.nbufs); 2750 2751 cmd.data0 = sc->rx_big.nbufs; 2752 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 2753 &cmd); 2754 /* error is only meaningful if we're trying to set 2755 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 2756 if (err && sc->rx_big.nbufs > 1) { 2757 device_printf(sc->dev, 2758 "Failed to set alway-use-n to %d\n", 2759 sc->rx_big.nbufs); 2760 return EIO; 2761 } 2762 /* get the lanai pointers to the send and receive rings */ 2763 2764 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 2765 sc->tx.lanai = 2766 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 2767 err |= mxge_send_cmd(sc, 2768 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 2769 sc->rx_small.lanai = 2770 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2771 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 2772 sc->rx_big.lanai = 2773 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2774 2775 if (err != 0) { 2776 device_printf(sc->dev, 2777 "failed to get ring sizes or locations\n"); 2778 return EIO; 2779 } 2780 2781 /* stock receive rings */ 2782 for (i = 0; i <= sc->rx_small.mask; i++) { 2783 map = sc->rx_small.info[i].map; 2784 err = mxge_get_buf_small(sc, map, i); 2785 if (err) { 2786 device_printf(sc->dev, "alloced %d/%d smalls\n", 2787 i, sc->rx_small.mask + 1); 2788 goto abort; 2789 } 2790 } 2791 for (i = 0; i <= sc->rx_big.mask; i++) { 2792 sc->rx_big.shadow[i].addr_low = 0xffffffff; 2793 sc->rx_big.shadow[i].addr_high = 0xffffffff; 2794 } 2795 for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) { 2796 map = sc->rx_big.info[i].map; 2797 err = mxge_get_buf_big(sc, map, i); 2798 if (err) { 2799 device_printf(sc->dev, "alloced %d/%d bigs\n", 2800 i, sc->rx_big.mask + 1); 2801 goto abort; 2802 } 2803 } 2804 2805 /* Give the firmware the mtu and the big and small buffer 2806 sizes. The firmware wants the big buf size to be a power 2807 of two. Luckily, FreeBSD's clusters are powers of two */ 2808 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 2809 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 2810 cmd.data0 = MHLEN - MXGEFW_PAD; 2811 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 2812 &cmd); 2813 cmd.data0 = big_bytes; 2814 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2815 2816 if (err != 0) { 2817 device_printf(sc->dev, "failed to setup params\n"); 2818 goto abort; 2819 } 2820 2821 /* Now give him the pointer to the stats block */ 2822 cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr); 2823 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr); 2824 cmd.data2 = sizeof(struct mcp_irq_data); 2825 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 2826 2827 if (err != 0) { 2828 bus = sc->fw_stats_dma.bus_addr; 2829 bus += offsetof(struct mcp_irq_data, send_done_count); 2830 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 2831 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 2832 err = mxge_send_cmd(sc, 2833 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2834 &cmd); 2835 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2836 sc->fw_multicast_support = 0; 2837 } else { 2838 sc->fw_multicast_support = 1; 2839 } 2840 2841 if (err != 0) { 2842 device_printf(sc->dev, "failed to setup params\n"); 2843 goto abort; 2844 } 2845 2846 /* Finally, start the firmware running */ 2847 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 2848 if (err) { 2849 device_printf(sc->dev, "Couldn't bring up link\n"); 2850 goto abort; 2851 } 2852 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 2853 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2854 2855 return 0; 2856 2857 2858abort: 2859 mxge_free_mbufs(sc); 2860 2861 return err; 2862} 2863 2864static int 2865mxge_close(mxge_softc_t *sc) 2866{ 2867 struct lro_entry *lro_entry; 2868 mxge_cmd_t cmd; 2869 int err, old_down_cnt; 2870 2871 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2872 old_down_cnt = sc->down_cnt; 2873 mb(); 2874 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2875 if (err) { 2876 device_printf(sc->dev, "Couldn't bring down link\n"); 2877 } 2878 if (old_down_cnt == sc->down_cnt) { 2879 /* wait for down irq */ 2880 DELAY(10 * sc->intr_coal_delay); 2881 } 2882 if (old_down_cnt == sc->down_cnt) { 2883 device_printf(sc->dev, "never got down irq\n"); 2884 } 2885 2886 mxge_free_mbufs(sc); 2887 2888 while (!SLIST_EMPTY(&sc->lro_free)) { 2889 lro_entry = SLIST_FIRST(&sc->lro_free); 2890 SLIST_REMOVE_HEAD(&sc->lro_free, next); 2891 } 2892 return 0; 2893} 2894 2895static void 2896mxge_setup_cfg_space(mxge_softc_t *sc) 2897{ 2898 device_t dev = sc->dev; 2899 int reg; 2900 uint16_t cmd, lnk, pectl; 2901 2902 /* find the PCIe link width and set max read request to 4KB*/ 2903 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 2904 lnk = pci_read_config(dev, reg + 0x12, 2); 2905 sc->link_width = (lnk >> 4) & 0x3f; 2906 2907 pectl = pci_read_config(dev, reg + 0x8, 2); 2908 pectl = (pectl & ~0x7000) | (5 << 12); 2909 pci_write_config(dev, reg + 0x8, pectl, 2); 2910 } 2911 2912 /* Enable DMA and Memory space access */ 2913 pci_enable_busmaster(dev); 2914 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 2915 cmd |= PCIM_CMD_MEMEN; 2916 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 2917} 2918 2919static uint32_t 2920mxge_read_reboot(mxge_softc_t *sc) 2921{ 2922 device_t dev = sc->dev; 2923 uint32_t vs; 2924 2925 /* find the vendor specific offset */ 2926 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 2927 device_printf(sc->dev, 2928 "could not find vendor specific offset\n"); 2929 return (uint32_t)-1; 2930 } 2931 /* enable read32 mode */ 2932 pci_write_config(dev, vs + 0x10, 0x3, 1); 2933 /* tell NIC which register to read */ 2934 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 2935 return (pci_read_config(dev, vs + 0x14, 4)); 2936} 2937 2938static void 2939mxge_watchdog_reset(mxge_softc_t *sc) 2940{ 2941 int err; 2942 uint32_t reboot; 2943 uint16_t cmd; 2944 2945 err = ENXIO; 2946 2947 device_printf(sc->dev, "Watchdog reset!\n"); 2948 2949 /* 2950 * check to see if the NIC rebooted. If it did, then all of 2951 * PCI config space has been reset, and things like the 2952 * busmaster bit will be zero. If this is the case, then we 2953 * must restore PCI config space before the NIC can be used 2954 * again 2955 */ 2956 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2957 if (cmd == 0xffff) { 2958 /* 2959 * maybe the watchdog caught the NIC rebooting; wait 2960 * up to 100ms for it to finish. If it does not come 2961 * back, then give up 2962 */ 2963 DELAY(1000*100); 2964 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2965 if (cmd == 0xffff) { 2966 device_printf(sc->dev, "NIC disappeared!\n"); 2967 goto abort; 2968 } 2969 } 2970 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 2971 /* print the reboot status */ 2972 reboot = mxge_read_reboot(sc); 2973 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 2974 reboot); 2975 /* restore PCI configuration space */ 2976 2977 /* XXXX waiting for pci_cfg_restore() to be exported */ 2978 goto abort; /* just abort for now */ 2979 2980 /* and redo any changes we made to our config space */ 2981 mxge_setup_cfg_space(sc); 2982 } else { 2983 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 2984 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 2985 sc->tx.req, sc->tx.done); 2986 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 2987 sc->tx.pkt_done, 2988 be32toh(sc->fw_stats->send_done_count)); 2989 } 2990 2991 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 2992 mxge_close(sc); 2993 err = mxge_open(sc); 2994 } 2995 2996abort: 2997 /* 2998 * stop the watchdog if the nic is dead, to avoid spamming the 2999 * console 3000 */ 3001 if (err != 0) { 3002 callout_stop(&sc->co_hdl); 3003 } 3004} 3005 3006static void 3007mxge_watchdog(mxge_softc_t *sc) 3008{ 3009 mxge_tx_buf_t *tx = &sc->tx; 3010 3011 /* see if we have outstanding transmits, which 3012 have been pending for more than mxge_ticks */ 3013 if (tx->req != tx->done && 3014 tx->watchdog_req != tx->watchdog_done && 3015 tx->done == tx->watchdog_done) 3016 mxge_watchdog_reset(sc); 3017 3018 tx->watchdog_req = tx->req; 3019 tx->watchdog_done = tx->done; 3020} 3021 3022static void 3023mxge_tick(void *arg) 3024{ 3025 mxge_softc_t *sc = arg; 3026 3027 3028 /* Synchronize with possible callout reset/stop. */ 3029 if (callout_pending(&sc->co_hdl) || 3030 !callout_active(&sc->co_hdl)) { 3031 mtx_unlock(&sc->driver_mtx); 3032 return; 3033 } 3034 3035 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3036 mxge_watchdog(sc); 3037} 3038 3039static int 3040mxge_media_change(struct ifnet *ifp) 3041{ 3042 return EINVAL; 3043} 3044 3045static int 3046mxge_change_mtu(mxge_softc_t *sc, int mtu) 3047{ 3048 struct ifnet *ifp = sc->ifp; 3049 int real_mtu, old_mtu; 3050 int err = 0; 3051 3052 3053 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3054 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3055 return EINVAL; 3056 mtx_lock(&sc->driver_mtx); 3057 old_mtu = ifp->if_mtu; 3058 ifp->if_mtu = mtu; 3059 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3060 callout_stop(&sc->co_hdl); 3061 mxge_close(sc); 3062 err = mxge_open(sc); 3063 if (err != 0) { 3064 ifp->if_mtu = old_mtu; 3065 mxge_close(sc); 3066 (void) mxge_open(sc); 3067 } 3068 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3069 } 3070 mtx_unlock(&sc->driver_mtx); 3071 return err; 3072} 3073 3074static void 3075mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3076{ 3077 mxge_softc_t *sc = ifp->if_softc; 3078 3079 3080 if (sc == NULL) 3081 return; 3082 ifmr->ifm_status = IFM_AVALID; 3083 ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0; 3084 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3085 ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0; 3086} 3087 3088static int 3089mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3090{ 3091 mxge_softc_t *sc = ifp->if_softc; 3092 struct ifreq *ifr = (struct ifreq *)data; 3093 int err, mask; 3094 3095 err = 0; 3096 switch (command) { 3097 case SIOCSIFADDR: 3098 case SIOCGIFADDR: 3099 err = ether_ioctl(ifp, command, data); 3100 break; 3101 3102 case SIOCSIFMTU: 3103 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3104 break; 3105 3106 case SIOCSIFFLAGS: 3107 mtx_lock(&sc->driver_mtx); 3108 if (ifp->if_flags & IFF_UP) { 3109 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3110 err = mxge_open(sc); 3111 callout_reset(&sc->co_hdl, mxge_ticks, 3112 mxge_tick, sc); 3113 } else { 3114 /* take care of promis can allmulti 3115 flag chages */ 3116 mxge_change_promisc(sc, 3117 ifp->if_flags & IFF_PROMISC); 3118 mxge_set_multicast_list(sc); 3119 } 3120 } else { 3121 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3122 mxge_close(sc); 3123 callout_stop(&sc->co_hdl); 3124 } 3125 } 3126 mtx_unlock(&sc->driver_mtx); 3127 break; 3128 3129 case SIOCADDMULTI: 3130 case SIOCDELMULTI: 3131 mtx_lock(&sc->driver_mtx); 3132 mxge_set_multicast_list(sc); 3133 mtx_unlock(&sc->driver_mtx); 3134 break; 3135 3136 case SIOCSIFCAP: 3137 mtx_lock(&sc->driver_mtx); 3138 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3139 if (mask & IFCAP_TXCSUM) { 3140 if (IFCAP_TXCSUM & ifp->if_capenable) { 3141 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3142 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3143 | CSUM_TSO); 3144 } else { 3145 ifp->if_capenable |= IFCAP_TXCSUM; 3146 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3147 } 3148 } else if (mask & IFCAP_RXCSUM) { 3149 if (IFCAP_RXCSUM & ifp->if_capenable) { 3150 ifp->if_capenable &= ~IFCAP_RXCSUM; 3151 sc->csum_flag = 0; 3152 } else { 3153 ifp->if_capenable |= IFCAP_RXCSUM; 3154 sc->csum_flag = 1; 3155 } 3156 } 3157 if (mask & IFCAP_TSO4) { 3158 if (IFCAP_TSO4 & ifp->if_capenable) { 3159 ifp->if_capenable &= ~IFCAP_TSO4; 3160 ifp->if_hwassist &= ~CSUM_TSO; 3161 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3162 ifp->if_capenable |= IFCAP_TSO4; 3163 ifp->if_hwassist |= CSUM_TSO; 3164 } else { 3165 printf("mxge requires tx checksum offload" 3166 " be enabled to use TSO\n"); 3167 err = EINVAL; 3168 } 3169 } 3170 if (mask & IFCAP_LRO) { 3171 if (IFCAP_LRO & ifp->if_capenable) 3172 err = mxge_change_lro_locked(sc, 0); 3173 else 3174 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3175 } 3176 if (mask & IFCAP_VLAN_HWTAGGING) 3177 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3178 mtx_unlock(&sc->driver_mtx); 3179 VLAN_CAPABILITIES(ifp); 3180 3181 break; 3182 3183 case SIOCGIFMEDIA: 3184 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3185 &sc->media, command); 3186 break; 3187 3188 default: 3189 err = ENOTTY; 3190 } 3191 return err; 3192} 3193 3194static void 3195mxge_fetch_tunables(mxge_softc_t *sc) 3196{ 3197 3198 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3199 &mxge_flow_control); 3200 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3201 &mxge_intr_coal_delay); 3202 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3203 &mxge_nvidia_ecrc_enable); 3204 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3205 &mxge_force_firmware); 3206 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3207 &mxge_deassert_wait); 3208 TUNABLE_INT_FETCH("hw.mxge.verbose", 3209 &mxge_verbose); 3210 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3211 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3212 printf("%d %d\n", sc->lro_cnt, mxge_lro_cnt); 3213 if (sc->lro_cnt != 0) 3214 mxge_lro_cnt = sc->lro_cnt; 3215 3216 if (bootverbose) 3217 mxge_verbose = 1; 3218 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3219 mxge_intr_coal_delay = 30; 3220 if (mxge_ticks == 0) 3221 mxge_ticks = hz; 3222 sc->pause = mxge_flow_control; 3223 3224} 3225 3226static int 3227mxge_attach(device_t dev) 3228{ 3229 mxge_softc_t *sc = device_get_softc(dev); 3230 struct ifnet *ifp; 3231 int count, rid, err; 3232 3233 sc->dev = dev; 3234 mxge_fetch_tunables(sc); 3235 3236 err = bus_dma_tag_create(NULL, /* parent */ 3237 1, /* alignment */ 3238 4096, /* boundary */ 3239 BUS_SPACE_MAXADDR, /* low */ 3240 BUS_SPACE_MAXADDR, /* high */ 3241 NULL, NULL, /* filter */ 3242 65536 + 256, /* maxsize */ 3243 MXGE_MAX_SEND_DESC, /* num segs */ 3244 4096, /* maxsegsize */ 3245 0, /* flags */ 3246 NULL, NULL, /* lock */ 3247 &sc->parent_dmat); /* tag */ 3248 3249 if (err != 0) { 3250 device_printf(sc->dev, "Err %d allocating parent dmat\n", 3251 err); 3252 goto abort_with_nothing; 3253 } 3254 3255 ifp = sc->ifp = if_alloc(IFT_ETHER); 3256 if (ifp == NULL) { 3257 device_printf(dev, "can not if_alloc()\n"); 3258 err = ENOSPC; 3259 goto abort_with_parent_dmat; 3260 } 3261 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 3262 device_get_nameunit(dev)); 3263 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 3264 snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx", 3265 device_get_nameunit(dev)); 3266 mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF); 3267 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 3268 "%s:drv", device_get_nameunit(dev)); 3269 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 3270 MTX_NETWORK_LOCK, MTX_DEF); 3271 3272 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 3273 3274 mxge_setup_cfg_space(sc); 3275 3276 /* Map the board into the kernel */ 3277 rid = PCIR_BARS; 3278 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 3279 ~0, 1, RF_ACTIVE); 3280 if (sc->mem_res == NULL) { 3281 device_printf(dev, "could not map memory\n"); 3282 err = ENXIO; 3283 goto abort_with_lock; 3284 } 3285 sc->sram = rman_get_virtual(sc->mem_res); 3286 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 3287 if (sc->sram_size > rman_get_size(sc->mem_res)) { 3288 device_printf(dev, "impossible memory region size %ld\n", 3289 rman_get_size(sc->mem_res)); 3290 err = ENXIO; 3291 goto abort_with_mem_res; 3292 } 3293 3294 /* make NULL terminated copy of the EEPROM strings section of 3295 lanai SRAM */ 3296 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 3297 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 3298 rman_get_bushandle(sc->mem_res), 3299 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 3300 sc->eeprom_strings, 3301 MXGE_EEPROM_STRINGS_SIZE - 2); 3302 err = mxge_parse_strings(sc); 3303 if (err != 0) 3304 goto abort_with_mem_res; 3305 3306 /* Enable write combining for efficient use of PCIe bus */ 3307 mxge_enable_wc(sc); 3308 3309 /* Allocate the out of band dma memory */ 3310 err = mxge_dma_alloc(sc, &sc->cmd_dma, 3311 sizeof (mxge_cmd_t), 64); 3312 if (err != 0) 3313 goto abort_with_mem_res; 3314 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 3315 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 3316 if (err != 0) 3317 goto abort_with_cmd_dma; 3318 3319 err = mxge_dma_alloc(sc, &sc->fw_stats_dma, 3320 sizeof (*sc->fw_stats), 64); 3321 if (err != 0) 3322 goto abort_with_zeropad_dma; 3323 sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr; 3324 3325 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 3326 if (err != 0) 3327 goto abort_with_fw_stats; 3328 3329 /* Add our ithread */ 3330 count = pci_msi_count(dev); 3331 if (count == 1 && pci_alloc_msi(dev, &count) == 0) { 3332 rid = 1; 3333 sc->msi_enabled = 1; 3334 } else { 3335 rid = 0; 3336 } 3337 sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 3338 1, RF_SHAREABLE | RF_ACTIVE); 3339 if (sc->irq_res == NULL) { 3340 device_printf(dev, "could not alloc interrupt\n"); 3341 goto abort_with_dmabench; 3342 } 3343 if (mxge_verbose) 3344 device_printf(dev, "using %s irq %ld\n", 3345 sc->msi_enabled ? "MSI" : "INTx", 3346 rman_get_start(sc->irq_res)); 3347 /* select & load the firmware */ 3348 err = mxge_select_firmware(sc); 3349 if (err != 0) 3350 goto abort_with_irq_res; 3351 sc->intr_coal_delay = mxge_intr_coal_delay; 3352 err = mxge_reset(sc, 0); 3353 if (err != 0) 3354 goto abort_with_irq_res; 3355 3356 err = mxge_alloc_rings(sc); 3357 if (err != 0) { 3358 device_printf(sc->dev, "failed to allocate rings\n"); 3359 goto abort_with_irq_res; 3360 } 3361 3362 err = bus_setup_intr(sc->dev, sc->irq_res, 3363 INTR_TYPE_NET | INTR_MPSAFE, 3364 NULL, mxge_intr, sc, &sc->ih); 3365 if (err != 0) { 3366 goto abort_with_rings; 3367 } 3368 /* hook into the network stack */ 3369 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3370 ifp->if_baudrate = 100000000; 3371 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 3372 IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | 3373 IFCAP_VLAN_HWCSUM | IFCAP_LRO; 3374 3375 sc->max_mtu = mxge_max_mtu(sc); 3376 if (sc->max_mtu >= 9000) 3377 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 3378 else 3379 device_printf(dev, "MTU limited to %d. Install " 3380 "latest firmware for 9000 byte jumbo support\n", 3381 sc->max_mtu - ETHER_HDR_LEN); 3382 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 3383 ifp->if_capenable = ifp->if_capabilities; 3384 if (sc->lro_cnt == 0) 3385 ifp->if_capenable &= ~IFCAP_LRO; 3386 sc->csum_flag = 1; 3387 ifp->if_init = mxge_init; 3388 ifp->if_softc = sc; 3389 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3390 ifp->if_ioctl = mxge_ioctl; 3391 ifp->if_start = mxge_start; 3392 ether_ifattach(ifp, sc->mac_addr); 3393 /* ether_ifattach sets mtu to 1500 */ 3394 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 3395 ifp->if_mtu = 9000; 3396 3397 /* Initialise the ifmedia structure */ 3398 ifmedia_init(&sc->media, 0, mxge_media_change, 3399 mxge_media_status); 3400 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 3401 mxge_add_sysctls(sc); 3402 return 0; 3403 3404abort_with_rings: 3405 mxge_free_rings(sc); 3406abort_with_irq_res: 3407 bus_release_resource(dev, SYS_RES_IRQ, 3408 sc->msi_enabled ? 1 : 0, sc->irq_res); 3409 if (sc->msi_enabled) 3410 pci_release_msi(dev); 3411abort_with_dmabench: 3412 mxge_dma_free(&sc->dmabench_dma); 3413abort_with_fw_stats: 3414 mxge_dma_free(&sc->fw_stats_dma); 3415abort_with_zeropad_dma: 3416 mxge_dma_free(&sc->zeropad_dma); 3417abort_with_cmd_dma: 3418 mxge_dma_free(&sc->cmd_dma); 3419abort_with_mem_res: 3420 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3421abort_with_lock: 3422 pci_disable_busmaster(dev); 3423 mtx_destroy(&sc->cmd_mtx); 3424 mtx_destroy(&sc->tx_mtx); 3425 mtx_destroy(&sc->driver_mtx); 3426 if_free(ifp); 3427abort_with_parent_dmat: 3428 bus_dma_tag_destroy(sc->parent_dmat); 3429 3430abort_with_nothing: 3431 return err; 3432} 3433 3434static int 3435mxge_detach(device_t dev) 3436{ 3437 mxge_softc_t *sc = device_get_softc(dev); 3438 3439 if (sc->ifp->if_vlantrunk != NULL) { 3440 device_printf(sc->dev, 3441 "Detach vlans before removing module\n"); 3442 return EBUSY; 3443 } 3444 mtx_lock(&sc->driver_mtx); 3445 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3446 mxge_close(sc); 3447 callout_stop(&sc->co_hdl); 3448 mtx_unlock(&sc->driver_mtx); 3449 ether_ifdetach(sc->ifp); 3450 ifmedia_removeall(&sc->media); 3451 mxge_dummy_rdma(sc, 0); 3452 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3453 mxge_free_rings(sc); 3454 bus_release_resource(dev, SYS_RES_IRQ, 3455 sc->msi_enabled ? 1 : 0, sc->irq_res); 3456 if (sc->msi_enabled) 3457 pci_release_msi(dev); 3458 3459 sc->rx_done.entry = NULL; 3460 mxge_dma_free(&sc->fw_stats_dma); 3461 mxge_dma_free(&sc->dmabench_dma); 3462 mxge_dma_free(&sc->zeropad_dma); 3463 mxge_dma_free(&sc->cmd_dma); 3464 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3465 pci_disable_busmaster(dev); 3466 mtx_destroy(&sc->cmd_mtx); 3467 mtx_destroy(&sc->tx_mtx); 3468 mtx_destroy(&sc->driver_mtx); 3469 if_free(sc->ifp); 3470 bus_dma_tag_destroy(sc->parent_dmat); 3471 return 0; 3472} 3473 3474static int 3475mxge_shutdown(device_t dev) 3476{ 3477 return 0; 3478} 3479 3480/* 3481 This file uses Myri10GE driver indentation. 3482 3483 Local Variables: 3484 c-file-style:"linux" 3485 tab-width:8 3486 End: 3487*/ 3488