/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 294327 2016-01-19 15:33:28Z hselasky $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <sys/zlib.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
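
/*
 * E.g. (made-up values), sc->eeprom_strings might contain:
 *   "MAC=00:60:dd:47:8a:2c\0SN=123456\0PC=M3F2-PCIXE-2\0\0"
 * mxge_parse_strings() below walks these NUL-separated records and
 * stops at the empty string that terminates the list.
 */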

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

 abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
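	/*
	 * Worked example (made-up numbers): if the read test over
	 * len = 4096 bytes returns cmd.data0 = 0x00640200, the upper
	 * half (0x64 = 100) says 100 transfers completed and the
	 * lower half (0x200 = 512) says they took 512 half-us ticks
	 * (256 us), so read_dma = (100 * 4096 * 2) / 512 = 1600,
	 * i.e. ~1600 MB/s as reported by the read_dma_MBs sysctl.
	 */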

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
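
/*
 * In short (a recap of the policy above, not new behavior):
 *   completions known aligned    -> mxge_fw_aligned,   tx_boundary = 4096
 *   alignment unknown/unaligned  -> mxge_fw_unaligned, tx_boundary = 2048
 * mxge_firmware_probe() below tries the aligned image first and falls
 * back when the unaligned-completion DMA test reports a violation.
 */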

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen. Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ". For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
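
/*
 * Note: the firmware counters wired to mxge_handle_be32() in
 * mxge_add_sysctls() below are kept in network byte order, which is
 * why the handler hands sysctl the swapped value (arg2) rather than
 * a pointer; reading e.g. the link_up sysctl reports
 * be32toh(fw->link_up).
 */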

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "watchdog_resets",
		       CTLFLAG_RD, &sc->watchdog_resets,
		       0, "Number of times NIC was reset");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"throttle",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_throttle,
			"I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
			       0, "number of bad csums preventing LRO");

		SYSCTL_ADD_U64(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
   backwards one at a time and handle ring wraps */
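/* (Copying the wrapped tail first, and leaving the first slot for the
   caller, means the NIC never sees a chain whose first slot is valid
   before the rest of the chain has landed; mxge_submit_req() writes
   the first slot's valid flags last.) */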

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
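/*
 * (The loop below copies two 16-byte send descriptors per iteration,
 * i.e. 2 * sizeof(*src) == 32 bytes, with a write barrier between
 * pairs, which is what keeps each PIO burst at the 32-byte limit
 * described above.)
 */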

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
				   ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
				   ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
				   sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */
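	/* (Example: for a plain IPv4 TCP segment with 14-byte Ethernet,
	 * 20-byte IP, and 20-byte TCP headers, cksum_offset below is
	 * 14 + 20 = 34 and cum_len starts at -(34 + 20) = -54, hitting
	 * zero exactly where the payload begins.) */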

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
					       m->m_pkthdr.len - cksum_offset,
					       IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
					pi->ip->ip_dst.s_addr,
					htons(IPPROTO_TCP + (m->m_pkthdr.len -
							     cksum_offset)));
#endif
		}
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
			   cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
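	/* (Concretely, the retroactive fill-in is done by the
	 * "(req-rdma_count)->rdma_count = ..." stores below: each store
	 * reaches back to the request that began the current run of
	 * RDMAs and patches in the count accumulated since then.) */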

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion. We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct mxge_pkt_info pi = {0,0,0,0};
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
2096 if (__predict_false(m == NULL))
2097 goto drop_without_m;
2098 }
2099#endif
2100 if (m->m_pkthdr.csum_flags &
2101 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2102 if (mxge_parse_tx(ss, m, &pi))
2103 goto drop;
2104 }
2105
2106 /* (try to) map the frame for DMA */
2107 idx = tx->req & tx->mask;
2108 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2109 m, tx->seg_list, &cnt,
2110 BUS_DMA_NOWAIT);
2111 if (__predict_false(err == EFBIG)) {
2112 /* Too many segments in the chain. Try
2113 to defrag */
2114 m_tmp = m_defrag(m, M_NOWAIT);
2115 if (m_tmp == NULL) {
2116 goto drop;
2117 }
2118 ss->tx.defrag++;
2119 m = m_tmp;
2120 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2121 tx->info[idx].map,
2122 m, tx->seg_list, &cnt,
2123 BUS_DMA_NOWAIT);
2124 }
2125 if (__predict_false(err != 0)) {
2126 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2127 " packet len = %d\n", err, m->m_pkthdr.len);
2128 goto drop;
2129 }
2130 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2131 BUS_DMASYNC_PREWRITE);
2132 tx->info[idx].m = m;
2133
2134#if IFCAP_TSO4
2135 /* TSO is different enough, we handle it in another routine */
2136 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2137 mxge_encap_tso(ss, m, cnt, &pi);
2138 return;
2139 }
2140#endif
2141
2142 req = tx->req_list;
2143 cksum_offset = 0;
2144 pseudo_hdr_offset = 0;
2145 flags = MXGEFW_FLAGS_NO_TSO;
2146
2147 /* checksum offloading? */
2148 if (m->m_pkthdr.csum_flags &
2149 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2150 /* ensure ip header is in first mbuf, copy
2151 it to a scratch buffer if not */
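		/*
		 * csum_data, set by the stack, is the offset of the
		 * checksum field within the L4 header, so
		 * pseudo_hdr_offset tells the NIC where to store the
		 * checksum it computes starting at cksum_offset.
		 */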
2152 cksum_offset = pi.ip_off + pi.ip_hlen;
2153 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2154 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2155 req->cksum_offset = cksum_offset;
2156 flags |= MXGEFW_FLAGS_CKSUM;
2157 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
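		/*
		 * odd_flag is OR'd below into each descriptor whose
		 * data begins at an odd cumulative offset, so the
		 * firmware can account for the byte misalignment when
		 * checksumming.
		 */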
2158 } else {
2159 odd_flag = 0;
2160 }
2161 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2162 flags |= MXGEFW_FLAGS_SMALL;
2163
2164 /* convert segments into a request list */
2165 cum_len = 0;
2166 seg = tx->seg_list;
2167 req->flags = MXGEFW_FLAGS_FIRST;
2168 for (i = 0; i < cnt; i++) {
2169 req->addr_low =
2170 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2171 req->addr_high =
2172 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2173 req->length = htobe16(seg->ds_len);
2174 req->cksum_offset = cksum_offset;
2175 if (cksum_offset > seg->ds_len)
2176 cksum_offset -= seg->ds_len;
2177 else
2178 cksum_offset = 0;
2179 req->pseudo_hdr_offset = pseudo_hdr_offset;
2180 req->pad = 0; /* complete solid 16-byte block */
2181 req->rdma_count = 1;
2182 req->flags |= flags | ((cum_len & 1) * odd_flag);
2183 cum_len += seg->ds_len;
2184 seg++;
2185 req++;
2186 req->flags = 0;
2187 }
2188 req--;
2189 /* pad runts to 60 bytes */
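	/* (60 is ETHER_MIN_LEN less the 4-byte FCS appended by the
	   NIC; the extra descriptor points at the zeropad_dma block
	   of zeros) */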
2190 if (cum_len < 60) {
2191 req++;
2192 req->addr_low =
2193 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2194 req->addr_high =
2195 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2196 req->length = htobe16(60 - cum_len);
2197 req->cksum_offset = 0;
2198 req->pseudo_hdr_offset = pseudo_hdr_offset;
2199 req->pad = 0; /* complete solid 16-byte block */
2200 req->rdma_count = 1;
2201 req->flags |= flags | ((cum_len & 1) * odd_flag);
2202 cnt++;
2203 }
2204
2205 tx->req_list[0].rdma_count = cnt;
2206#if 0
2207 /* print what the firmware will see */
2208 for (i = 0; i < cnt; i++) {
2209 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2210 "cso:%d, flags:0x%x, rdma:%d\n",
2211 i, (int)ntohl(tx->req_list[i].addr_high),
2212 (int)ntohl(tx->req_list[i].addr_low),
2213 (int)ntohs(tx->req_list[i].length),
2214 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2215 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2216 tx->req_list[i].rdma_count);
2217 }
2218 printf("--------------\n");
2219#endif
2220 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2221 mxge_submit_req(tx, tx->req_list, cnt);
2222#ifdef IFNET_BUF_RING
2223 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2224 /* tell the NIC to start polling this slice */
2225 *tx->send_go = 1;
2226 tx->queue_active = 1;
2227 tx->activate++;
2228 wmb();
2229 }
2230#endif
2231 return;
2232
2233drop:
2234 m_freem(m);
2235drop_without_m:
2236 ss->oerrors++;
2237 return;
2238}
2239
2240#ifdef IFNET_BUF_RING
2241static void
2242mxge_qflush(struct ifnet *ifp)
2243{
2244 mxge_softc_t *sc = ifp->if_softc;
2245 mxge_tx_ring_t *tx;
2246 struct mbuf *m;
2247 int slice;
2248
2249 for (slice = 0; slice < sc->num_slices; slice++) {
2250 tx = &sc->ss[slice].tx;
2251 mtx_lock(&tx->mtx);
2252 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2253 m_freem(m);
2254 mtx_unlock(&tx->mtx);
2255 }
2256 if_qflush(ifp);
2257}
2258
2259static inline void
2260mxge_start_locked(struct mxge_slice_state *ss)
2261{
2262 mxge_softc_t *sc;
2263 struct mbuf *m;
2264 struct ifnet *ifp;
2265 mxge_tx_ring_t *tx;
2266
2267 sc = ss->sc;
2268 ifp = sc->ifp;
2269 tx = &ss->tx;
2270
2271 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2272 m = drbr_dequeue(ifp, tx->br);
2273 if (m == NULL) {
2274 return;
2275 }
2276 /* let BPF see it */
2277 BPF_MTAP(ifp, m);
2278
2279 /* give it to the nic */
2280 mxge_encap(ss, m);
2281 }
2282 /* ran out of transmit slots */
2283 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2284 && (!drbr_empty(ifp, tx->br))) {
2285 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2286 tx->stall++;
2287 }
2288}
2289
2290static int
2291mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2292{
2293 mxge_softc_t *sc;
2294 struct ifnet *ifp;
2295 mxge_tx_ring_t *tx;
2296 int err;
2297
2298 sc = ss->sc;
2299 ifp = sc->ifp;
2300 tx = &ss->tx;
2301
2302 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2303 IFF_DRV_RUNNING) {
2304 err = drbr_enqueue(ifp, tx->br, m);
2305 return (err);
2306 }
2307
2308 if (!drbr_needs_enqueue(ifp, tx->br) &&
2309 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2310 /* let BPF see it */
2311 BPF_MTAP(ifp, m);
2312 /* give it to the nic */
2313 mxge_encap(ss, m);
2314 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2315 return (err);
2316 }
2317 if (!drbr_empty(ifp, tx->br))
2318 mxge_start_locked(ss);
2319 return (0);
2320}
2321
2322static int
2323mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2324{
2325 mxge_softc_t *sc = ifp->if_softc;
2326 struct mxge_slice_state *ss;
2327 mxge_tx_ring_t *tx;
2328 int err = 0;
2329 int slice;
2330
2331 slice = m->m_pkthdr.flowid;
2332 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2333
2334 ss = &sc->ss[slice];
2335 tx = &ss->tx;
2336
2337 if (mtx_trylock(&tx->mtx)) {
2338 err = mxge_transmit_locked(ss, m);
2339 mtx_unlock(&tx->mtx);
2340 } else {
2341 err = drbr_enqueue(ifp, tx->br, m);
2342 }
2343
2344 return (err);
2345}
2346
2347#else
2348
2349static inline void
2350mxge_start_locked(struct mxge_slice_state *ss)
2351{
2352 mxge_softc_t *sc;
2353 struct mbuf *m;
2354 struct ifnet *ifp;
2355 mxge_tx_ring_t *tx;
2356
2357 sc = ss->sc;
2358 ifp = sc->ifp;
2359 tx = &ss->tx;
2360 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2361 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2362 if (m == NULL) {
2363 return;
2364 }
2365 /* let BPF see it */
2366 BPF_MTAP(ifp, m);
2367
2368 /* give it to the nic */
2369 mxge_encap(ss, m);
2370 }
2371 /* ran out of transmit slots */
2372 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2373 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2374 tx->stall++;
2375 }
2376}
2377#endif
2378static void
2379mxge_start(struct ifnet *ifp)
2380{
2381 mxge_softc_t *sc = ifp->if_softc;
2382 struct mxge_slice_state *ss;
2383
2384 /* only use the first slice for now */
2385 ss = &sc->ss[0];
2386 mtx_lock(&ss->tx.mtx);
2387 mxge_start_locked(ss);
2388 mtx_unlock(&ss->tx.mtx);
2389}
2390
2391/*
2392 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2393 * at most 32 bytes at a time, so as to avoid involving the software
2394 * pio handler in the nic. We re-write the first segment's low
2395 * DMA address to mark it valid only after we write the entire chunk
2396 * in a burst
2397 */
2398static inline void
2399mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2400 mcp_kreq_ether_recv_t *src)
2401{
2402 uint32_t low;
2403
2404 low = src->addr_low;
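	/* poison the first descriptor's low address so the NIC treats
	   the group as invalid while the two 32-byte bursts land; the
	   real address, written last, validates the whole chunk */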
2405 src->addr_low = 0xffffffff;
2406 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2407 wmb();
2408 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2409 wmb();
2410 src->addr_low = low;
2411 dst->addr_low = low;
2412 wmb();
2413}
2414
2415static int
2416mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2417{
2418 bus_dma_segment_t seg;
2419 struct mbuf *m;
2420 mxge_rx_ring_t *rx = &ss->rx_small;
2421 int cnt, err;
2422
2423 m = m_gethdr(M_NOWAIT, MT_DATA);
2424 if (m == NULL) {
2425 rx->alloc_fail++;
2426 err = ENOBUFS;
2427 goto done;
2428 }
2429 m->m_len = MHLEN;
2430 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2431 &seg, &cnt, BUS_DMA_NOWAIT);
2432 if (err != 0) {
2433 m_free(m);
2434 goto done;
2435 }
2436 rx->info[idx].m = m;
2437 rx->shadow[idx].addr_low =
2438 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2439 rx->shadow[idx].addr_high =
2440 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2441
2442done:
2443 if ((idx & 7) == 7)
2444 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2445 return err;
2446}
2447
2448static int
2449mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2450{
2451 bus_dma_segment_t seg[3];
2452 struct mbuf *m;
2453 mxge_rx_ring_t *rx = &ss->rx_big;
2454 int cnt, err, i;
2455
2456 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2457 if (m == NULL) {
2458 rx->alloc_fail++;
2459 err = ENOBUFS;
2460 goto done;
2461 }
2462 m->m_len = rx->mlen;
2463 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2464 seg, &cnt, BUS_DMA_NOWAIT);
2465 if (err != 0) {
2466 m_free(m);
2467 goto done;
2468 }
2469 rx->info[idx].m = m;
2470 rx->shadow[idx].addr_low =
2471 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2472 rx->shadow[idx].addr_high =
2473 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2474
2475#if MXGE_VIRT_JUMBOS
2476 for (i = 1; i < cnt; i++) {
2477 rx->shadow[idx + i].addr_low =
2478 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2479 rx->shadow[idx + i].addr_high =
2480 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2481 }
2482#endif
2483
2484done:
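	/* receive buffers are handed to the NIC only in aligned
	   groups of 8 (see mxge_submit_8rx above), so test each slot
	   this frame consumed */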
2485 for (i = 0; i < rx->nbufs; i++) {
2486 if ((idx & 7) == 7) {
2487 mxge_submit_8rx(&rx->lanai[idx - 7],
2488 &rx->shadow[idx - 7]);
2489 }
2490 idx++;
2491 }
2492 return err;
2493}
2494
2495#ifdef INET6
2496
2497static uint16_t
2498mxge_csum_generic(uint16_t *raw, int len)
2499{
2500 uint32_t csum;
2501
2502
2503 csum = 0;
2504 while (len > 0) {
2505 csum += *raw;
2506 raw++;
2507 len -= 2;
2508 }
2509 csum = (csum >> 16) + (csum & 0xffff);
2510 csum = (csum >> 16) + (csum & 0xffff);
2511 return (uint16_t)csum;
2512}
2513
2514static inline uint16_t
2515mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2516{
2517 uint32_t partial;
2518 int nxt, cksum_offset;
2519 struct ip6_hdr *ip6 = p;
2520 uint16_t c;
2521
2522 nxt = ip6->ip6_nxt;
2523 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2524 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2525 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2526 IPPROTO_IPV6, &nxt);
2527 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2528 return (1);
2529 }
2530
2531 /*
2532 * IPv6 headers do not contain a checksum, and hence
2533 * do not checksum to zero, so they don't "fall out"
2534 * of the partial checksum calculation like IPv4
2535 * headers do. We need to fix the partial checksum by
2536 * subtracting the checksum of the IPv6 header.
2537 */
2538
2539 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2540 ETHER_HDR_LEN);
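	/* ones-complement subtraction of "partial": add its
	   complement, fold in the end-around carry detected by the
	   (csum < ~partial) overflow test, then reduce to 16 bits */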
2541 csum += ~partial;
2542 csum += (csum < ~partial);
2543 csum = (csum >> 16) + (csum & 0xFFFF);
2544 csum = (csum >> 16) + (csum & 0xFFFF);
2545 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2546 csum);
2547 c ^= 0xffff;
2548 return (c);
2549}
2550#endif /* INET6 */
2551/*
2552 * Myri10GE hardware checksums are not valid if the sender
2553 * padded the frame with non-zero padding. This is because
2554 * the firmware just does a simple 16-bit 1s complement
2555 * checksum across the entire frame, excluding the first 14
2556	 * bytes. It is best simply to check the checksum and
2557	 * tell the stack about it only if the checksum is good.
2558 */
2559
2560static inline uint16_t
2561mxge_rx_csum(struct mbuf *m, int csum)
2562{
2563 struct ether_header *eh;
2564#ifdef INET
2565 struct ip *ip;
2566#endif
2567#if defined(INET) || defined(INET6)
2568 int cap = m->m_pkthdr.rcvif->if_capenable;
2569#endif
2570 uint16_t c, etype;
2571
2572
2573 eh = mtod(m, struct ether_header *);
2574 etype = ntohs(eh->ether_type);
2575 switch (etype) {
2576#ifdef INET
2577 case ETHERTYPE_IP:
2578 if ((cap & IFCAP_RXCSUM) == 0)
2579 return (1);
2580 ip = (struct ip *)(eh + 1);
2581 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2582 return (1);
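		/* fold the firmware's raw frame checksum together with
		   the pseudo-header: ip_len less the IP header length
		   gives the L4 length, and ip_p the protocol */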
2583 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2584 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2585 (ip->ip_hl << 2) + ip->ip_p));
2586 c ^= 0xffff;
2587 break;
2588#endif
2589#ifdef INET6
2590 case ETHERTYPE_IPV6:
2591 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2592 return (1);
2593 c = mxge_rx_csum6((eh + 1), m, csum);
2594 break;
2595#endif
2596 default:
2597 c = 1;
2598 }
2599 return (c);
2600}
2601
2602static void
2603mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2604{
2605 struct ether_vlan_header *evl;
2606 struct ether_header *eh;
2607 uint32_t partial;
2608
2609 evl = mtod(m, struct ether_vlan_header *);
2610 eh = mtod(m, struct ether_header *);
2611
2612 /*
2613	 * fix the checksum by subtracting the checksum of the
2614	 * ETHER_VLAN_ENCAP_LEN bytes that sit just after what the
2615	 * firmware thought was the end of the ethernet header.
2616 */
2617
2618 /* put checksum into host byte order */
2619 *csum = ntohs(*csum);
2620 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
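	/* same ones-complement subtract-with-carry as in
	   mxge_rx_csum6(), folded back down to 16 bits */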
2621 (*csum) += ~partial;
2622 (*csum) += ((*csum) < ~partial);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2624 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2625
2626 /* restore checksum to network byte order;
2627 later consumers expect this */
2628 *csum = htons(*csum);
2629
2630 /* save the tag */
2631#ifdef MXGE_NEW_VLAN_API
2632 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2633#else
2634 {
2635 struct m_tag *mtag;
2636 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2637 M_NOWAIT);
2638 if (mtag == NULL)
2639 return;
2640 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2641 m_tag_prepend(m, mtag);
2642 }
2643
2644#endif
2645 m->m_flags |= M_VLANTAG;
2646
2647 /*
2648 * Remove the 802.1q header by copying the Ethernet
2649 * addresses over it and adjusting the beginning of
2650 * the data in the mbuf. The encapsulated Ethernet
2651 * type field is already in place.
2652 */
2653 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2654 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2655 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2656}
2657
2658
2659static inline void
2660mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2661 uint32_t csum, int lro)
2662{
2663 mxge_softc_t *sc;
2664 struct ifnet *ifp;
2665 struct mbuf *m;
2666 struct ether_header *eh;
2667 mxge_rx_ring_t *rx;
2668 bus_dmamap_t old_map;
2669 int idx;
2670
2671 sc = ss->sc;
2672 ifp = sc->ifp;
2673 rx = &ss->rx_big;
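	/* a single received jumbo can occupy rx->nbufs consecutive
	   ring slots when virtually contiguous buffers are in use */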
2674 idx = rx->cnt & rx->mask;
2675 rx->cnt += rx->nbufs;
2676 /* save a pointer to the received mbuf */
2677 m = rx->info[idx].m;
2678 /* try to replace the received mbuf */
2679 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2680 /* drop the frame -- the old mbuf is re-cycled */
2681 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2682 return;
2683 }
2684
2685 /* unmap the received buffer */
2686 old_map = rx->info[idx].map;
2687 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2688 bus_dmamap_unload(rx->dmat, old_map);
2689
2690 /* swap the bus_dmamap_t's */
2691 rx->info[idx].map = rx->extra_map;
2692 rx->extra_map = old_map;
2693
2694 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2695 * aligned */
2696 m->m_data += MXGEFW_PAD;
2697
2698 m->m_pkthdr.rcvif = ifp;
2699 m->m_len = m->m_pkthdr.len = len;
2700 ss->ipackets++;
2701 eh = mtod(m, struct ether_header *);
2702 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2703 mxge_vlan_tag_remove(m, &csum);
2704 }
2705 /* if the checksum is valid, mark it in the mbuf header */
2706
2707 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2708 (0 == mxge_rx_csum(m, csum))) {
2709 /* Tell the stack that the checksum is good */
2710 m->m_pkthdr.csum_data = 0xffff;
2711 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2712 CSUM_DATA_VALID;
2713
2714#if defined(INET) || defined (INET6)
2715 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2716 return;
2717#endif
2718 }
2719 /* flowid only valid if RSS hashing is enabled */
2720 if (sc->num_slices > 1) {
2721 m->m_pkthdr.flowid = (ss - sc->ss);
2722 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2723 }
2724 /* pass the frame up the stack */
2725 (*ifp->if_input)(ifp, m);
2726}
2727
2728static inline void
2729mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2730 uint32_t csum, int lro)
2731{
2732 mxge_softc_t *sc;
2733 struct ifnet *ifp;
2734 struct ether_header *eh;
2735 struct mbuf *m;
2736 mxge_rx_ring_t *rx;
2737 bus_dmamap_t old_map;
2738 int idx;
2739
2740 sc = ss->sc;
2741 ifp = sc->ifp;
2742 rx = &ss->rx_small;
2743 idx = rx->cnt & rx->mask;
2744 rx->cnt++;
2745 /* save a pointer to the received mbuf */
2746 m = rx->info[idx].m;
2747 /* try to replace the received mbuf */
2748 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2749 /* drop the frame -- the old mbuf is re-cycled */
2750 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
2751 return;
2752 }
2753
2754 /* unmap the received buffer */
2755 old_map = rx->info[idx].map;
2756 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2757 bus_dmamap_unload(rx->dmat, old_map);
2758
2759 /* swap the bus_dmamap_t's */
2760 rx->info[idx].map = rx->extra_map;
2761 rx->extra_map = old_map;
2762
2763 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2764 * aligned */
2765 m->m_data += MXGEFW_PAD;
2766
2767 m->m_pkthdr.rcvif = ifp;
2768 m->m_len = m->m_pkthdr.len = len;
2769 ss->ipackets++;
2770 eh = mtod(m, struct ether_header *);
2771 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2772 mxge_vlan_tag_remove(m, &csum);
2773 }
2774 /* if the checksum is valid, mark it in the mbuf header */
2775 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2776 (0 == mxge_rx_csum(m, csum))) {
2777 /* Tell the stack that the checksum is good */
2778 m->m_pkthdr.csum_data = 0xffff;
2779 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2780 CSUM_DATA_VALID;
2781
2782#if defined(INET) || defined (INET6)
2783 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2784 return;
2785#endif
2786 }
2787 /* flowid only valid if RSS hashing is enabled */
2788 if (sc->num_slices > 1) {
2789 m->m_pkthdr.flowid = (ss - sc->ss);
2790 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2791 }
2792 /* pass the frame up the stack */
2793 (*ifp->if_input)(ifp, m);
2794}
2795
2796static inline void
2797mxge_clean_rx_done(struct mxge_slice_state *ss)
2798{
2799 mxge_rx_done_t *rx_done = &ss->rx_done;
2800 int limit = 0;
2801 uint16_t length;
2802 uint16_t checksum;
2803 int lro;
2804
2805 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2806 while (rx_done->entry[rx_done->idx].length != 0) {
2807 length = ntohs(rx_done->entry[rx_done->idx].length);
2808 rx_done->entry[rx_done->idx].length = 0;
2809 checksum = rx_done->entry[rx_done->idx].checksum;
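		/* frames that fit in an MHLEN mbuf (less the 2-byte
		   MXGEFW_PAD) were received into the small ring;
		   anything larger came from the big ring */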
2810 if (length <= (MHLEN - MXGEFW_PAD))
2811 mxge_rx_done_small(ss, length, checksum, lro);
2812 else
2813 mxge_rx_done_big(ss, length, checksum, lro);
2814 rx_done->cnt++;
2815 rx_done->idx = rx_done->cnt & rx_done->mask;
2816
2817 /* limit potential for livelock */
2818 if (__predict_false(++limit > rx_done->mask / 2))
2819 break;
2820 }
2821#if defined(INET) || defined (INET6)
2822 while (!SLIST_EMPTY(&ss->lc.lro_active)) {
2823 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
2824 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
2825 tcp_lro_flush(&ss->lc, lro);
2826 }
2827#endif
2828}
2829
2830
2831static inline void
2832mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2833{
2834 struct ifnet *ifp;
2835 mxge_tx_ring_t *tx;
2836 struct mbuf *m;
2837 bus_dmamap_t map;
2838 int idx;
2839 int *flags;
2840
2841 tx = &ss->tx;
2842 ifp = ss->sc->ifp;
2843 while (tx->pkt_done != mcp_idx) {
2844 idx = tx->done & tx->mask;
2845 tx->done++;
2846 m = tx->info[idx].m;
2847 /* mbuf and DMA map only attached to the first
2848 segment per-mbuf */
2849 if (m != NULL) {
2850 ss->obytes += m->m_pkthdr.len;
2851 if (m->m_flags & M_MCAST)
2852 ss->omcasts++;
2853 ss->opackets++;
2854 tx->info[idx].m = NULL;
2855 map = tx->info[idx].map;
2856 bus_dmamap_unload(tx->dmat, map);
2857 m_freem(m);
2858 }
2859 if (tx->info[idx].flag) {
2860 tx->info[idx].flag = 0;
2861 tx->pkt_done++;
2862 }
2863 }
2864
2865 /* If we have space, clear IFF_OACTIVE to tell the stack that
2866	   it's OK to send packets */
2867#ifdef IFNET_BUF_RING
2868 flags = &ss->if_drv_flags;
2869#else
2870 flags = &ifp->if_drv_flags;
2871#endif
2872 mtx_lock(&ss->tx.mtx);
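	/* wake the transmit path once in-flight descriptors fall
	   below a quarter of the ring, i.e. most of the ring has
	   drained */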
2873 if ((*flags) & IFF_DRV_OACTIVE &&
2874 tx->req - tx->done < (tx->mask + 1)/4) {
2875 *(flags) &= ~IFF_DRV_OACTIVE;
2876 ss->tx.wake++;
2877 mxge_start_locked(ss);
2878 }
2879#ifdef IFNET_BUF_RING
2880 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2881 /* let the NIC stop polling this queue, since there
2882 * are no more transmits pending */
2883 if (tx->req == tx->done) {
2884 *tx->send_stop = 1;
2885 tx->queue_active = 0;
2886 tx->deactivate++;
2887 wmb();
2888 }
2889 }
2890#endif
2891 mtx_unlock(&ss->tx.mtx);
2892
2893}
2894
2895static struct mxge_media_type mxge_xfp_media_types[] =
2896{
2897 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2898 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2899 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2900 {0, (1 << 5), "10GBASE-ER"},
2901 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2902 {0, (1 << 3), "10GBASE-SW"},
2903 {0, (1 << 2), "10GBASE-LW"},
2904 {0, (1 << 1), "10GBASE-EW"},
2905 {0, (1 << 0), "Reserved"}
2906};
2907static struct mxge_media_type mxge_sfp_media_types[] =
2908{
2909 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2910 {0, (1 << 7), "Reserved"},
2911 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2912 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2913 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2914 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2915};
2916
2917static void
2918mxge_media_set(mxge_softc_t *sc, int media_type)
2919{
2920
2921
2922 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2923 0, NULL);
2924 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2925 sc->current_media = media_type;
2926 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2927}
2928
2929static void
2930mxge_media_init(mxge_softc_t *sc)
2931{
2932 char *ptr;
2933 int i;
2934
2935 ifmedia_removeall(&sc->media);
2936 mxge_media_set(sc, IFM_AUTO);
2937
2938 /*
2939	 * parse the product code to determine the interface type
2940 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2941 * after the 3rd dash in the driver's cached copy of the
2942 * EEPROM's product code string.
2943 */
2944 ptr = sc->product_code_string;
2945 if (ptr == NULL) {
2946 device_printf(sc->dev, "Missing product code\n");
2947 return;
2948 }
2949
2950 for (i = 0; i < 3; i++, ptr++) {
2951 ptr = strchr(ptr, '-');
2952 if (ptr == NULL) {
2953 device_printf(sc->dev,
2954			      "only %d dashes in product code?!?\n", i);
2955 return;
2956 }
2957 }
2958	if (*ptr == 'C' || *(ptr + 1) == 'C') {
2959 /* -C is CX4 */
2960 sc->connector = MXGE_CX4;
2961 mxge_media_set(sc, IFM_10G_CX4);
2962 } else if (*ptr == 'Q') {
2963 /* -Q is Quad Ribbon Fiber */
2964 sc->connector = MXGE_QRF;
2965 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2966 /* FreeBSD has no media type for Quad ribbon fiber */
2967 } else if (*ptr == 'R') {
2968 /* -R is XFP */
2969 sc->connector = MXGE_XFP;
2970	} else if (*ptr == 'S' || *(ptr + 1) == 'S') {
2971 /* -S or -2S is SFP+ */
2972 sc->connector = MXGE_SFP;
2973 } else {
2974 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2975 }
2976}
2977
2978/*
2979 * Determine the media type for a NIC. Some XFPs will identify
2980 * themselves only when their link is up, so this is initiated via a
2981 * link up interrupt. However, this can potentially take up to
2982 * several milliseconds, so it is run via the watchdog routine, rather
2983 * than in the interrupt handler itself.
2984 */
2985static void
2986mxge_media_probe(mxge_softc_t *sc)
2987{
2988 mxge_cmd_t cmd;
2989 char *cage_type;
2990
2991 struct mxge_media_type *mxge_media_types = NULL;
2992 int i, err, ms, mxge_media_type_entries;
2993 uint32_t byte;
2994
2995 sc->need_media_probe = 0;
2996
2997 if (sc->connector == MXGE_XFP) {
2998 /* -R is XFP */
2999 mxge_media_types = mxge_xfp_media_types;
3000 mxge_media_type_entries =
3001 sizeof (mxge_xfp_media_types) /
3002 sizeof (mxge_xfp_media_types[0]);
3003 byte = MXGE_XFP_COMPLIANCE_BYTE;
3004 cage_type = "XFP";
3005 } else if (sc->connector == MXGE_SFP) {
3006 /* -S or -2S is SFP+ */
3007 mxge_media_types = mxge_sfp_media_types;
3008 mxge_media_type_entries =
3009 sizeof (mxge_sfp_media_types) /
3010 sizeof (mxge_sfp_media_types[0]);
3011 cage_type = "SFP+";
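		/* byte 3 of the SFP+ EEPROM (per SFF-8472) holds the
		   10GbE compliance bits matched against
		   mxge_sfp_media_types above */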
3012 byte = 3;
3013 } else {
3014 /* nothing to do; media type cannot change */
3015 return;
3016 }
3017
3018 /*
3019 * At this point we know the NIC has an XFP cage, so now we
3020 * try to determine what is in the cage by using the
3021	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3022	 * register. We read just one byte, which may take over
3023	 * a millisecond.
3024 */
3025
3026 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3027 cmd.data1 = byte;
3028 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3029 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3030 device_printf(sc->dev, "failed to read XFP\n");
3031 }
3032 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3033 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3034 }
3035 if (err != MXGEFW_CMD_OK) {
3036 return;
3037 }
3038
3039 /* now we wait for the data to be cached */
3040 cmd.data0 = byte;
3041 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3042 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3043 DELAY(1000);
3044 cmd.data0 = byte;
3045 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3046 }
3047 if (err != MXGEFW_CMD_OK) {
3048 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3049 cage_type, err, ms);
3050 return;
3051 }
3052
3053 if (cmd.data0 == mxge_media_types[0].bitmask) {
3054 if (mxge_verbose)
3055 device_printf(sc->dev, "%s:%s\n", cage_type,
3056 mxge_media_types[0].name);
3057 if (sc->current_media != mxge_media_types[0].flag) {
3058 mxge_media_init(sc);
3059 mxge_media_set(sc, mxge_media_types[0].flag);
3060 }
3061 return;
3062 }
3063 for (i = 1; i < mxge_media_type_entries; i++) {
3064 if (cmd.data0 & mxge_media_types[i].bitmask) {
3065 if (mxge_verbose)
3066 device_printf(sc->dev, "%s:%s\n",
3067 cage_type,
3068 mxge_media_types[i].name);
3069
3070 if (sc->current_media != mxge_media_types[i].flag) {
3071 mxge_media_init(sc);
3072 mxge_media_set(sc, mxge_media_types[i].flag);
3073 }
3074 return;
3075 }
3076 }
3077 if (mxge_verbose)
3078 device_printf(sc->dev, "%s media 0x%x unknown\n",
3079 cage_type, cmd.data0);
3080
3081 return;
3082}
3083
3084static void
3085mxge_intr(void *arg)
3086{
3087 struct mxge_slice_state *ss = arg;
3088 mxge_softc_t *sc = ss->sc;
3089 mcp_irq_data_t *stats = ss->fw_stats;
3090 mxge_tx_ring_t *tx = &ss->tx;
3091 mxge_rx_done_t *rx_done = &ss->rx_done;
3092 uint32_t send_done_count;
3093 uint8_t valid;
3094
3095
3096#ifndef IFNET_BUF_RING
3097 /* an interrupt on a non-zero slice is implicitly valid
3098 since MSI-X irqs are not shared */
3099 if (ss != sc->ss) {
3100 mxge_clean_rx_done(ss);
3101 *ss->irq_claim = be32toh(3);
3102 return;
3103 }
3104#endif
3105
3106 /* make sure the DMA has finished */
3107 if (!stats->valid) {
3108 return;
3109 }
3110 valid = stats->valid;
3111
3112 if (sc->legacy_irq) {
3113 /* lower legacy IRQ */
3114 *sc->irq_deassert = 0;
3115 if (!mxge_deassert_wait)
3116 /* don't wait for conf. that irq is low */
3117 stats->valid = 0;
3118 } else {
3119 stats->valid = 0;
3120 }
3121
3122 /* loop while waiting for legacy irq deassertion */
3123 do {
3124 /* check for transmit completes and receives */
3125 send_done_count = be32toh(stats->send_done_count);
3126 while ((send_done_count != tx->pkt_done) ||
3127 (rx_done->entry[rx_done->idx].length != 0)) {
3128 if (send_done_count != tx->pkt_done)
3129 mxge_tx_done(ss, (int)send_done_count);
3130 mxge_clean_rx_done(ss);
3131 send_done_count = be32toh(stats->send_done_count);
3132 }
3133 if (sc->legacy_irq && mxge_deassert_wait)
3134 wmb();
3135 } while (*((volatile uint8_t *) &stats->valid));
3136
3137 /* fw link & error stats meaningful only on the first slice */
3138 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3139 if (sc->link_state != stats->link_up) {
3140 sc->link_state = stats->link_up;
3141 if (sc->link_state) {
3142 if_link_state_change(sc->ifp, LINK_STATE_UP);
3143 if (mxge_verbose)
3144 device_printf(sc->dev, "link up\n");
3145 } else {
3146 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3147 if (mxge_verbose)
3148 device_printf(sc->dev, "link down\n");
3149 }
3150 sc->need_media_probe = 1;
3151 }
3152 if (sc->rdma_tags_available !=
3153 be32toh(stats->rdma_tags_available)) {
3154 sc->rdma_tags_available =
3155 be32toh(stats->rdma_tags_available);
3156 device_printf(sc->dev, "RDMA timed out! %d tags "
3157 "left\n", sc->rdma_tags_available);
3158 }
3159
3160 if (stats->link_down) {
3161 sc->down_cnt += stats->link_down;
3162 sc->link_state = 0;
3163 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3164 }
3165 }
3166
3167	/* check to see if we have an rx token to pass back */
3168 if (valid & 0x1)
3169 *ss->irq_claim = be32toh(3);
3170 *(ss->irq_claim + 1) = be32toh(3);
3171}
3172
3173static void
3174mxge_init(void *arg)
3175{
3176 mxge_softc_t *sc = arg;
3177 struct ifnet *ifp = sc->ifp;
3178
3179
3180 mtx_lock(&sc->driver_mtx);
3181 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3182 (void) mxge_open(sc);
3183 mtx_unlock(&sc->driver_mtx);
3184}
3185
3186
3187
3188static void
3189mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3190{
3191 int i;
3192
3193#if defined(INET) || defined(INET6)
3194 tcp_lro_free(&ss->lc);
3195#endif
3196 for (i = 0; i <= ss->rx_big.mask; i++) {
3197 if (ss->rx_big.info[i].m == NULL)
3198 continue;
3199 bus_dmamap_unload(ss->rx_big.dmat,
3200 ss->rx_big.info[i].map);
3201 m_freem(ss->rx_big.info[i].m);
3202 ss->rx_big.info[i].m = NULL;
3203 }
3204
3205 for (i = 0; i <= ss->rx_small.mask; i++) {
3206 if (ss->rx_small.info[i].m == NULL)
3207 continue;
3208 bus_dmamap_unload(ss->rx_small.dmat,
3209 ss->rx_small.info[i].map);
3210 m_freem(ss->rx_small.info[i].m);
3211 ss->rx_small.info[i].m = NULL;
3212 }
3213
3214 /* transmit ring used only on the first slice */
3215 if (ss->tx.info == NULL)
3216 return;
3217
3218 for (i = 0; i <= ss->tx.mask; i++) {
3219 ss->tx.info[i].flag = 0;
3220 if (ss->tx.info[i].m == NULL)
3221 continue;
3222 bus_dmamap_unload(ss->tx.dmat,
3223 ss->tx.info[i].map);
3224 m_freem(ss->tx.info[i].m);
3225 ss->tx.info[i].m = NULL;
3226 }
3227}
3228
3229static void
3230mxge_free_mbufs(mxge_softc_t *sc)
3231{
3232 int slice;
3233
3234 for (slice = 0; slice < sc->num_slices; slice++)
3235 mxge_free_slice_mbufs(&sc->ss[slice]);
3236}
3237
3238static void
3239mxge_free_slice_rings(struct mxge_slice_state *ss)
3240{
3241 int i;
3242
3243
3244 if (ss->rx_done.entry != NULL)
3245 mxge_dma_free(&ss->rx_done.dma);
3246 ss->rx_done.entry = NULL;
3247
3248 if (ss->tx.req_bytes != NULL)
3249 free(ss->tx.req_bytes, M_DEVBUF);
3250 ss->tx.req_bytes = NULL;
3251
3252 if (ss->tx.seg_list != NULL)
3253 free(ss->tx.seg_list, M_DEVBUF);
3254 ss->tx.seg_list = NULL;
3255
3256 if (ss->rx_small.shadow != NULL)
3257 free(ss->rx_small.shadow, M_DEVBUF);
3258 ss->rx_small.shadow = NULL;
3259
3260 if (ss->rx_big.shadow != NULL)
3261 free(ss->rx_big.shadow, M_DEVBUF);
3262 ss->rx_big.shadow = NULL;
3263
3264 if (ss->tx.info != NULL) {
3265 if (ss->tx.dmat != NULL) {
3266 for (i = 0; i <= ss->tx.mask; i++) {
3267 bus_dmamap_destroy(ss->tx.dmat,
3268 ss->tx.info[i].map);
3269 }
3270 bus_dma_tag_destroy(ss->tx.dmat);
3271 }
3272 free(ss->tx.info, M_DEVBUF);
3273 }
3274 ss->tx.info = NULL;
3275
3276 if (ss->rx_small.info != NULL) {
3277 if (ss->rx_small.dmat != NULL) {
3278 for (i = 0; i <= ss->rx_small.mask; i++) {
3279 bus_dmamap_destroy(ss->rx_small.dmat,
3280 ss->rx_small.info[i].map);
3281 }
3282 bus_dmamap_destroy(ss->rx_small.dmat,
3283 ss->rx_small.extra_map);
3284 bus_dma_tag_destroy(ss->rx_small.dmat);
3285 }
3286 free(ss->rx_small.info, M_DEVBUF);
3287 }
3288 ss->rx_small.info = NULL;
3289
3290 if (ss->rx_big.info != NULL) {
3291 if (ss->rx_big.dmat != NULL) {
3292 for (i = 0; i <= ss->rx_big.mask; i++) {
3293 bus_dmamap_destroy(ss->rx_big.dmat,
3294 ss->rx_big.info[i].map);
3295 }
3296 bus_dmamap_destroy(ss->rx_big.dmat,
3297 ss->rx_big.extra_map);
3298 bus_dma_tag_destroy(ss->rx_big.dmat);
3299 }
3300 free(ss->rx_big.info, M_DEVBUF);
3301 }
3302 ss->rx_big.info = NULL;
3303}
3304
3305static void
3306mxge_free_rings(mxge_softc_t *sc)
3307{
3308 int slice;
3309
3310 for (slice = 0; slice < sc->num_slices; slice++)
3311 mxge_free_slice_rings(&sc->ss[slice]);
3312}
3313
3314static int
3315mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3316 int tx_ring_entries)
3317{
3318 mxge_softc_t *sc = ss->sc;
3319 size_t bytes;
3320 int err, i;
3321
3322 /* allocate per-slice receive resources */
3323
3324 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3325 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
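	/* the completion ring presumably needs room for events from
	   both the small and the big receive rings, hence 2x */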
3326
3327 /* allocate the rx shadow rings */
3328 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3329 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3330
3331 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3332 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3333
3334 /* allocate the rx host info rings */
3335 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3336 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3337
3338 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3339 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3340
3341 /* allocate the rx busdma resources */
3342 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3343 1, /* alignment */
3344 4096, /* boundary */
3345 BUS_SPACE_MAXADDR, /* low */
3346 BUS_SPACE_MAXADDR, /* high */
3347 NULL, NULL, /* filter */
3348 MHLEN, /* maxsize */
3349 1, /* num segs */
3350 MHLEN, /* maxsegsize */
3351 BUS_DMA_ALLOCNOW, /* flags */
3352 NULL, NULL, /* lock */
3353 &ss->rx_small.dmat); /* tag */
3354 if (err != 0) {
3355 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3356 err);
3357 return err;
3358 }
3359
3360 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3361 1, /* alignment */
3362#if MXGE_VIRT_JUMBOS
3363 4096, /* boundary */
3364#else
3365 0, /* boundary */
3366#endif
3367 BUS_SPACE_MAXADDR, /* low */
3368 BUS_SPACE_MAXADDR, /* high */
3369 NULL, NULL, /* filter */
3370 3*4096, /* maxsize */
3371#if MXGE_VIRT_JUMBOS
3372 3, /* num segs */
3373 4096, /* maxsegsize*/
3374#else
3375 1, /* num segs */
3376 MJUM9BYTES, /* maxsegsize*/
3377#endif
3378 BUS_DMA_ALLOCNOW, /* flags */
3379 NULL, NULL, /* lock */
3380 &ss->rx_big.dmat); /* tag */
3381 if (err != 0) {
3382 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3383 err);
3384 return err;
3385 }
3386 for (i = 0; i <= ss->rx_small.mask; i++) {
3387 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3388 &ss->rx_small.info[i].map);
3389 if (err != 0) {
3390 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3391 err);
3392 return err;
3393 }
3394 }
3395 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3396 &ss->rx_small.extra_map);
3397 if (err != 0) {
3398 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3399 err);
3400 return err;
3401 }
3402
3403 for (i = 0; i <= ss->rx_big.mask; i++) {
3404 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3405 &ss->rx_big.info[i].map);
3406 if (err != 0) {
3407 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3408 err);
3409 return err;
3410 }
3411 }
3412 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3413 &ss->rx_big.extra_map);
3414 if (err != 0) {
3415 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3416 err);
3417 return err;
3418 }
3419
3420 /* now allocate TX resources */
3421
3422#ifndef IFNET_BUF_RING
3423 /* only use a single TX ring for now */
3424 if (ss != ss->sc->ss)
3425 return 0;
3426#endif
3427
3428 ss->tx.mask = tx_ring_entries - 1;
3429 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3430
3431
3432 /* allocate the tx request copy block */
3433 bytes = 8 +
3434 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3435 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3436 /* ensure req_list entries are aligned to 8 bytes */
3437 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3438 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3439
3440 /* allocate the tx busdma segment list */
3441 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3442 ss->tx.seg_list = (bus_dma_segment_t *)
3443 malloc(bytes, M_DEVBUF, M_WAITOK);
3444
3445 /* allocate the tx host info ring */
3446 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3447 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3448
3449 /* allocate the tx busdma resources */
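	/* maxsize below is presumably sized for a maximal 64KB TSO
	   payload plus room for protocol headers */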
3450 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3451 1, /* alignment */
3452 sc->tx_boundary, /* boundary */
3453 BUS_SPACE_MAXADDR, /* low */
3454 BUS_SPACE_MAXADDR, /* high */
3455 NULL, NULL, /* filter */
3456 65536 + 256, /* maxsize */
3457 ss->tx.max_desc - 2, /* num segs */
3458 sc->tx_boundary, /* maxsegsz */
3459 BUS_DMA_ALLOCNOW, /* flags */
3460 NULL, NULL, /* lock */
3461 &ss->tx.dmat); /* tag */
3462
3463 if (err != 0) {
3464 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3465 err);
3466 return err;
3467 }
3468
3469 /* now use these tags to setup dmamaps for each slot
3470 in the ring */
3471 for (i = 0; i <= ss->tx.mask; i++) {
3472 err = bus_dmamap_create(ss->tx.dmat, 0,
3473 &ss->tx.info[i].map);
3474 if (err != 0) {
3475 device_printf(sc->dev, "Err %d tx dmamap\n",
3476 err);
3477 return err;
3478 }
3479 }
3480 return 0;
3481
3482}
3483
3484static int
3485mxge_alloc_rings(mxge_softc_t *sc)
3486{
3487 mxge_cmd_t cmd;
3488 int tx_ring_size;
3489 int tx_ring_entries, rx_ring_entries;
3490 int err, slice;
3491
3492 /* get ring sizes */
3493 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3494 tx_ring_size = cmd.data0;
3495 if (err != 0) {
3496 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3497 goto abort;
3498 }
3499
3500 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3501 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3502 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3503 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3504 IFQ_SET_READY(&sc->ifp->if_snd);
3505
3506 for (slice = 0; slice < sc->num_slices; slice++) {
3507 err = mxge_alloc_slice_rings(&sc->ss[slice],
3508 rx_ring_entries,
3509 tx_ring_entries);
3510 if (err != 0)
3511 goto abort;
3512 }
3513 return 0;
3514
3515abort:
3516 mxge_free_rings(sc);
3517 return err;
3518
3519}
3520
3521
3522static void
3523mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3524{
3525 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3526
3527 if (bufsize < MCLBYTES) {
3528 /* easy, everything fits in a single buffer */
3529 *big_buf_size = MCLBYTES;
3530 *cl_size = MCLBYTES;
3531 *nbufs = 1;
3532 return;
3533 }
3534
3535 if (bufsize < MJUMPAGESIZE) {
3536 /* still easy, everything still fits in a single buffer */
3537 *big_buf_size = MJUMPAGESIZE;
3538 *cl_size = MJUMPAGESIZE;
3539 *nbufs = 1;
3540 return;
3541 }
3542#if MXGE_VIRT_JUMBOS
3543 /* now we need to use virtually contiguous buffers */
3544 *cl_size = MJUM9BYTES;
3545 *big_buf_size = 4096;
3546 *nbufs = mtu / 4096 + 1;
3547 /* needs to be a power of two, so round up */
3548 if (*nbufs == 3)
3549 *nbufs = 4;
3550#else
3551 *cl_size = MJUM9BYTES;
3552 *big_buf_size = MJUM9BYTES;
3553 *nbufs = 1;
3554#endif
3555}
3556
3557static int
3558mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3559{
3560 mxge_softc_t *sc;
3561 mxge_cmd_t cmd;
3562 bus_dmamap_t map;
3563 int err, i, slice;
3564
3565
3566 sc = ss->sc;
3567 slice = ss - sc->ss;
3568
3569#if defined(INET) || defined(INET6)
3570 (void)tcp_lro_init(&ss->lc);
3571#endif
3572 ss->lc.ifp = sc->ifp;
3573
3574 /* get the lanai pointers to the send and receive rings */
3575
3576 err = 0;
3577#ifndef IFNET_BUF_RING
3578 /* We currently only send from the first slice */
3579 if (slice == 0) {
3580#endif
3581 cmd.data0 = slice;
3582 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3583 ss->tx.lanai =
3584 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3585 ss->tx.send_go = (volatile uint32_t *)
3586 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3587 ss->tx.send_stop = (volatile uint32_t *)
3588 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3589#ifndef IFNET_BUF_RING
3590 }
3591#endif
3592 cmd.data0 = slice;
3593 err |= mxge_send_cmd(sc,
3594 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3595 ss->rx_small.lanai =
3596 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3597 cmd.data0 = slice;
3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3599 ss->rx_big.lanai =
3600 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3601
3602 if (err != 0) {
3603 device_printf(sc->dev,
3604 "failed to get ring sizes or locations\n");
3605 return EIO;
3606 }
3607
3608 /* stock receive rings */
3609 for (i = 0; i <= ss->rx_small.mask; i++) {
3610 map = ss->rx_small.info[i].map;
3611 err = mxge_get_buf_small(ss, map, i);
3612 if (err) {
3613 device_printf(sc->dev, "alloced %d/%d smalls\n",
3614 i, ss->rx_small.mask + 1);
3615 return ENOMEM;
3616 }
3617 }
3618 for (i = 0; i <= ss->rx_big.mask; i++) {
3619 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3620 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3621 }
3622 ss->rx_big.nbufs = nbufs;
3623 ss->rx_big.cl_size = cl_size;
3624 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3625 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3626 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3627 map = ss->rx_big.info[i].map;
3628 err = mxge_get_buf_big(ss, map, i);
3629 if (err) {
3630 device_printf(sc->dev, "alloced %d/%d bigs\n",
3631 i, ss->rx_big.mask + 1);
3632 return ENOMEM;
3633 }
3634 }
3635 return 0;
3636}
3637
3638static int
3639mxge_open(mxge_softc_t *sc)
3640{
3641 mxge_cmd_t cmd;
3642 int err, big_bytes, nbufs, slice, cl_size, i;
3643 bus_addr_t bus;
3644 volatile uint8_t *itable;
3645 struct mxge_slice_state *ss;
3646
3647 /* Copy the MAC address in case it was overridden */
3648 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3649
3650 err = mxge_reset(sc, 1);
3651 if (err != 0) {
3652 device_printf(sc->dev, "failed to reset\n");
3653 return EIO;
3654 }
3655
3656 if (sc->num_slices > 1) {
3657 /* setup the indirection table */
3658 cmd.data0 = sc->num_slices;
3659 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3660 &cmd);
3661
3662 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3663 &cmd);
3664 if (err != 0) {
3665 device_printf(sc->dev,
3666 "failed to setup rss tables\n");
3667 return err;
3668 }
3669
3670 /* just enable an identity mapping */
3671 itable = sc->sram + cmd.data0;
3672 for (i = 0; i < sc->num_slices; i++)
3673 itable[i] = (uint8_t)i;
3674
3675 cmd.data0 = 1;
3676 cmd.data1 = mxge_rss_hash_type;
3677 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3678 if (err != 0) {
3679 device_printf(sc->dev, "failed to enable slices\n");
3680 return err;
3681 }
3682 }
3683
3684
3685 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3686
3687 cmd.data0 = nbufs;
3688 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3689 &cmd);
3690 /* error is only meaningful if we're trying to set
3691 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3692 if (err && nbufs > 1) {
3693 device_printf(sc->dev,
3694			      "Failed to set always-use-n to %d\n",
3695 nbufs);
3696 return EIO;
3697 }
3698 /* Give the firmware the mtu and the big and small buffer
3699 sizes. The firmware wants the big buf size to be a power
3700 of two. Luckily, FreeBSD's clusters are powers of two */
3701 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3702 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3703 cmd.data0 = MHLEN - MXGEFW_PAD;
3704 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3705 &cmd);
3706 cmd.data0 = big_bytes;
3707 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3708
3709 if (err != 0) {
3710 device_printf(sc->dev, "failed to setup params\n");
3711 goto abort;
3712 }
3713
3714	/* Now give the firmware the pointer to the stats block */
3715 for (slice = 0;
3716#ifdef IFNET_BUF_RING
3717 slice < sc->num_slices;
3718#else
3719 slice < 1;
3720#endif
3721 slice++) {
3722 ss = &sc->ss[slice];
3723 cmd.data0 =
3724 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3725 cmd.data1 =
3726 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3727 cmd.data2 = sizeof(struct mcp_irq_data);
3728 cmd.data2 |= (slice << 16);
3729 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3730 }
3731
3732 if (err != 0) {
3733 bus = sc->ss->fw_stats_dma.bus_addr;
3734 bus += offsetof(struct mcp_irq_data, send_done_count);
3735 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3736 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3737 err = mxge_send_cmd(sc,
3738 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3739 &cmd);
3740 /* Firmware cannot support multicast without STATS_DMA_V2 */
3741 sc->fw_multicast_support = 0;
3742 } else {
3743 sc->fw_multicast_support = 1;
3744 }
3745
3746 if (err != 0) {
3747 device_printf(sc->dev, "failed to setup params\n");
3748 goto abort;
3749 }
3750
3751 for (slice = 0; slice < sc->num_slices; slice++) {
3752 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3753 if (err != 0) {
3754 device_printf(sc->dev, "couldn't open slice %d\n",
3755 slice);
3756 goto abort;
3757 }
3758 }
3759
3760 /* Finally, start the firmware running */
3761 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3762 if (err) {
3763 device_printf(sc->dev, "Couldn't bring up link\n");
3764 goto abort;
3765 }
3766#ifdef IFNET_BUF_RING
3767 for (slice = 0; slice < sc->num_slices; slice++) {
3768 ss = &sc->ss[slice];
3769 ss->if_drv_flags |= IFF_DRV_RUNNING;
3770 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3771 }
3772#endif
3773 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3774 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3775
3776 return 0;
3777
3778
3779abort:
3780 mxge_free_mbufs(sc);
3781
3782 return err;
3783}
3784
3785static int
3786mxge_close(mxge_softc_t *sc, int down)
3787{
3788 mxge_cmd_t cmd;
3789 int err, old_down_cnt;
3790#ifdef IFNET_BUF_RING
3791 struct mxge_slice_state *ss;
3792 int slice;
3793#endif
3794
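	/*
	 * A nonzero "down" means the NIC is already known to be down
	 * (watchdog reset path), so the ETHERNET_DOWN command and the
	 * wait for the down irq are skipped.
	 */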
3795#ifdef IFNET_BUF_RING
3796 for (slice = 0; slice < sc->num_slices; slice++) {
3797 ss = &sc->ss[slice];
3798 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3799 }
3800#endif
3801 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3802 if (!down) {
3803 old_down_cnt = sc->down_cnt;
3804 wmb();
3805 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3806 if (err) {
3807 device_printf(sc->dev,
3808 "Couldn't bring down link\n");
3809 }
3810 if (old_down_cnt == sc->down_cnt) {
3811 /* wait for down irq */
3812 DELAY(10 * sc->intr_coal_delay);
3813 }
3814 wmb();
3815 if (old_down_cnt == sc->down_cnt) {
3816 device_printf(sc->dev, "never got down irq\n");
3817 }
3818 }
3819 mxge_free_mbufs(sc);
3820
3821 return 0;
3822}
3823
3824static void
3825mxge_setup_cfg_space(mxge_softc_t *sc)
3826{
3827 device_t dev = sc->dev;
3828 int reg;
3829 uint16_t lnk, pectl;
3830
3831	/* find the PCIe link width and set max read request to 4KB */
3832 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3833 lnk = pci_read_config(dev, reg + 0x12, 2);
3834 sc->link_width = (lnk >> 4) & 0x3f;
3835
3836 if (sc->pectl == 0) {
3837 pectl = pci_read_config(dev, reg + 0x8, 2);
3838 pectl = (pectl & ~0x7000) | (5 << 12);
3839 pci_write_config(dev, reg + 0x8, pectl, 2);
3840 sc->pectl = pectl;
3841 } else {
3842 /* restore saved pectl after watchdog reset */
3843 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3844 }
3845 }
3846
3847 /* Enable DMA and Memory space access */
3848 pci_enable_busmaster(dev);
3849}
3850
3851static uint32_t
3852mxge_read_reboot(mxge_softc_t *sc)
3853{
3854 device_t dev = sc->dev;
3855 uint32_t vs;
3856
3857 /* find the vendor specific offset */
3858 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3859 device_printf(sc->dev,
3860 "could not find vendor specific offset\n");
3861 return (uint32_t)-1;
3862 }
3863 /* enable read32 mode */
3864 pci_write_config(dev, vs + 0x10, 0x3, 1);
3865 /* tell NIC which register to read */
3866 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3867 return (pci_read_config(dev, vs + 0x14, 4));
3868}
3869
3870static void
3871mxge_watchdog_reset(mxge_softc_t *sc)
3872{
3873 struct pci_devinfo *dinfo;
3874 struct mxge_slice_state *ss;
3875 int err, running, s, num_tx_slices = 1;
3876 uint32_t reboot;
3877 uint16_t cmd;
3878
3879 err = ENXIO;
3880
3881 device_printf(sc->dev, "Watchdog reset!\n");
3882
3883 /*
3884 * check to see if the NIC rebooted. If it did, then all of
3885 * PCI config space has been reset, and things like the
3886 * busmaster bit will be zero. If this is the case, then we
3887 * must restore PCI config space before the NIC can be used
3888 * again
3889 */
3890 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3891 if (cmd == 0xffff) {
3892 /*
3893 * maybe the watchdog caught the NIC rebooting; wait
3894 * up to 100ms for it to finish. If it does not come
3895 * back, then give up
3896 */
3897 DELAY(1000*100);
3898 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3899 if (cmd == 0xffff) {
3900 device_printf(sc->dev, "NIC disappeared!\n");
3901 }
3902 }
3903 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3904 /* print the reboot status */
3905 reboot = mxge_read_reboot(sc);
3906 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3907 reboot);
3908 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3909 if (running) {
3910
3911 /*
3912 * quiesce NIC so that TX routines will not try to
3913 * xmit after restoration of BAR
3914 */
3915
3916 /* Mark the link as down */
3917 if (sc->link_state) {
3918 sc->link_state = 0;
3919 if_link_state_change(sc->ifp,
3920 LINK_STATE_DOWN);
3921 }
3922#ifdef IFNET_BUF_RING
3923 num_tx_slices = sc->num_slices;
3924#endif
3925 /* grab all TX locks to ensure no tx */
3926 for (s = 0; s < num_tx_slices; s++) {
3927 ss = &sc->ss[s];
3928 mtx_lock(&ss->tx.mtx);
3929 }
3930 mxge_close(sc, 1);
3931 }
3932 /* restore PCI configuration space */
3933 dinfo = device_get_ivars(sc->dev);
3934 pci_cfg_restore(sc->dev, dinfo);
3935
3936 /* and redo any changes we made to our config space */
3937 mxge_setup_cfg_space(sc);
3938
3939 /* reload f/w */
3940 err = mxge_load_firmware(sc, 0);
3941 if (err) {
3942 device_printf(sc->dev,
3943 "Unable to re-load f/w\n");
3944 }
3945 if (running) {
3946 if (!err)
3947 err = mxge_open(sc);
3948 /* release all TX locks */
3949 for (s = 0; s < num_tx_slices; s++) {
3950 ss = &sc->ss[s];
3951#ifdef IFNET_BUF_RING
3952 mxge_start_locked(ss);
3953#endif
3954 mtx_unlock(&ss->tx.mtx);
3955 }
3956 }
3957 sc->watchdog_resets++;
3958 } else {
3959 device_printf(sc->dev,
3960 "NIC did not reboot, not resetting\n");
3961 err = 0;
3962 }
3963 if (err) {
3964 device_printf(sc->dev, "watchdog reset failed\n");
3965 } else {
3966 if (sc->dying == 2)
3967 sc->dying = 0;
3968 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3969 }
3970}
3971
3972static void
3973mxge_watchdog_task(void *arg, int pending)
3974{
3975 mxge_softc_t *sc = arg;
3976
3977
3978 mtx_lock(&sc->driver_mtx);
3979 mxge_watchdog_reset(sc);
3980 mtx_unlock(&sc->driver_mtx);
3981}
3982
3983static void
3984mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3985{
3986 tx = &sc->ss[slice].tx;
3987	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3988 device_printf(sc->dev,
3989 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3990 tx->req, tx->done, tx->queue_active);
3991 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3992 tx->activate, tx->deactivate);
3993 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3994 tx->pkt_done,
3995 be32toh(sc->ss->fw_stats->send_done_count));
3996}
3997
3998static int
3999mxge_watchdog(mxge_softc_t *sc)
4000{
4001 mxge_tx_ring_t *tx;
4002 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
4003 int i, err = 0;
4004
4005 /* see if we have outstanding transmits, which
4006 have been pending for more than mxge_ticks */
4007 for (i = 0;
4008#ifdef IFNET_BUF_RING
4009 (i < sc->num_slices) && (err == 0);
4010#else
4011 (i < 1) && (err == 0);
4012#endif
4013 i++) {
4014 tx = &sc->ss[i].tx;
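		/*
		 * a slice is considered stuck when transmits have been
		 * outstanding across two consecutive watchdog passes
		 * with no completions in between
		 */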
4015 if (tx->req != tx->done &&
4016 tx->watchdog_req != tx->watchdog_done &&
4017 tx->done == tx->watchdog_done) {
4018 /* check for pause blocking before resetting */
4019 if (tx->watchdog_rx_pause == rx_pause) {
4020 mxge_warn_stuck(sc, tx, i);
4021 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4022 return (ENXIO);
4023 }
4024 else
4025 device_printf(sc->dev, "Flow control blocking "
4026 "xmits, check link partner\n");
4027 }
4028
4029 tx->watchdog_req = tx->req;
4030 tx->watchdog_done = tx->done;
4031 tx->watchdog_rx_pause = rx_pause;
4032 }
4033
4034 if (sc->need_media_probe)
4035 mxge_media_probe(sc);
4036 return (err);
4037}
4038
4039static uint64_t
4040mxge_get_counter(struct ifnet *ifp, ift_counter cnt)
4041{
4042 struct mxge_softc *sc;
4043 uint64_t rv;
4044
4045 sc = if_getsoftc(ifp);
4046 rv = 0;
4047
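	/* counters are kept per-slice; sum them for the ifnet */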
4048 switch (cnt) {
4049 case IFCOUNTER_IPACKETS:
4050 for (int s = 0; s < sc->num_slices; s++)
4051 rv += sc->ss[s].ipackets;
4052 return (rv);
4053 case IFCOUNTER_OPACKETS:
4054 for (int s = 0; s < sc->num_slices; s++)
4055 rv += sc->ss[s].opackets;
4056 return (rv);
4057 case IFCOUNTER_OERRORS:
4058 for (int s = 0; s < sc->num_slices; s++)
4059 rv += sc->ss[s].oerrors;
4060 return (rv);
4061#ifdef IFNET_BUF_RING
4062 case IFCOUNTER_OBYTES:
4063 for (int s = 0; s < sc->num_slices; s++)
4064 rv += sc->ss[s].obytes;
4065 return (rv);
4066 case IFCOUNTER_OMCASTS:
4067 for (int s = 0; s < sc->num_slices; s++)
4068 rv += sc->ss[s].omcasts;
4069 return (rv);
4070 case IFCOUNTER_OQDROPS:
4071 for (int s = 0; s < sc->num_slices; s++)
4072 rv += sc->ss[s].tx.br->br_drops;
4073 return (rv);
4074#endif
4075 default:
4076 return (if_get_counter_default(ifp, cnt));
4077 }
4078}
4079
4080static void
4081mxge_tick(void *arg)
4082{
4083 mxge_softc_t *sc = arg;
4084 u_long pkts = 0;
4085 int err = 0;
4086 int running, ticks;
4087 uint16_t cmd;
4088
4089 ticks = mxge_ticks;
4090 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4091 if (running) {
4092 if (!sc->watchdog_countdown) {
4093 err = mxge_watchdog(sc);
4094 sc->watchdog_countdown = 4;
4095 }
4096 sc->watchdog_countdown--;
4097 }
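	/*
	 * pkts is never updated in this routine, so the h/w fault
	 * check below currently runs on every tick
	 */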
4098 if (pkts == 0) {
4099 /* ensure NIC did not suffer h/w fault while idle */
4100 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4101 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4102 sc->dying = 2;
4103 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4104 err = ENXIO;
4105 }
4106 /* look less often if NIC is idle */
4107 ticks *= 4;
4108 }
4109
4110 if (err == 0)
4111 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4112
4113}
4114
4115static int
4116mxge_media_change(struct ifnet *ifp)
4117{
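	/*
	 * manual media selection is not supported; the media is
	 * detected from the transceiver by mxge_media_probe()
	 */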
4118 return EINVAL;
4119}
4120
4121static int
4122mxge_change_mtu(mxge_softc_t *sc, int mtu)
4123{
4124 struct ifnet *ifp = sc->ifp;
4125 int real_mtu, old_mtu;
4126	int err = 0;
4127
4129 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4130	if (real_mtu > sc->max_mtu || real_mtu < 60)
4131 return EINVAL;
4132 mtx_lock(&sc->driver_mtx);
4133 old_mtu = ifp->if_mtu;
4134 ifp->if_mtu = mtu;
4135 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4136 mxge_close(sc, 0);
4137 err = mxge_open(sc);
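		/*
		 * if re-opening with the new MTU failed, restore the
		 * old MTU and bring the interface back up with it
		 */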
4138 if (err != 0) {
4139 ifp->if_mtu = old_mtu;
4140 mxge_close(sc, 0);
4141 (void) mxge_open(sc);
4142 }
4143 }
4144 mtx_unlock(&sc->driver_mtx);
4145 return err;
4146}
4147
4148static void
4149mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4150{
4151	mxge_softc_t *sc = ifp->if_softc;
4152
4154 if (sc == NULL)
4155 return;
4156 ifmr->ifm_status = IFM_AVALID;
4157 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4158 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4159 ifmr->ifm_active |= sc->current_media;
4160}
4161
4162static int
4163mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4164{
4165 mxge_softc_t *sc = ifp->if_softc;
4166 struct ifreq *ifr = (struct ifreq *)data;
4167 int err, mask;
4168
4169 err = 0;
4170 switch (command) {
4171 case SIOCSIFADDR:
4172 case SIOCGIFADDR:
4173 err = ether_ioctl(ifp, command, data);
4174 break;
4175
4176 case SIOCSIFMTU:
4177 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4178 break;
4179
4180 case SIOCSIFFLAGS:
4181 mtx_lock(&sc->driver_mtx);
4182 if (sc->dying) {
4183 mtx_unlock(&sc->driver_mtx);
4184 return EINVAL;
4185 }
4186 if (ifp->if_flags & IFF_UP) {
4187 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4188 err = mxge_open(sc);
4189 } else {
4190				/* take care of promisc and allmulti
4191				   flag changes */
4192 mxge_change_promisc(sc,
4193 ifp->if_flags & IFF_PROMISC);
4194 mxge_set_multicast_list(sc);
4195 }
4196 } else {
4197 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4198 mxge_close(sc, 0);
4199 }
4200 }
4201 mtx_unlock(&sc->driver_mtx);
4202 break;
4203
4204 case SIOCADDMULTI:
4205 case SIOCDELMULTI:
4206 mtx_lock(&sc->driver_mtx);
4207 mxge_set_multicast_list(sc);
4208 mtx_unlock(&sc->driver_mtx);
4209 break;
4210
4211 case SIOCSIFCAP:
4212 mtx_lock(&sc->driver_mtx);
4213 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
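		/*
		 * mask holds only the capability bits being toggled;
		 * note that TSO is kept dependent on tx csum offload
		 */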
4214 if (mask & IFCAP_TXCSUM) {
4215 if (IFCAP_TXCSUM & ifp->if_capenable) {
4216 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4217 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4218 } else {
4219 ifp->if_capenable |= IFCAP_TXCSUM;
4220 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4221 }
4222 } else if (mask & IFCAP_RXCSUM) {
4223 if (IFCAP_RXCSUM & ifp->if_capenable) {
4224 ifp->if_capenable &= ~IFCAP_RXCSUM;
4225 } else {
4226 ifp->if_capenable |= IFCAP_RXCSUM;
4227 }
4228 }
4229 if (mask & IFCAP_TSO4) {
4230 if (IFCAP_TSO4 & ifp->if_capenable) {
4231 ifp->if_capenable &= ~IFCAP_TSO4;
4232 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4233 ifp->if_capenable |= IFCAP_TSO4;
4234 ifp->if_hwassist |= CSUM_TSO;
4235 } else {
4236 printf("mxge requires tx checksum offload"
4237 " be enabled to use TSO\n");
4238 err = EINVAL;
4239 }
4240 }
4241#if IFCAP_TSO6
4242 if (mask & IFCAP_TXCSUM_IPV6) {
4243 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4244 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4245 | IFCAP_TSO6);
4246				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4247				    | CSUM_UDP_IPV6);
4248 } else {
4249 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4250 ifp->if_hwassist |= (CSUM_TCP_IPV6
4251 | CSUM_UDP_IPV6);
4252 }
4253 } else if (mask & IFCAP_RXCSUM_IPV6) {
4254 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4255 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4256 } else {
4257 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4258 }
4259 }
4260 if (mask & IFCAP_TSO6) {
4261 if (IFCAP_TSO6 & ifp->if_capenable) {
4262 ifp->if_capenable &= ~IFCAP_TSO6;
4263 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4264 ifp->if_capenable |= IFCAP_TSO6;
4265 ifp->if_hwassist |= CSUM_TSO;
4266 } else {
4267 printf("mxge requires tx checksum offload"
4268 " be enabled to use TSO\n");
4269 err = EINVAL;
4270 }
4271 }
4272#endif /*IFCAP_TSO6 */
4273
4274 if (mask & IFCAP_LRO)
4275 ifp->if_capenable ^= IFCAP_LRO;
4276 if (mask & IFCAP_VLAN_HWTAGGING)
4277 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4278 if (mask & IFCAP_VLAN_HWTSO)
4279 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4280
4281 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4282 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4283 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4284
4285 mtx_unlock(&sc->driver_mtx);
4286 VLAN_CAPABILITIES(ifp);
4287
4288 break;
4289
4290 case SIOCGIFMEDIA:
4291 mtx_lock(&sc->driver_mtx);
4292 mxge_media_probe(sc);
4293 mtx_unlock(&sc->driver_mtx);
4294 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4295 &sc->media, command);
4296 break;
4297
4298 default:
4299 err = ENOTTY;
4300 }
4301 return err;
4302}
4303
4304static void
4305mxge_fetch_tunables(mxge_softc_t *sc)
4306{
4307
4308 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4309 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4310 &mxge_flow_control);
4311 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4312 &mxge_intr_coal_delay);
4313 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4314 &mxge_nvidia_ecrc_enable);
4315 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4316 &mxge_force_firmware);
4317 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4318 &mxge_deassert_wait);
4319 TUNABLE_INT_FETCH("hw.mxge.verbose",
4320 &mxge_verbose);
4321 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4322 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4323 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
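	/* also accept the alternate spelling of the hash-type tunable */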
4324 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4325 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4326 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4327
4328 if (bootverbose)
4329 mxge_verbose = 1;
4330 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4331 mxge_intr_coal_delay = 30;
4332 if (mxge_ticks == 0)
4333 mxge_ticks = hz / 2;
4334 sc->pause = mxge_flow_control;
4335 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4336 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4337 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4338 }
4339 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4340 mxge_initial_mtu < ETHER_MIN_LEN)
4341 mxge_initial_mtu = ETHERMTU_JUMBO;
4342
4343 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4344 mxge_throttle = MXGE_MAX_THROTTLE;
4345 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4346 mxge_throttle = MXGE_MIN_THROTTLE;
4347 sc->throttle = mxge_throttle;
4348}
4349
4350
4351static void
4352mxge_free_slices(mxge_softc_t *sc)
4353{
4354 struct mxge_slice_state *ss;
4355	int i;
4356
4358 if (sc->ss == NULL)
4359 return;
4360
4361 for (i = 0; i < sc->num_slices; i++) {
4362 ss = &sc->ss[i];
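		/*
		 * the tx mutex (and buf_ring, with IFNET_BUF_RING) is
		 * only initialized on slices where fw_stats was
		 * allocated; see mxge_alloc_slices()
		 */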
4363 if (ss->fw_stats != NULL) {
4364 mxge_dma_free(&ss->fw_stats_dma);
4365 ss->fw_stats = NULL;
4366#ifdef IFNET_BUF_RING
4367 if (ss->tx.br != NULL) {
4368 drbr_free(ss->tx.br, M_DEVBUF);
4369 ss->tx.br = NULL;
4370 }
4371#endif
4372 mtx_destroy(&ss->tx.mtx);
4373 }
4374 if (ss->rx_done.entry != NULL) {
4375 mxge_dma_free(&ss->rx_done.dma);
4376 ss->rx_done.entry = NULL;
4377 }
4378 }
4379 free(sc->ss, M_DEVBUF);
4380 sc->ss = NULL;
4381}
4382
4383static int
4384mxge_alloc_slices(mxge_softc_t *sc)
4385{
4386 mxge_cmd_t cmd;
4387 struct mxge_slice_state *ss;
4388 size_t bytes;
4389 int err, i, max_intr_slots;
4390
4391 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4392 if (err != 0) {
4393 device_printf(sc->dev, "Cannot determine rx ring size\n");
4394 return err;
4395 }
4396 sc->rx_ring_size = cmd.data0;
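	/*
	 * the rx ring holds one dma address per slot; sizing the
	 * interrupt queue at twice the ring entries presumably
	 * leaves room for completions from both the small and big
	 * receive rings
	 */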
4397 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4398
4399 bytes = sizeof (*sc->ss) * sc->num_slices;
4400 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4401 if (sc->ss == NULL)
4402 return (ENOMEM);
4403 for (i = 0; i < sc->num_slices; i++) {
4404 ss = &sc->ss[i];
4405
4406 ss->sc = sc;
4407
4408 /* allocate per-slice rx interrupt queues */
4409
4410 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4411 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4412 if (err != 0)
4413 goto abort;
4414 ss->rx_done.entry = ss->rx_done.dma.addr;
4415 bzero(ss->rx_done.entry, bytes);
4416
4417 /*
4418 * allocate the per-slice firmware stats; stats
4419		 * (including tx) are used only on the first
4420 * slice for now
4421 */
4422#ifndef IFNET_BUF_RING
4423 if (i > 0)
4424 continue;
4425#endif
4426
4427 bytes = sizeof (*ss->fw_stats);
4428 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4429 sizeof (*ss->fw_stats), 64);
4430 if (err != 0)
4431 goto abort;
4432 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4433 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4434 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4435 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4436#ifdef IFNET_BUF_RING
4437 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4438 &ss->tx.mtx);
4439#endif
4440 }
4441
4442 return (0);
4443
4444abort:
4445 mxge_free_slices(sc);
4446 return (ENOMEM);
4447}
4448
4449static void
4450mxge_slice_probe(mxge_softc_t *sc)
4451{
4452 mxge_cmd_t cmd;
4453 char *old_fw;
4454 int msix_cnt, status, max_intr_slots;
4455
4456 sc->num_slices = 1;
4457 /*
4458	 * don't use multiple slices if they were disabled via the
4459	 * hw.mxge.max_slices tunable, or if this is not an SMP system
4460 */
4461
4462 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4463 return;
4464
4465 /* see how many MSI-X interrupts are available */
4466 msix_cnt = pci_msix_count(sc->dev);
4467 if (msix_cnt < 2)
4468 return;
4469
4470	/* now load the slice-aware firmware and see what it supports */
4471 old_fw = sc->fw_name;
4472 if (old_fw == mxge_fw_aligned)
4473 sc->fw_name = mxge_fw_rss_aligned;
4474 else
4475 sc->fw_name = mxge_fw_rss_unaligned;
4476 status = mxge_load_firmware(sc, 0);
4477 if (status != 0) {
4478 device_printf(sc->dev, "Falling back to a single slice\n");
4479 return;
4480 }
4481
4482 /* try to send a reset command to the card to see if it
4483 is alive */
4484 memset(&cmd, 0, sizeof (cmd));
4485 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4486 if (status != 0) {
4487 device_printf(sc->dev, "failed reset\n");
4488 goto abort_with_fw;
4489 }
4490
4491 /* get rx ring size */
4492 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4493 if (status != 0) {
4494 device_printf(sc->dev, "Cannot determine rx ring size\n");
4495 goto abort_with_fw;
4496 }
4497 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4498
4499 /* tell it the size of the interrupt queues */
4500 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4501 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4502 if (status != 0) {
4503 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4504 goto abort_with_fw;
4505 }
4506
4507	/* ask for the maximum number of slices it supports */
4508 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4509 if (status != 0) {
4510 device_printf(sc->dev,
4511 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4512 goto abort_with_fw;
4513 }
4514 sc->num_slices = cmd.data0;
4515 if (sc->num_slices > msix_cnt)
4516 sc->num_slices = msix_cnt;
4517
4518 if (mxge_max_slices == -1) {
4519 /* cap to number of CPUs in system */
4520 if (sc->num_slices > mp_ncpus)
4521 sc->num_slices = mp_ncpus;
4522 } else {
4523 if (sc->num_slices > mxge_max_slices)
4524 sc->num_slices = mxge_max_slices;
4525 }
4526 /* make sure it is a power of two */
4527 while (sc->num_slices & (sc->num_slices - 1))
4528 sc->num_slices--;
4529
4530 if (mxge_verbose)
4531 device_printf(sc->dev, "using %d slices\n",
4532 sc->num_slices);
4533
4534 return;
4535
4536abort_with_fw:
4537 sc->fw_name = old_fw;
4538 (void) mxge_load_firmware(sc, 0);
4539}
4540
4541static int
4542mxge_add_msix_irqs(mxge_softc_t *sc)
4543{
4544 size_t bytes;
4545 int count, err, i, rid;
4546
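	/* the MSI-X table lives in the region mapped by BAR2 */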
4547 rid = PCIR_BAR(2);
4548 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4549 &rid, RF_ACTIVE);
4550
4551 if (sc->msix_table_res == NULL) {
4552 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4553 return ENXIO;
4554 }
4555
4556 count = sc->num_slices;
4557 err = pci_alloc_msix(sc->dev, &count);
4558 if (err != 0) {
4559		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4560			      "err = %d\n", sc->num_slices, err);
4561 goto abort_with_msix_table;
4562 }
4563 if (count < sc->num_slices) {
4564		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4565			      sc->num_slices, count);
4566 device_printf(sc->dev,
4567 "Try setting hw.mxge.max_slices to %d\n",
4568 count);
4569 err = ENOSPC;
4570 goto abort_with_msix;
4571 }
4572 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4573 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4574 if (sc->msix_irq_res == NULL) {
4575 err = ENOMEM;
4576 goto abort_with_msix;
4577 }
4578
4579 for (i = 0; i < sc->num_slices; i++) {
4580 rid = i + 1;
4581 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4582 SYS_RES_IRQ,
4583 &rid, RF_ACTIVE);
4584 if (sc->msix_irq_res[i] == NULL) {
4585 device_printf(sc->dev, "couldn't allocate IRQ res"
4586 " for message %d\n", i);
4587 err = ENXIO;
4588 goto abort_with_res;
4589 }
4590 }
4591
4592 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4593 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4594
4595 for (i = 0; i < sc->num_slices; i++) {
4596 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4597 INTR_TYPE_NET | INTR_MPSAFE,
4598#if __FreeBSD_version > 700030
4599 NULL,
4600#endif
4601 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4602 if (err != 0) {
4603 device_printf(sc->dev, "couldn't setup intr for "
4604 "message %d\n", i);
4605 goto abort_with_intr;
4606 }
4607 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4608 sc->msix_ih[i], "s%d", i);
4609 }
4610
4611 if (mxge_verbose) {
4612 device_printf(sc->dev, "using %d msix IRQs:",
4613 sc->num_slices);
4614 for (i = 0; i < sc->num_slices; i++)
4615 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4616 printf("\n");
4617 }
4618 return (0);
4619
4620abort_with_intr:
4621 for (i = 0; i < sc->num_slices; i++) {
4622 if (sc->msix_ih[i] != NULL) {
4623 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4624 sc->msix_ih[i]);
4625 sc->msix_ih[i] = NULL;
4626 }
4627 }
4628	free(sc->msix_ih, M_DEVBUF);
4629
4631abort_with_res:
4632 for (i = 0; i < sc->num_slices; i++) {
4633 rid = i + 1;
4634 if (sc->msix_irq_res[i] != NULL)
4635 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4636 sc->msix_irq_res[i]);
4637 sc->msix_irq_res[i] = NULL;
4638 }
4639	free(sc->msix_irq_res, M_DEVBUF);
4640
4642abort_with_msix:
4643 pci_release_msi(sc->dev);
4644
4645abort_with_msix_table:
4646 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4647 sc->msix_table_res);
4648
4649 return err;
4650}
4651
4652static int
4653mxge_add_single_irq(mxge_softc_t *sc)
4654{
4655 int count, err, rid;
4656
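	/*
	 * prefer a single MSI vector (rid 1); fall back to the
	 * legacy INTx interrupt (rid 0) if MSI is unavailable
	 */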
4657 count = pci_msi_count(sc->dev);
4658 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4659 rid = 1;
4660 } else {
4661 rid = 0;
4662 sc->legacy_irq = 1;
4663 }
4664 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4665 1, RF_SHAREABLE | RF_ACTIVE);
4666 if (sc->irq_res == NULL) {
4667 device_printf(sc->dev, "could not alloc interrupt\n");
4668 return ENXIO;
4669 }
4670 if (mxge_verbose)
4671 device_printf(sc->dev, "using %s irq %ld\n",
4672 sc->legacy_irq ? "INTx" : "MSI",
4673 rman_get_start(sc->irq_res));
4674 err = bus_setup_intr(sc->dev, sc->irq_res,
4675 INTR_TYPE_NET | INTR_MPSAFE,
4676#if __FreeBSD_version > 700030
4677 NULL,
4678#endif
4679 mxge_intr, &sc->ss[0], &sc->ih);
4680 if (err != 0) {
4681 bus_release_resource(sc->dev, SYS_RES_IRQ,
4682 sc->legacy_irq ? 0 : 1, sc->irq_res);
4683 if (!sc->legacy_irq)
4684 pci_release_msi(sc->dev);
4685 }
4686 return err;
4687}
4688
4689static void
4690mxge_rem_msix_irqs(mxge_softc_t *sc)
4691{
4692 int i, rid;
4693
4694 for (i = 0; i < sc->num_slices; i++) {
4695 if (sc->msix_ih[i] != NULL) {
4696 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4697 sc->msix_ih[i]);
4698 sc->msix_ih[i] = NULL;
4699 }
4700 }
4701 free(sc->msix_ih, M_DEVBUF);
4702
4703 for (i = 0; i < sc->num_slices; i++) {
4704 rid = i + 1;
4705 if (sc->msix_irq_res[i] != NULL)
4706 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4707 sc->msix_irq_res[i]);
4708 sc->msix_irq_res[i] = NULL;
4709 }
4710 free(sc->msix_irq_res, M_DEVBUF);
4711
4712 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4713 sc->msix_table_res);
4714
4715 pci_release_msi(sc->dev);
4716 return;
4717}
4718
4719static void
4720mxge_rem_single_irq(mxge_softc_t *sc)
4721{
4722 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4723 bus_release_resource(sc->dev, SYS_RES_IRQ,
4724 sc->legacy_irq ? 0 : 1, sc->irq_res);
4725 if (!sc->legacy_irq)
4726 pci_release_msi(sc->dev);
4727}
4728
4729static void
4730mxge_rem_irq(mxge_softc_t *sc)
4731{
4732 if (sc->num_slices > 1)
4733 mxge_rem_msix_irqs(sc);
4734 else
4735 mxge_rem_single_irq(sc);
4736}
4737
4738static int
4739mxge_add_irq(mxge_softc_t *sc)
4740{
4741 int err;
4742
4743 if (sc->num_slices > 1)
4744 err = mxge_add_msix_irqs(sc);
4745 else
4746 err = mxge_add_single_irq(sc);
4747
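	/*
	 * this MSI-X re-probe fallback is compiled out by the
	 * "0 &&" guard and is currently dead code
	 */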
4748 if (0 && err == 0 && sc->num_slices > 1) {
4749 mxge_rem_msix_irqs(sc);
4750 err = mxge_add_msix_irqs(sc);
4751 }
4752 return err;
4753}
4754
4756static int
4757mxge_attach(device_t dev)
4758{
4759 mxge_cmd_t cmd;
4760 mxge_softc_t *sc = device_get_softc(dev);
4761 struct ifnet *ifp;
4762 int err, rid;
4763
4764 sc->dev = dev;
4765 mxge_fetch_tunables(sc);
4766
4767 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4768 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4769 taskqueue_thread_enqueue, &sc->tq);
4770 if (sc->tq == NULL) {
4771 err = ENOMEM;
4772 goto abort_with_nothing;
4773 }
4774
4775 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4776 1, /* alignment */
4777 0, /* boundary */
4778 BUS_SPACE_MAXADDR, /* low */
4779 BUS_SPACE_MAXADDR, /* high */
4780 NULL, NULL, /* filter */
4781 65536 + 256, /* maxsize */
4782 MXGE_MAX_SEND_DESC, /* num segs */
4783 65536, /* maxsegsize */
4784 0, /* flags */
4785 NULL, NULL, /* lock */
4786 &sc->parent_dmat); /* tag */
4787
4788 if (err != 0) {
4789 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4790 err);
4791 goto abort_with_tq;
4792 }
4793
4794 ifp = sc->ifp = if_alloc(IFT_ETHER);
4795 if (ifp == NULL) {
4796 device_printf(dev, "can not if_alloc()\n");
4797 err = ENOSPC;
4798 goto abort_with_parent_dmat;
4799 }
4800 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4801
4802 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4803 device_get_nameunit(dev));
4804 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4805 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4806 "%s:drv", device_get_nameunit(dev));
4807 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4808 MTX_NETWORK_LOCK, MTX_DEF);
4809
4810 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4811
4812 mxge_setup_cfg_space(sc);
4813
4814 /* Map the board into the kernel */
4815 rid = PCIR_BARS;
4816 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4817 ~0, 1, RF_ACTIVE);
4818 if (sc->mem_res == NULL) {
4819 device_printf(dev, "could not map memory\n");
4820 err = ENXIO;
4821 goto abort_with_lock;
4822 }
4823 sc->sram = rman_get_virtual(sc->mem_res);
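	/*
	 * the top of the 2MB SRAM window is reserved (apparently
	 * for firmware use and the EEPROM strings read back below),
	 * so exclude it from the usable size
	 */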
4824 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4825 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4826 device_printf(dev, "impossible memory region size %ld\n",
4827 rman_get_size(sc->mem_res));
4828 err = ENXIO;
4829 goto abort_with_mem_res;
4830 }
4831
4832	/* make a NUL-terminated copy of the EEPROM strings section of
4833	   LANai SRAM */
4834 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4835 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4836 rman_get_bushandle(sc->mem_res),
4837 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4838 sc->eeprom_strings,
4839 MXGE_EEPROM_STRINGS_SIZE - 2);
4840 err = mxge_parse_strings(sc);
4841 if (err != 0)
4842 goto abort_with_mem_res;
4843
4844	/* Enable write combining for efficient use of the PCIe bus */
4845 mxge_enable_wc(sc);
4846
4847 /* Allocate the out of band dma memory */
4848 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4849 sizeof (mxge_cmd_t), 64);
4850 if (err != 0)
4851 goto abort_with_mem_res;
4852 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4853 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4854 if (err != 0)
4855 goto abort_with_cmd_dma;
4856
4857 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4858 if (err != 0)
4859 goto abort_with_zeropad_dma;
4860
4861 /* select & load the firmware */
4862 err = mxge_select_firmware(sc);
4863 if (err != 0)
4864 goto abort_with_dmabench;
4865 sc->intr_coal_delay = mxge_intr_coal_delay;
4866
4867 mxge_slice_probe(sc);
4868 err = mxge_alloc_slices(sc);
4869 if (err != 0)
4870 goto abort_with_dmabench;
4871
4872 err = mxge_reset(sc, 0);
4873 if (err != 0)
4874 goto abort_with_slices;
4875
4876 err = mxge_alloc_rings(sc);
4877 if (err != 0) {
4878 device_printf(sc->dev, "failed to allocate rings\n");
4879 goto abort_with_slices;
4880 }
4881
4882 err = mxge_add_irq(sc);
4883 if (err != 0) {
4884 device_printf(sc->dev, "failed to add irq\n");
4885 goto abort_with_rings;
4886 }
4887
4888 ifp->if_baudrate = IF_Gbps(10);
4889 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4890 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4891 IFCAP_RXCSUM_IPV6;
4892#if defined(INET) || defined(INET6)
4893 ifp->if_capabilities |= IFCAP_LRO;
4894#endif
4895
4896#ifdef MXGE_NEW_VLAN_API
4897 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4898
4899 /* Only FW 1.4.32 and newer can do TSO over vlans */
4900 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4901 sc->fw_ver_tiny >= 32)
4902 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4903#endif
4904 sc->max_mtu = mxge_max_mtu(sc);
4905 if (sc->max_mtu >= 9000)
4906 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4907 else
4908 device_printf(dev, "MTU limited to %d. Install "
4909 "latest firmware for 9000 byte jumbo support\n",
4910 sc->max_mtu - ETHER_HDR_LEN);
4911 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4912 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4913 /* check to see if f/w supports TSO for IPv6 */
4914 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
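		/*
		 * CSUM_TCP_IPV6 is a compile-time constant, so this
		 * guard only checks that the kernel defines IPv6
		 * checksum offload at all
		 */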
4915 if (CSUM_TCP_IPV6)
4916 ifp->if_capabilities |= IFCAP_TSO6;
4917 sc->max_tso6_hlen = min(cmd.data0,
4918 sizeof (sc->ss[0].scratch));
4919 }
4920 ifp->if_capenable = ifp->if_capabilities;
4921 if (sc->lro_cnt == 0)
4922 ifp->if_capenable &= ~IFCAP_LRO;
4923 ifp->if_init = mxge_init;
4924 ifp->if_softc = sc;
4925 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4926 ifp->if_ioctl = mxge_ioctl;
4927 ifp->if_start = mxge_start;
4928 ifp->if_get_counter = mxge_get_counter;
4929	/* Initialize the ifmedia structure */
4930 ifmedia_init(&sc->media, 0, mxge_media_change,
4931 mxge_media_status);
4932 mxge_media_init(sc);
4933 mxge_media_probe(sc);
4934 sc->dying = 0;
4935 ether_ifattach(ifp, sc->mac_addr);
4936 /* ether_ifattach sets mtu to ETHERMTU */
4937 if (mxge_initial_mtu != ETHERMTU)
4938 mxge_change_mtu(sc, mxge_initial_mtu);
4939
4940 mxge_add_sysctls(sc);
4941#ifdef IFNET_BUF_RING
4942 ifp->if_transmit = mxge_transmit;
4943 ifp->if_qflush = mxge_qflush;
4944#endif
4945 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4946 device_get_nameunit(sc->dev));
4947 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4948 return 0;
4949
4950abort_with_rings:
4951 mxge_free_rings(sc);
4952abort_with_slices:
4953 mxge_free_slices(sc);
4954abort_with_dmabench:
4955 mxge_dma_free(&sc->dmabench_dma);
4956abort_with_zeropad_dma:
4957 mxge_dma_free(&sc->zeropad_dma);
4958abort_with_cmd_dma:
4959 mxge_dma_free(&sc->cmd_dma);
4960abort_with_mem_res:
4961 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4962abort_with_lock:
4963 pci_disable_busmaster(dev);
4964 mtx_destroy(&sc->cmd_mtx);
4965 mtx_destroy(&sc->driver_mtx);
4966 if_free(ifp);
4967abort_with_parent_dmat:
4968 bus_dma_tag_destroy(sc->parent_dmat);
4969abort_with_tq:
4970 if (sc->tq != NULL) {
4971 taskqueue_drain(sc->tq, &sc->watchdog_task);
4972 taskqueue_free(sc->tq);
4973 sc->tq = NULL;
4974 }
4975abort_with_nothing:
4976 return err;
4977}
4978
4979static int
4980mxge_detach(device_t dev)
4981{
4982 mxge_softc_t *sc = device_get_softc(dev);
4983
4984 if (mxge_vlans_active(sc)) {
4985 device_printf(sc->dev,
4986 "Detach vlans before removing module\n");
4987 return EBUSY;
4988 }
4989 mtx_lock(&sc->driver_mtx);
4990 sc->dying = 1;
4991 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4992 mxge_close(sc, 0);
4993 mtx_unlock(&sc->driver_mtx);
4994 ether_ifdetach(sc->ifp);
4995 if (sc->tq != NULL) {
4996 taskqueue_drain(sc->tq, &sc->watchdog_task);
4997 taskqueue_free(sc->tq);
4998 sc->tq = NULL;
4999 }
5000 callout_drain(&sc->co_hdl);
5001 ifmedia_removeall(&sc->media);
5002 mxge_dummy_rdma(sc, 0);
5003 mxge_rem_sysctls(sc);
5004 mxge_rem_irq(sc);
5005 mxge_free_rings(sc);
5006 mxge_free_slices(sc);
5007 mxge_dma_free(&sc->dmabench_dma);
5008 mxge_dma_free(&sc->zeropad_dma);
5009 mxge_dma_free(&sc->cmd_dma);
5010 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5011 pci_disable_busmaster(dev);
5012 mtx_destroy(&sc->cmd_mtx);
5013 mtx_destroy(&sc->driver_mtx);
5014 if_free(sc->ifp);
5015 bus_dma_tag_destroy(sc->parent_dmat);
5016 return 0;
5017}
5018
5019static int
5020mxge_shutdown(device_t dev)
5021{
5022 return 0;
5023}
5024
5025/*
5026 This file uses Myri10GE driver indentation.
5027
5028 Local Variables:
5029 c-file-style:"linux"
5030 tab-width:8
5031 End:
5032*/