/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 249586 2013-04-17 11:47:32Z gabor $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
#include "opt_inet6.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),

	DEVMETHOD_END
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;


	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
			    rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
	    len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
		    err);
		sc->wc = 0;
	}
#endif
}


/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
    int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
    bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}
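
	/*
	 * Rationale sketch (not from the original source): a page-aligned
	 * request larger than a page is handed to busdma as one unbounded
	 * segment, while anything smaller is confined by a 4KB boundary
	 * so its single segment can never straddle a page.
	 */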

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
	    alignment,			/* alignment */
	    boundary,			/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    NULL, NULL,			/* filter */
	    bytes,			/* maxsize */
	    1,				/* num segs */
	    maxsegsize,			/* maxsegsize */
	    BUS_DMA_COHERENT,		/* flags */
	    NULL, NULL,			/* lock */
	    &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
	    (BUS_DMA_WAITOK | BUS_DMA_COHERENT
	     | BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
	    mxge_dmamap_callback,
	    (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}


static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
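/*
 * For example, a board might carry (hypothetical values):
 *   SN=123456\0MAC=00:60:dd:12:34:56\0PC=M10GE-8A\0\0
 * mxge_parse_strings() below walks these NUL-separated tokens; an
 * empty string (double NUL) terminates the walk.
 */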

static int
mxge_parse_strings(mxge_softc_t *sc)
{
	char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machines the 0xe0000000 mapping is
	 * handled by the nvidia chipset; that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_DEVICE, &idev);

	off = base
	    + 0x00100000UL * (unsigned long)bus
	    + 0x00001000UL * (unsigned long)(func
					     + 8 * slot);
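
	/* Worked example (hypothetical device at bus 5, slot 4, func 0,
	 * with the ck804 base): off = 0xe0000000 + 5 * 0x00100000 +
	 * (0 + 8 * 4) * 0x00001000 = 0xe0520000, i.e. 1MB per bus and
	 * 4KB per (slot, function) within the extended config aperture.
	 */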

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
		    vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
		    "Enabled ECRC on upstream Nvidia bridge "
		    "at %d:%d:%d\n",
		    (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
	    "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif


static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
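
	/* Example of the arithmetic used below: a hypothetical result of
	 * cmd.data0 = 0x0064019a means 0x64 (100) transfers took 0x19a
	 * (410) ticks of 0.5us, so with len = 4096 the rate works out to
	 * (100 * 4096 * 2) / 410 ~= 1998 bytes/us, i.e. ~1998 MB/s;
	 * the "* 2" converts half-microsecond ticks to microseconds.
	 */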

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
	    (cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
	    (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
		    test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
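	/* The test below reads the PCIe Device Control register (offset
	 * 0x8 within the PCIe capability); bits 14:12 encode the max
	 * read request size, and the encoding 101b (5) selects 4096
	 * bytes, hence the (5 << 12) comparison.
	 */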
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
			    pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen. Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;
	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
		    "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
			    "Assuming %s completions (forced)\n",
			    aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{


	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
		    be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	    &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
		    sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
		    MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}


static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
		    sc->fw_name);
		return ENOENT;
	}



	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
	    (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
		    inflate_buffer + i,
		    min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

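	/* Align-up idiom: adding 7 and clearing the low three bits
	 * rounds buf_bytes up to the next 8-byte boundary (e.g. an
	 * address ending in ...5 becomes ...8), since the command
	 * block handed to the firmware must be 8-byte aligned.
	 */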
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */


	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
		    (enable ? "enable" : "disable"), confirm,
		    *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
		    sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			device_printf(sc->dev,
			    "mxge: command %d failed, result = %d\n",
			    cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
		    "result = %d\n",
		    cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}
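
/*
 * A minimal usage sketch (illustrative, not a call site in this file):
 * callers pass arguments in the 32-bit data words and read any result
 * back from data0, e.g.:
 *
 *	mxge_cmd_t cmd;
 *	cmd.data0 = 0;
 *	if (mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd) == 0)
 *		rx_ring_size = cmd.data0;
 */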

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
	    (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
		    "Running firmware has bad header offset (%d)\n",
		    (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
		    "working around rx filter bug\n",
		    sc->fw_ver_major, sc->fw_ver_minor,
		    sc->fw_ver_tiny);
	}

	return status;
}


static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
			    "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
		    "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
			    "Using firmware currently running on NIC"
			    ". For optimal\n");
			device_printf(sc->dev,
			    "performance consider loading optimized "
			    "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
		    sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
		    confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;


	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
	    | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
		    &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
		    &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
		    &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
		    &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
		    " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
		    "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
		    ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		    &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
			    "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
			    "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
		    ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			device_printf(sc->dev,
			    "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
		    &cmd);
		if (status != 0) {
			device_printf(sc->dev,
			    "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
			    MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	status |= mxge_send_cmd(sc,
	    MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
	    &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed to set interrupt parameters\n");
		return status;
	}


	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lc.lro_bad_csum = 0;
		ss->lc.lro_queued = 0;
		ss->lc.lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
		    &cmd)) {
			device_printf(sc->dev,
			    "can't enable throttle\n");
		}
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "firmware_version",
	    CTLFLAG_RD, &sc->fw_version,
	    0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "serial_number",
	    CTLFLAG_RD, &sc->serial_number_string,
	    0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
	    "product_code",
	    CTLFLAG_RD, &sc->product_code_string,
	    0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "pcie_link_width",
	    CTLFLAG_RD, &sc->link_width,
	    0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "tx_boundary",
	    CTLFLAG_RD, &sc->tx_boundary,
	    0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "write_combine",
	    CTLFLAG_RD, &sc->wc,
	    0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "read_dma_MBs",
	    CTLFLAG_RD, &sc->read_dma,
	    0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "write_dma_MBs",
	    CTLFLAG_RD, &sc->write_dma,
	    0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "read_write_dma_MBs",
	    CTLFLAG_RD, &sc->read_write_dma,
	    0, "DMA concurrent Read/Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "watchdog_resets",
	    CTLFLAG_RD, &sc->watchdog_resets,
	    0, "Number of times NIC was reset");


	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "intr_coal_delay",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_intr_coal,
	    "I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "throttle",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_throttle,
	    "I", "transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "flow_control_enabled",
	    CTLTYPE_INT|CTLFLAG_RW, sc,
	    0, mxge_change_flow_control,
	    "I", "enable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "deassert_wait",
	    CTLFLAG_RW, &mxge_deassert_wait,
	    0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "link_up",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
	    0, mxge_handle_be32,
	    "I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "rdma_tags_available",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
	    0, mxge_handle_be32,
	    "I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_crc32",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_bad_crc32,
	    0, mxge_handle_be32,
	    "I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_bad_phy",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_bad_phy,
	    0, mxge_handle_be32,
	    "I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_error_or_filtered",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_link_error_or_filtered,
	    0, mxge_handle_be32,
	    "I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_link_overflow",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
	    0, mxge_handle_be32,
	    "I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_multicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_multicast_filtered,
	    0, mxge_handle_be32,
	    "I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_big_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
	    0, mxge_handle_be32,
	    "I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_no_small_buffer",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_no_small_buffer,
	    0, mxge_handle_be32,
	    "I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_overrun",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
	    0, mxge_handle_be32,
	    "I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_pause",
	    CTLTYPE_INT|CTLFLAG_RD,
	    &fw->dropped_pause,
	    0, mxge_handle_be32,
	    "I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_runt",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
	    0, mxge_handle_be32,
	    "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
	    "dropped_unicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
	    0, mxge_handle_be32,
	    "I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
	    "verbose",
	    CTLFLAG_RW, &mxge_verbose,
	    0, "verbose printing");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
	    SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
	    "slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
		    SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
		    CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "rx_small_cnt",
		    CTLFLAG_RD, &ss->rx_small.cnt,
		    0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "rx_big_cnt",
		    CTLFLAG_RD, &ss->rx_big.cnt,
		    0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
		    0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
		    0, "number of bad csums preventing LRO");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
		    0, "number of frames appended to lro merge "
		    "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_req",
		    CTLFLAG_RD, &ss->tx.req,
		    0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_done",
		    CTLFLAG_RD, &ss->tx.done,
		    0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_pkt_done",
		    CTLFLAG_RD, &ss->tx.pkt_done,
		    0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_stall",
		    CTLFLAG_RD, &ss->tx.stall,
		    0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_wake",
		    CTLFLAG_RD, &ss->tx.wake,
		    0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_defrag",
		    CTLFLAG_RD, &ss->tx.defrag,
		    0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_queue_active",
		    CTLFLAG_RD, &ss->tx.queue_active,
		    0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_activate",
		    CTLFLAG_RD, &ss->tx.activate,
		    0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		    "tx_deactivate",
		    CTLFLAG_RD, &ss->tx.deactivate,
		    0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
   backwards one at a time and handle ring wraps */
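/* (Why backwards: every slot after the first is already written by the
   time mxge_submit_req() finally validates the first slot's flags, so
   the NIC never observes a partially written chain across the wrap.) */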

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
		    &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
    int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
    struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
			    ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
			    ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
    int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
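	/* e.g. for a plain Ethernet + IPv4 + TCP frame with no options,
	 * the headers are 14 + 20 + 20 = 54 bytes, so cum_len starts at
	 * -54 and crosses zero exactly where the TSO payload begins.
	 */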
1876
1877 /* TSO implies checksum offload on this hardware */
1878 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1879 /*
1880 * If packet has full TCP csum, replace it with pseudo hdr
1881 * sum that the NIC expects, otherwise the NIC will emit
1882 * packets with bad TCP checksums.
1883 */
1884 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1885 if (pi->ip6) {
1886#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1887 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1888 sum = in6_cksum_pseudo(pi->ip6,
1889 m->m_pkthdr.len - cksum_offset,
1890 IPPROTO_TCP, 0);
1891#endif
1892 } else {
1893#ifdef INET
1894 m->m_pkthdr.csum_flags |= CSUM_TCP;
1895 sum = in_pseudo(pi->ip->ip_src.s_addr,
1896 pi->ip->ip_dst.s_addr,
1897 htons(IPPROTO_TCP + (m->m_pkthdr.len -
1898 cksum_offset)));
1899#endif
1900 }
1901 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1902 cksum_offset, sizeof(sum), (caddr_t)&sum);
1903 }
1904 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1905
1906
1907 /* for TSO, pseudo_hdr_offset holds mss.
1908 * The firmware figures out where to put
1909 * the checksum by parsing the header. */
1910 pseudo_hdr_offset = htobe16(mss);
1911
1912 if (pi->ip6) {
1913 /*
1914 * for IPv6 TSO, the "checksum offset" is re-purposed
1915 * to store the TCP header len
1916 */
1917 cksum_offset = (pi->tcp->th_off << 2);
1918 }
1919
1920 tx = &ss->tx;
1921 req = tx->req_list;
1922 seg = tx->seg_list;
1923 cnt = 0;
1924 rdma_count = 0;
1925 /* "rdma_count" is the number of RDMAs belonging to the
1926 * current packet BEFORE the current send request. For
1927 * non-TSO packets, this is equal to "count".
1928 * For TSO packets, rdma_count needs to be reset
1929 * to 0 after a segment cut.
1930 *
1931 * The rdma_count field of the send request is
1932 * the number of RDMAs of the packet starting at
1933	 * that request. For TSO send requests with one or more cuts
1934 * in the middle, this is the number of RDMAs starting
1935 * after the last cut in the request. All previous
1936 * segments before the last cut implicitly have 1 RDMA.
1937 *
1938 * Since the number of RDMAs is not known beforehand,
1939 * it must be filled-in retroactively - after each
1940 * segmentation cut or at the end of the entire packet.
1941 */
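	/*
	 * Illustrative note (added commentary): the first trip through
	 * the loop below carves the protocol headers off the leading
	 * busdma segment (the "header ends" branch sets seglen =
	 * -cum_len, bringing cum_len to exactly 0); later descriptors
	 * cover payload, and any descriptor whose end crosses a multiple
	 * of mss is tagged MXGEFW_FLAGS_TSO_CHOP, restarting rdma_count
	 * so it can be back-filled via (req - rdma_count)->rdma_count.
	 */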
1942
1943 while (busdma_seg_cnt) {
1944		/* Break the busdma segment up into pieces */
1945 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1946 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1947 len = seg->ds_len;
1948
1949 while (len) {
1950 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1951 seglen = len;
1952 cum_len_next = cum_len + seglen;
1953 (req-rdma_count)->rdma_count = rdma_count + 1;
1954 if (__predict_true(cum_len >= 0)) {
1955 /* payload */
1956 chop = (cum_len_next > mss);
1957 cum_len_next = cum_len_next % mss;
1958 next_is_first = (cum_len_next == 0);
1959 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1960 flags_next |= next_is_first *
1961 MXGEFW_FLAGS_FIRST;
1962 rdma_count |= -(chop | next_is_first);
1963 rdma_count += chop & !next_is_first;
1964 } else if (cum_len_next >= 0) {
1965 /* header ends */
1966 rdma_count = -1;
1967 cum_len_next = 0;
1968 seglen = -cum_len;
1969 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1970 flags_next = MXGEFW_FLAGS_TSO_PLD |
1971 MXGEFW_FLAGS_FIRST |
1972 (small * MXGEFW_FLAGS_SMALL);
1973 }
1974
1975 req->addr_high = high_swapped;
1976 req->addr_low = htobe32(low);
1977 req->pseudo_hdr_offset = pseudo_hdr_offset;
1978 req->pad = 0;
1979 req->rdma_count = 1;
1980 req->length = htobe16(seglen);
1981 req->cksum_offset = cksum_offset;
1982 req->flags = flags | ((cum_len & 1) *
1983 MXGEFW_FLAGS_ALIGN_ODD);
1984 low += seglen;
1985 len -= seglen;
1986 cum_len = cum_len_next;
1987 flags = flags_next;
1988 req++;
1989 cnt++;
1990 rdma_count++;
1991 if (cksum_offset != 0 && !pi->ip6) {
1992 if (__predict_false(cksum_offset > seglen))
1993 cksum_offset -= seglen;
1994 else
1995 cksum_offset = 0;
1996 }
1997 if (__predict_false(cnt > tx->max_desc))
1998 goto drop;
1999 }
2000 busdma_seg_cnt--;
2001 seg++;
2002 }
2003 (req-rdma_count)->rdma_count = rdma_count;
2004
2005 do {
2006 req--;
2007 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2008 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
2009
2010 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2011 mxge_submit_req(tx, tx->req_list, cnt);
2012#ifdef IFNET_BUF_RING
2013 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2014 /* tell the NIC to start polling this slice */
2015 *tx->send_go = 1;
2016 tx->queue_active = 1;
2017 tx->activate++;
2018 wmb();
2019 }
2020#endif
2021 return;
2022
2023drop:
2024 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2025 m_freem(m);
2026 ss->oerrors++;
2027 if (!once) {
2028 printf("tx->max_desc exceeded via TSO!\n");
2029 printf("mss = %d, %ld, %d!\n", mss,
2030 (long)seg - (long)tx->seg_list, tx->max_desc);
2031 once = 1;
2032 }
2033 return;
2034
2035}
2036
2037#endif /* IFCAP_TSO4 */
2038
2039#ifdef MXGE_NEW_VLAN_API
2040/*
2041 * We reproduce the software vlan tag insertion from
2042 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2043 * vlan tag insertion. We need to advertise this in order to have the
2044 * vlan interface respect our csum offload flags.
2045 */
2046static struct mbuf *
2047mxge_vlan_tag_insert(struct mbuf *m)
2048{
2049 struct ether_vlan_header *evl;
2050
2051 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2052 if (__predict_false(m == NULL))
2053 return NULL;
2054 if (m->m_len < sizeof(*evl)) {
2055 m = m_pullup(m, sizeof(*evl));
2056 if (__predict_false(m == NULL))
2057 return NULL;
2058 }
2059 /*
2060 * Transform the Ethernet header into an Ethernet header
2061 * with 802.1Q encapsulation.
2062 */
2063 evl = mtod(m, struct ether_vlan_header *);
2064 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2065 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2066 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2067 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2068 m->m_flags &= ~M_VLANTAG;
2069 return m;
2070}
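/*
 * Illustrative note (added commentary): the net effect on the frame
 * layout is
 *
 *	before:	dst[6] src[6] type[2] payload ...
 *	after:	dst[6] src[6] 0x8100[2] tag[2] type[2] payload ...
 *
 * M_PREPEND opens 4 bytes at the front, and the 12 address bytes are
 * slid back over them, leaving the 802.1Q TPID/tag pair between the
 * addresses and the original type field.
 */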
2071#endif /* MXGE_NEW_VLAN_API */
2072
2073static void
2074mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2075{
2076 struct mxge_pkt_info pi = {0,0,0,0};
2077 mxge_softc_t *sc;
2078 mcp_kreq_ether_send_t *req;
2079 bus_dma_segment_t *seg;
2080 struct mbuf *m_tmp;
2081 struct ifnet *ifp;
2082 mxge_tx_ring_t *tx;
2083 int cnt, cum_len, err, i, idx, odd_flag;
2084 uint16_t pseudo_hdr_offset;
2085 uint8_t flags, cksum_offset;
2086
2087
2088 sc = ss->sc;
2089 ifp = sc->ifp;
2090 tx = &ss->tx;
2091
2092#ifdef MXGE_NEW_VLAN_API
2093 if (m->m_flags & M_VLANTAG) {
2094 m = mxge_vlan_tag_insert(m);
2095 if (__predict_false(m == NULL))
2096 goto drop_without_m;
2097 }
2098#endif
2099 if (m->m_pkthdr.csum_flags &
2100 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2101 if (mxge_parse_tx(ss, m, &pi))
2102 goto drop;
2103 }
2104
2105 /* (try to) map the frame for DMA */
2106 idx = tx->req & tx->mask;
2107 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2108 m, tx->seg_list, &cnt,
2109 BUS_DMA_NOWAIT);
2110 if (__predict_false(err == EFBIG)) {
2111 /* Too many segments in the chain. Try
2112 to defrag */
2113 m_tmp = m_defrag(m, M_NOWAIT);
2114 if (m_tmp == NULL) {
2115 goto drop;
2116 }
2117 ss->tx.defrag++;
2118 m = m_tmp;
2119 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2120 tx->info[idx].map,
2121 m, tx->seg_list, &cnt,
2122 BUS_DMA_NOWAIT);
2123 }
2124 if (__predict_false(err != 0)) {
2125 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2126 " packet len = %d\n", err, m->m_pkthdr.len);
2127 goto drop;
2128 }
2129 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2130 BUS_DMASYNC_PREWRITE);
2131 tx->info[idx].m = m;
2132
2133#if IFCAP_TSO4
2134 /* TSO is different enough, we handle it in another routine */
2135 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2136 mxge_encap_tso(ss, m, cnt, &pi);
2137 return;
2138 }
2139#endif
2140
2141 req = tx->req_list;
2142 cksum_offset = 0;
2143 pseudo_hdr_offset = 0;
2144 flags = MXGEFW_FLAGS_NO_TSO;
2145
2146 /* checksum offloading? */
2147 if (m->m_pkthdr.csum_flags &
2148 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2149		/* the IP header offset and length were already
2150		   computed for us by mxge_parse_tx() above */
2151 cksum_offset = pi.ip_off + pi.ip_hlen;
2152 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2153 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2154 req->cksum_offset = cksum_offset;
2155 flags |= MXGEFW_FLAGS_CKSUM;
2156 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2157 } else {
2158 odd_flag = 0;
2159 }
2160 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2161 flags |= MXGEFW_FLAGS_SMALL;
2162
2163 /* convert segments into a request list */
2164 cum_len = 0;
2165 seg = tx->seg_list;
2166 req->flags = MXGEFW_FLAGS_FIRST;
2167 for (i = 0; i < cnt; i++) {
2168 req->addr_low =
2169 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2170 req->addr_high =
2171 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2172 req->length = htobe16(seg->ds_len);
2173 req->cksum_offset = cksum_offset;
2174 if (cksum_offset > seg->ds_len)
2175 cksum_offset -= seg->ds_len;
2176 else
2177 cksum_offset = 0;
2178 req->pseudo_hdr_offset = pseudo_hdr_offset;
2179 req->pad = 0; /* complete solid 16-byte block */
2180 req->rdma_count = 1;
2181 req->flags |= flags | ((cum_len & 1) * odd_flag);
2182 cum_len += seg->ds_len;
2183 seg++;
2184 req++;
2185 req->flags = 0;
2186 }
2187 req--;
2188	/* pad runts to 60 bytes (minimum Ethernet frame size without the FCS) */
2189 if (cum_len < 60) {
2190 req++;
2191 req->addr_low =
2192 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2193 req->addr_high =
2194 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2195 req->length = htobe16(60 - cum_len);
2196 req->cksum_offset = 0;
2197 req->pseudo_hdr_offset = pseudo_hdr_offset;
2198 req->pad = 0; /* complete solid 16-byte block */
2199 req->rdma_count = 1;
2200 req->flags |= flags | ((cum_len & 1) * odd_flag);
2201 cnt++;
2202 }
2203
2204 tx->req_list[0].rdma_count = cnt;
2205#if 0
2206 /* print what the firmware will see */
2207 for (i = 0; i < cnt; i++) {
2208 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2209 "cso:%d, flags:0x%x, rdma:%d\n",
2210 i, (int)ntohl(tx->req_list[i].addr_high),
2211 (int)ntohl(tx->req_list[i].addr_low),
2212 (int)ntohs(tx->req_list[i].length),
2213 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2214 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2215 tx->req_list[i].rdma_count);
2216 }
2217 printf("--------------\n");
2218#endif
2219 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2220 mxge_submit_req(tx, tx->req_list, cnt);
2221#ifdef IFNET_BUF_RING
2222 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2223 /* tell the NIC to start polling this slice */
2224 *tx->send_go = 1;
2225 tx->queue_active = 1;
2226 tx->activate++;
2227 wmb();
2228 }
2229#endif
2230 return;
2231
2232drop:
2233 m_freem(m);
2234drop_without_m:
2235 ss->oerrors++;
2236 return;
2237}
2238
2239#ifdef IFNET_BUF_RING
2240static void
2241mxge_qflush(struct ifnet *ifp)
2242{
2243 mxge_softc_t *sc = ifp->if_softc;
2244 mxge_tx_ring_t *tx;
2245 struct mbuf *m;
2246 int slice;
2247
2248 for (slice = 0; slice < sc->num_slices; slice++) {
2249 tx = &sc->ss[slice].tx;
2250 mtx_lock(&tx->mtx);
2251 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2252 m_freem(m);
2253 mtx_unlock(&tx->mtx);
2254 }
2255 if_qflush(ifp);
2256}
2257
2258static inline void
2259mxge_start_locked(struct mxge_slice_state *ss)
2260{
2261 mxge_softc_t *sc;
2262 struct mbuf *m;
2263 struct ifnet *ifp;
2264 mxge_tx_ring_t *tx;
2265
2266 sc = ss->sc;
2267 ifp = sc->ifp;
2268 tx = &ss->tx;
2269
2270 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2271 m = drbr_dequeue(ifp, tx->br);
2272 if (m == NULL) {
2273 return;
2274 }
2275 /* let BPF see it */
2276 BPF_MTAP(ifp, m);
2277
2278 /* give it to the nic */
2279 mxge_encap(ss, m);
2280 }
2281 /* ran out of transmit slots */
2282 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2283 && (!drbr_empty(ifp, tx->br))) {
2284 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2285 tx->stall++;
2286 }
2287}
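/*
 * Note on the ring-space test above (added commentary): with
 * (tx->mask + 1) ring entries and (tx->req - tx->done) descriptors
 * still in flight, "tx->mask - (tx->req - tx->done)" is the free space
 * minus one; requiring it to exceed tx->max_desc guarantees that even a
 * worst-case packet needing max_desc descriptors fits without
 * overwriting unreclaimed entries.
 */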
2288
2289static int
2290mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2291{
2292 mxge_softc_t *sc;
2293 struct ifnet *ifp;
2294 mxge_tx_ring_t *tx;
2295 int err;
2296
2297 sc = ss->sc;
2298 ifp = sc->ifp;
2299 tx = &ss->tx;
2300
2301 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2302 IFF_DRV_RUNNING) {
2303 err = drbr_enqueue(ifp, tx->br, m);
2304 return (err);
2305 }
2306
2307 if (!drbr_needs_enqueue(ifp, tx->br) &&
2308 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2309 /* let BPF see it */
2310 BPF_MTAP(ifp, m);
2311 /* give it to the nic */
2312 mxge_encap(ss, m);
2313 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2314 return (err);
2315 }
2316 if (!drbr_empty(ifp, tx->br))
2317 mxge_start_locked(ss);
2318 return (0);
2319}
2320
2321static int
2322mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2323{
2324 mxge_softc_t *sc = ifp->if_softc;
2325 struct mxge_slice_state *ss;
2326 mxge_tx_ring_t *tx;
2327 int err = 0;
2328 int slice;
2329
2330 slice = m->m_pkthdr.flowid;
2331 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2332
2333 ss = &sc->ss[slice];
2334 tx = &ss->tx;
2335
2336 if (mtx_trylock(&tx->mtx)) {
2337 err = mxge_transmit_locked(ss, m);
2338 mtx_unlock(&tx->mtx);
2339 } else {
2340 err = drbr_enqueue(ifp, tx->br, m);
2341 }
2342
2343 return (err);
2344}
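/*
 * Note (added commentary): since num_slices is always a power of 2, the
 * AND with (sc->num_slices - 1) above is a cheap "flowid % num_slices";
 * e.g. with 4 slices, flowids 5 and 9 both select slice 1.
 */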
2345
2346#else
2347
2348static inline void
2349mxge_start_locked(struct mxge_slice_state *ss)
2350{
2351 mxge_softc_t *sc;
2352 struct mbuf *m;
2353 struct ifnet *ifp;
2354 mxge_tx_ring_t *tx;
2355
2356 sc = ss->sc;
2357 ifp = sc->ifp;
2358 tx = &ss->tx;
2359 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2360 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2361 if (m == NULL) {
2362 return;
2363 }
2364 /* let BPF see it */
2365 BPF_MTAP(ifp, m);
2366
2367 /* give it to the nic */
2368 mxge_encap(ss, m);
2369 }
2370 /* ran out of transmit slots */
2371	if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2372		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2373 tx->stall++;
2374 }
2375}
2376#endif
2377static void
2378mxge_start(struct ifnet *ifp)
2379{
2380 mxge_softc_t *sc = ifp->if_softc;
2381 struct mxge_slice_state *ss;
2382
2383 /* only use the first slice for now */
2384 ss = &sc->ss[0];
2385 mtx_lock(&ss->tx.mtx);
2386 mxge_start_locked(ss);
2387 mtx_unlock(&ss->tx.mtx);
2388}
2389
2390/*
2391 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2392 * at most 32 bytes at a time, so as to avoid involving the software
2393 * pio handler in the nic. We re-write the first segment's low
2394 * DMA address to mark it valid only after we write the entire chunk
2395 * in a burst.
2396 */
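/*
 * Added commentary: addr_low is first overwritten with 0xffffffff so
 * that, while the two 32-byte PIO bursts below are still landing, the
 * first descriptor looks invalid to the NIC; re-writing the real low
 * address after the bursts (and a write barrier) publishes all eight
 * receive descriptors at once.
 */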
2397static inline void
2398mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2399 mcp_kreq_ether_recv_t *src)
2400{
2401 uint32_t low;
2402
2403 low = src->addr_low;
2404 src->addr_low = 0xffffffff;
2405 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2406 wmb();
2407 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2408 wmb();
2409 src->addr_low = low;
2410 dst->addr_low = low;
2411 wmb();
2412}
2413
2414static int
2415mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2416{
2417 bus_dma_segment_t seg;
2418 struct mbuf *m;
2419 mxge_rx_ring_t *rx = &ss->rx_small;
2420 int cnt, err;
2421
2422 m = m_gethdr(M_NOWAIT, MT_DATA);
2423 if (m == NULL) {
2424 rx->alloc_fail++;
2425 err = ENOBUFS;
2426 goto done;
2427 }
2428 m->m_len = MHLEN;
2429 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2430 &seg, &cnt, BUS_DMA_NOWAIT);
2431 if (err != 0) {
2432 m_free(m);
2433 goto done;
2434 }
2435 rx->info[idx].m = m;
2436 rx->shadow[idx].addr_low =
2437 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2438 rx->shadow[idx].addr_high =
2439 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2440
2441done:
2442 if ((idx & 7) == 7)
2443 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2444 return err;
2445}
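/*
 * Added commentary: receive buffers are posted to the NIC eight at a
 * time; "(idx & 7) == 7" fires on every 8th ring slot, so filling slots
 * 0..7 ends with a single mxge_submit_8rx() covering lanai[0] through
 * lanai[7].
 */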
2446
2447static int
2448mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2449{
2450 bus_dma_segment_t seg[3];
2451 struct mbuf *m;
2452 mxge_rx_ring_t *rx = &ss->rx_big;
2453 int cnt, err, i;
2454
2455 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2456 if (m == NULL) {
2457 rx->alloc_fail++;
2458 err = ENOBUFS;
2459 goto done;
2460 }
2461 m->m_len = rx->mlen;
2462 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2463 seg, &cnt, BUS_DMA_NOWAIT);
2464 if (err != 0) {
2465 m_free(m);
2466 goto done;
2467 }
2468 rx->info[idx].m = m;
2469 rx->shadow[idx].addr_low =
2470 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2471 rx->shadow[idx].addr_high =
2472 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2473
2474#if MXGE_VIRT_JUMBOS
2475 for (i = 1; i < cnt; i++) {
2476 rx->shadow[idx + i].addr_low =
2477 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2478 rx->shadow[idx + i].addr_high =
2479 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2480 }
2481#endif
2482
2483done:
2484 for (i = 0; i < rx->nbufs; i++) {
2485 if ((idx & 7) == 7) {
2486 mxge_submit_8rx(&rx->lanai[idx - 7],
2487 &rx->shadow[idx - 7]);
2488 }
2489 idx++;
2490 }
2491 return err;
2492}
2493
2494#ifdef INET6
2495
2496static uint16_t
2497mxge_csum_generic(uint16_t *raw, int len)
2498{
2499 uint32_t csum;
2500
2501
2502 csum = 0;
2503 while (len > 0) {
2504 csum += *raw;
2505 raw++;
2506 len -= 2;
2507 }
2508 csum = (csum >> 16) + (csum & 0xffff);
2509 csum = (csum >> 16) + (csum & 0xffff);
2510 return (uint16_t)csum;
2511}
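/*
 * Worked example of the double fold above (assumed value, not from the
 * original source): an accumulator of 0x2f00d folds to 0x2 + 0xf00d =
 * 0xf00f, and the second fold is then a no-op; two folds are needed
 * because the first addition can itself carry into bit 16.
 */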
2512
2513static inline uint16_t
2514mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2515{
2516 uint32_t partial;
2517 int nxt, cksum_offset;
2518 struct ip6_hdr *ip6 = p;
2519 uint16_t c;
2520
2521 nxt = ip6->ip6_nxt;
2522 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2523 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2524 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2525 IPPROTO_IPV6, &nxt);
2526 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2527 return (1);
2528 }
2529
2530 /*
2531 * IPv6 headers do not contain a checksum, and hence
2532 * do not checksum to zero, so they don't "fall out"
2533 * of the partial checksum calculation like IPv4
2534 * headers do. We need to fix the partial checksum by
2535 * subtracting the checksum of the IPv6 header.
2536 */
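	/*
	 * Added commentary: in ones-complement arithmetic, subtracting
	 * "partial" is performed by adding its bitwise complement and
	 * folding the carries, which is exactly what the next four
	 * statements do.
	 */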
2537
2538 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2539 ETHER_HDR_LEN);
2540 csum += ~partial;
2541 csum += (csum < ~partial);
2542 csum = (csum >> 16) + (csum & 0xFFFF);
2543 csum = (csum >> 16) + (csum & 0xFFFF);
2544 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2545 csum);
2546 c ^= 0xffff;
2547 return (c);
2548}
2549#endif /* INET6 */
2550/*
2551 * Myri10GE hardware checksums are not valid if the sender
2552 * padded the frame with non-zero padding. This is because
2553 * the firmware just does a simple 16-bit 1s complement
2554 * checksum across the entire frame, excluding the first 14
2555 * bytes. It is best to simply check the checksum and
2556 * tell the stack about it only if the checksum is good.
2557 */
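/*
 * Added commentary on the IPv4 verification below: a correct TCP/UDP
 * checksum makes the ones-complement sum over pseudo-header plus
 * transport data come out to 0xffff (and a valid IPv4 header itself
 * sums to 0xffff, so it drops out), hence folding the firmware's frame
 * checksum through in_pseudo() and XOR-ing with 0xffff yields 0 exactly
 * when the packet is good.
 */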
2558
2559static inline uint16_t
2560mxge_rx_csum(struct mbuf *m, int csum)
2561{
2562 struct ether_header *eh;
2563#ifdef INET
2564 struct ip *ip;
2565#endif
2566#if defined(INET) || defined(INET6)
2567 int cap = m->m_pkthdr.rcvif->if_capenable;
2568#endif
2569 uint16_t c, etype;
2570
2571
2572 eh = mtod(m, struct ether_header *);
2573 etype = ntohs(eh->ether_type);
2574 switch (etype) {
2575#ifdef INET
2576 case ETHERTYPE_IP:
2577 if ((cap & IFCAP_RXCSUM) == 0)
2578 return (1);
2579 ip = (struct ip *)(eh + 1);
2580 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2581 return (1);
2582 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2583 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2584 (ip->ip_hl << 2) + ip->ip_p));
2585 c ^= 0xffff;
2586 break;
2587#endif
2588#ifdef INET6
2589 case ETHERTYPE_IPV6:
2590 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2591 return (1);
2592 c = mxge_rx_csum6((eh + 1), m, csum);
2593 break;
2594#endif
2595 default:
2596 c = 1;
2597 }
2598 return (c);
2599}
2600
2601static void
2602mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2603{
2604 struct ether_vlan_header *evl;
2605 struct ether_header *eh;
2606 uint32_t partial;
2607
2608 evl = mtod(m, struct ether_vlan_header *);
2609 eh = mtod(m, struct ether_header *);
2610
2611 /*
2612 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2613 * after what the firmware thought was the end of the ethernet
2614 * header.
2615 */
2616
2617 /* put checksum into host byte order */
2618 *csum = ntohs(*csum);
2619 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2620 (*csum) += ~partial;
2621 (*csum) += ((*csum) < ~partial);
2622 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2624
2625 /* restore checksum to network byte order;
2626 later consumers expect this */
2627 *csum = htons(*csum);
2628
2629 /* save the tag */
2630#ifdef MXGE_NEW_VLAN_API
2631 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2632#else
2633 {
2634 struct m_tag *mtag;
2635 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2636 M_NOWAIT);
2637 if (mtag == NULL)
2638 return;
2639 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2640 m_tag_prepend(m, mtag);
2641 }
2642
2643#endif
2644 m->m_flags |= M_VLANTAG;
2645
2646 /*
2647 * Remove the 802.1q header by copying the Ethernet
2648 * addresses over it and adjusting the beginning of
2649 * the data in the mbuf. The encapsulated Ethernet
2650 * type field is already in place.
2651 */
2652 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2653 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2654 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2655}
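/*
 * Illustrative note (added commentary): the 32-bit word fetched at
 * ETHER_HDR_LEN above holds the VLAN tag plus the encapsulated type
 * field, i.e. the four bytes that leave the checksummed region once the
 * 802.1Q header is stripped; ones-complement subtracting it makes the
 * stored checksum match the de-encapsulated frame.
 */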
2656
2657
2658static inline void
2659mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2660 uint32_t csum, int lro)
2661{
2662 mxge_softc_t *sc;
2663 struct ifnet *ifp;
2664 struct mbuf *m;
2665 struct ether_header *eh;
2666 mxge_rx_ring_t *rx;
2667 bus_dmamap_t old_map;
2668 int idx;
2669
2670 sc = ss->sc;
2671 ifp = sc->ifp;
2672 rx = &ss->rx_big;
2673 idx = rx->cnt & rx->mask;
2674 rx->cnt += rx->nbufs;
2675 /* save a pointer to the received mbuf */
2676 m = rx->info[idx].m;
2677 /* try to replace the received mbuf */
2678 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2679 /* drop the frame -- the old mbuf is re-cycled */
2680 ifp->if_ierrors++;
2681 return;
2682 }
2683
2684 /* unmap the received buffer */
2685 old_map = rx->info[idx].map;
2686 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2687 bus_dmamap_unload(rx->dmat, old_map);
2688
2689 /* swap the bus_dmamap_t's */
2690 rx->info[idx].map = rx->extra_map;
2691 rx->extra_map = old_map;
2692
2693 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2694 * aligned */
2695 m->m_data += MXGEFW_PAD;
2696
2697 m->m_pkthdr.rcvif = ifp;
2698 m->m_len = m->m_pkthdr.len = len;
2699 ss->ipackets++;
2700 eh = mtod(m, struct ether_header *);
2701 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2702 mxge_vlan_tag_remove(m, &csum);
2703 }
2704 /* if the checksum is valid, mark it in the mbuf header */
2705
2706 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2707 (0 == mxge_rx_csum(m, csum))) {
2708 /* Tell the stack that the checksum is good */
2709 m->m_pkthdr.csum_data = 0xffff;
2710 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2711 CSUM_DATA_VALID;
2712
2713#if defined(INET) || defined (INET6)
2714 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2715 return;
2716#endif
2717 }
2718 /* flowid only valid if RSS hashing is enabled */
2719 if (sc->num_slices > 1) {
2720 m->m_pkthdr.flowid = (ss - sc->ss);
2721 m->m_flags |= M_FLOWID;
2722 }
2723 /* pass the frame up the stack */
2724 (*ifp->if_input)(ifp, m);
2725}
2726
2727static inline void
2728mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2729 uint32_t csum, int lro)
2730{
2731 mxge_softc_t *sc;
2732 struct ifnet *ifp;
2733 struct ether_header *eh;
2734 struct mbuf *m;
2735 mxge_rx_ring_t *rx;
2736 bus_dmamap_t old_map;
2737 int idx;
2738
2739 sc = ss->sc;
2740 ifp = sc->ifp;
2741 rx = &ss->rx_small;
2742 idx = rx->cnt & rx->mask;
2743 rx->cnt++;
2744 /* save a pointer to the received mbuf */
2745 m = rx->info[idx].m;
2746 /* try to replace the received mbuf */
2747 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2748 /* drop the frame -- the old mbuf is re-cycled */
2749 ifp->if_ierrors++;
2750 return;
2751 }
2752
2753 /* unmap the received buffer */
2754 old_map = rx->info[idx].map;
2755 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2756 bus_dmamap_unload(rx->dmat, old_map);
2757
2758 /* swap the bus_dmamap_t's */
2759 rx->info[idx].map = rx->extra_map;
2760 rx->extra_map = old_map;
2761
2762 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2763 * aligned */
2764 m->m_data += MXGEFW_PAD;
2765
2766 m->m_pkthdr.rcvif = ifp;
2767 m->m_len = m->m_pkthdr.len = len;
2768 ss->ipackets++;
2769 eh = mtod(m, struct ether_header *);
2770 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2771 mxge_vlan_tag_remove(m, &csum);
2772 }
2773 /* if the checksum is valid, mark it in the mbuf header */
2774 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2775 (0 == mxge_rx_csum(m, csum))) {
2776 /* Tell the stack that the checksum is good */
2777 m->m_pkthdr.csum_data = 0xffff;
2778 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2779 CSUM_DATA_VALID;
2780
2781#if defined(INET) || defined (INET6)
2782 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2783 return;
2784#endif
2785 }
2786 /* flowid only valid if RSS hashing is enabled */
2787 if (sc->num_slices > 1) {
2788 m->m_pkthdr.flowid = (ss - sc->ss);
2789 m->m_flags |= M_FLOWID;
2790 }
2791 /* pass the frame up the stack */
2792 (*ifp->if_input)(ifp, m);
2793}
2794
2795static inline void
2796mxge_clean_rx_done(struct mxge_slice_state *ss)
2797{
2798 mxge_rx_done_t *rx_done = &ss->rx_done;
2799 int limit = 0;
2800 uint16_t length;
2801 uint16_t checksum;
2802 int lro;
2803
2804 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2805 while (rx_done->entry[rx_done->idx].length != 0) {
2806 length = ntohs(rx_done->entry[rx_done->idx].length);
2807 rx_done->entry[rx_done->idx].length = 0;
2808 checksum = rx_done->entry[rx_done->idx].checksum;
2809 if (length <= (MHLEN - MXGEFW_PAD))
2810 mxge_rx_done_small(ss, length, checksum, lro);
2811 else
2812 mxge_rx_done_big(ss, length, checksum, lro);
2813 rx_done->cnt++;
2814 rx_done->idx = rx_done->cnt & rx_done->mask;
2815
2816 /* limit potential for livelock */
2817 if (__predict_false(++limit > rx_done->mask / 2))
2818 break;
2819 }
2820#if defined(INET) || defined (INET6)
2821 while (!SLIST_EMPTY(&ss->lc.lro_active)) {
2822 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
2823 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
2824 tcp_lro_flush(&ss->lc, lro);
2825 }
2826#endif
2827}
2828
2829
2830static inline void
2831mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2832{
2833 struct ifnet *ifp;
2834 mxge_tx_ring_t *tx;
2835 struct mbuf *m;
2836 bus_dmamap_t map;
2837 int idx;
2838 int *flags;
2839
2840 tx = &ss->tx;
2841 ifp = ss->sc->ifp;
2842 while (tx->pkt_done != mcp_idx) {
2843 idx = tx->done & tx->mask;
2844 tx->done++;
2845 m = tx->info[idx].m;
2846 /* mbuf and DMA map only attached to the first
2847 segment per-mbuf */
2848 if (m != NULL) {
2849 ss->obytes += m->m_pkthdr.len;
2850 if (m->m_flags & M_MCAST)
2851 ss->omcasts++;
2852 ss->opackets++;
2853 tx->info[idx].m = NULL;
2854 map = tx->info[idx].map;
2855 bus_dmamap_unload(tx->dmat, map);
2856 m_freem(m);
2857 }
2858 if (tx->info[idx].flag) {
2859 tx->info[idx].flag = 0;
2860 tx->pkt_done++;
2861 }
2862 }
2863
2864 /* If we have space, clear IFF_OACTIVE to tell the stack that
2865	   it's OK to send packets */
2866#ifdef IFNET_BUF_RING
2867 flags = &ss->if_drv_flags;
2868#else
2869 flags = &ifp->if_drv_flags;
2870#endif
2871 mtx_lock(&ss->tx.mtx);
2872 if ((*flags) & IFF_DRV_OACTIVE &&
2873 tx->req - tx->done < (tx->mask + 1)/4) {
2874 *(flags) &= ~IFF_DRV_OACTIVE;
2875 ss->tx.wake++;
2876 mxge_start_locked(ss);
2877 }
2878#ifdef IFNET_BUF_RING
2879	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2880		/* let the NIC stop polling this queue, since there
2881		 * are no more transmits pending */
2883		*tx->send_stop = 1;
2884		tx->queue_active = 0;
2885		tx->deactivate++;
2886		wmb();
2888	}
2889#endif
2890 mtx_unlock(&ss->tx.mtx);
2891
2892}
2893
2894static struct mxge_media_type mxge_xfp_media_types[] =
2895{
2896 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2897 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2898 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2899 {0, (1 << 5), "10GBASE-ER"},
2900 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2901 {0, (1 << 3), "10GBASE-SW"},
2902 {0, (1 << 2), "10GBASE-LW"},
2903 {0, (1 << 1), "10GBASE-EW"},
2904 {0, (1 << 0), "Reserved"}
2905};
2906static struct mxge_media_type mxge_sfp_media_types[] =
2907{
2908 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2909 {0, (1 << 7), "Reserved"},
2910 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2911 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2912 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2913 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2914};
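/*
 * Added commentary: the bitmask column corresponds to bits of the
 * transceiver's 10GbE compliance byte (MXGE_XFP_COMPLIANCE_BYTE for
 * XFP, EEPROM byte 3 for SFP+), which mxge_media_probe() below fetches
 * over I2C through the firmware and tests one table entry at a time.
 */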
2915
2916static void
2917mxge_media_set(mxge_softc_t *sc, int media_type)
2918{
2919
2920
2921 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2922 0, NULL);
2923 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2924 sc->current_media = media_type;
2925 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2926}
2927
2928static void
2929mxge_media_init(mxge_softc_t *sc)
2930{
2931 char *ptr;
2932 int i;
2933
2934 ifmedia_removeall(&sc->media);
2935 mxge_media_set(sc, IFM_AUTO);
2936
2937 /*
2938	 * parse the product code to determine the interface type
2939 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2940 * after the 3rd dash in the driver's cached copy of the
2941 * EEPROM's product code string.
2942 */
2943 ptr = sc->product_code_string;
2944 if (ptr == NULL) {
2945 device_printf(sc->dev, "Missing product code\n");
2946 return;
2947 }
2948
2949 for (i = 0; i < 3; i++, ptr++) {
2950 ptr = strchr(ptr, '-');
2951 if (ptr == NULL) {
2952 device_printf(sc->dev,
2953 "only %d dashes in PC?!?\n", i);
2954 return;
2955 }
2956 }
2957 if (*ptr == 'C' || *(ptr +1) == 'C') {
2958 /* -C is CX4 */
2959 sc->connector = MXGE_CX4;
2960 mxge_media_set(sc, IFM_10G_CX4);
2961 } else if (*ptr == 'Q') {
2962 /* -Q is Quad Ribbon Fiber */
2963 sc->connector = MXGE_QRF;
2964 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2965 /* FreeBSD has no media type for Quad ribbon fiber */
2966 } else if (*ptr == 'R') {
2967 /* -R is XFP */
2968 sc->connector = MXGE_XFP;
2969 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2970 /* -S or -2S is SFP+ */
2971 sc->connector = MXGE_SFP;
2972 } else {
2973 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2974 }
2975}
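/*
 * Illustrative note (hypothetical product code, not from the original
 * source): given "10G-PCIE-8B-S", the loop above leaves ptr just past
 * the third '-', so the 'S' selects MXGE_SFP; a "-2S" suffix is caught
 * by the *(ptr + 1) == 'S' test.
 */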
2976
2977/*
2978 * Determine the media type for a NIC. Some XFPs will identify
2979 * themselves only when their link is up, so this is initiated via a
2980 * link up interrupt. However, this can potentially take up to
2981 * several milliseconds, so it is run via the watchdog routine, rather
2982 * than in the interrupt handler itself.
2983 */
2984static void
2985mxge_media_probe(mxge_softc_t *sc)
2986{
2987 mxge_cmd_t cmd;
2988 char *cage_type;
2989
2990 struct mxge_media_type *mxge_media_types = NULL;
2991 int i, err, ms, mxge_media_type_entries;
2992 uint32_t byte;
2993
2994 sc->need_media_probe = 0;
2995
2996 if (sc->connector == MXGE_XFP) {
2997 /* -R is XFP */
2998 mxge_media_types = mxge_xfp_media_types;
2999 mxge_media_type_entries =
3000 sizeof (mxge_xfp_media_types) /
3001 sizeof (mxge_xfp_media_types[0]);
3002 byte = MXGE_XFP_COMPLIANCE_BYTE;
3003 cage_type = "XFP";
3004 } else if (sc->connector == MXGE_SFP) {
3005 /* -S or -2S is SFP+ */
3006 mxge_media_types = mxge_sfp_media_types;
3007 mxge_media_type_entries =
3008 sizeof (mxge_sfp_media_types) /
3009 sizeof (mxge_sfp_media_types[0]);
3010 cage_type = "SFP+";
3011 byte = 3;
3012 } else {
3013 /* nothing to do; media type cannot change */
3014 return;
3015 }
3016
3017 /*
3018	 * At this point we know the NIC has an XFP or SFP+ cage, so now
3019	 * we try to determine what is in the cage by using the
3020	 * firmware's I2C commands to read the transceiver's 10GbE
3021	 * compliance register. We read just one byte, which may take
3022	 * over a millisecond.
3023 */
3024
3025 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3026 cmd.data1 = byte;
3027 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3028 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3029 device_printf(sc->dev, "failed to read XFP\n");
3030 }
3031 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3032 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3033 }
3034 if (err != MXGEFW_CMD_OK) {
3035 return;
3036 }
3037
3038 /* now we wait for the data to be cached */
3039 cmd.data0 = byte;
3040 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3041 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3042 DELAY(1000);
3043 cmd.data0 = byte;
3044 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3045 }
3046 if (err != MXGEFW_CMD_OK) {
3047 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3048 cage_type, err, ms);
3049 return;
3050 }
3051
3052 if (cmd.data0 == mxge_media_types[0].bitmask) {
3053 if (mxge_verbose)
3054 device_printf(sc->dev, "%s:%s\n", cage_type,
3055 mxge_media_types[0].name);
3056 if (sc->current_media != mxge_media_types[0].flag) {
3057 mxge_media_init(sc);
3058 mxge_media_set(sc, mxge_media_types[0].flag);
3059 }
3060 return;
3061 }
3062 for (i = 1; i < mxge_media_type_entries; i++) {
3063 if (cmd.data0 & mxge_media_types[i].bitmask) {
3064 if (mxge_verbose)
3065 device_printf(sc->dev, "%s:%s\n",
3066 cage_type,
3067 mxge_media_types[i].name);
3068
3069 if (sc->current_media != mxge_media_types[i].flag) {
3070 mxge_media_init(sc);
3071 mxge_media_set(sc, mxge_media_types[i].flag);
3072 }
3073 return;
3074 }
3075 }
3076 if (mxge_verbose)
3077 device_printf(sc->dev, "%s media 0x%x unknown\n",
3078 cage_type, cmd.data0);
3079
3080 return;
3081}
3082
3083static void
3084mxge_intr(void *arg)
3085{
3086 struct mxge_slice_state *ss = arg;
3087 mxge_softc_t *sc = ss->sc;
3088 mcp_irq_data_t *stats = ss->fw_stats;
3089 mxge_tx_ring_t *tx = &ss->tx;
3090 mxge_rx_done_t *rx_done = &ss->rx_done;
3091 uint32_t send_done_count;
3092 uint8_t valid;
3093
3094
3095#ifndef IFNET_BUF_RING
3096 /* an interrupt on a non-zero slice is implicitly valid
3097 since MSI-X irqs are not shared */
3098 if (ss != sc->ss) {
3099 mxge_clean_rx_done(ss);
3100 *ss->irq_claim = be32toh(3);
3101 return;
3102 }
3103#endif
3104
3105 /* make sure the DMA has finished */
3106 if (!stats->valid) {
3107 return;
3108 }
3109 valid = stats->valid;
3110
3111 if (sc->legacy_irq) {
3112 /* lower legacy IRQ */
3113 *sc->irq_deassert = 0;
3114 if (!mxge_deassert_wait)
3115			/* don't wait for confirmation that the irq is low */
3116 stats->valid = 0;
3117 } else {
3118 stats->valid = 0;
3119 }
3120
3121 /* loop while waiting for legacy irq deassertion */
3122 do {
3123 /* check for transmit completes and receives */
3124 send_done_count = be32toh(stats->send_done_count);
3125 while ((send_done_count != tx->pkt_done) ||
3126 (rx_done->entry[rx_done->idx].length != 0)) {
3127 if (send_done_count != tx->pkt_done)
3128 mxge_tx_done(ss, (int)send_done_count);
3129 mxge_clean_rx_done(ss);
3130 send_done_count = be32toh(stats->send_done_count);
3131 }
3132 if (sc->legacy_irq && mxge_deassert_wait)
3133 wmb();
3134 } while (*((volatile uint8_t *) &stats->valid));
3135
3136 /* fw link & error stats meaningful only on the first slice */
3137 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3138 if (sc->link_state != stats->link_up) {
3139 sc->link_state = stats->link_up;
3140 if (sc->link_state) {
3141 if_link_state_change(sc->ifp, LINK_STATE_UP);
3142 if_initbaudrate(sc->ifp, IF_Gbps(10));
3143 if (mxge_verbose)
3144 device_printf(sc->dev, "link up\n");
3145 } else {
3146 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3147 sc->ifp->if_baudrate = 0;
3148 if (mxge_verbose)
3149 device_printf(sc->dev, "link down\n");
3150 }
3151 sc->need_media_probe = 1;
3152 }
3153 if (sc->rdma_tags_available !=
3154 be32toh(stats->rdma_tags_available)) {
3155 sc->rdma_tags_available =
3156 be32toh(stats->rdma_tags_available);
3157 device_printf(sc->dev, "RDMA timed out! %d tags "
3158 "left\n", sc->rdma_tags_available);
3159 }
3160
3161 if (stats->link_down) {
3162 sc->down_cnt += stats->link_down;
3163 sc->link_state = 0;
3164 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3165 }
3166 }
3167
3168 /* check to see if we have rx token to pass back */
3169 if (valid & 0x1)
3170 *ss->irq_claim = be32toh(3);
3171 *(ss->irq_claim + 1) = be32toh(3);
3172}
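/*
 * Added commentary: with a legacy (INTx) interrupt and
 * mxge_deassert_wait set, stats->valid is deliberately left non-zero
 * above, so the do/while loop keeps draining tx/rx work until the
 * firmware DMAs a zero back, confirming the level-triggered line has
 * really been deasserted.
 */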
3173
3174static void
3175mxge_init(void *arg)
3176{
3177 mxge_softc_t *sc = arg;
3178 struct ifnet *ifp = sc->ifp;
3179
3180
3181 mtx_lock(&sc->driver_mtx);
3182 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3183 (void) mxge_open(sc);
3184 mtx_unlock(&sc->driver_mtx);
3185}
3186
3187
3188
3189static void
3190mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3191{
3192 int i;
3193
3194#if defined(INET) || defined(INET6)
3195 tcp_lro_free(&ss->lc);
3196#endif
3197 for (i = 0; i <= ss->rx_big.mask; i++) {
3198 if (ss->rx_big.info[i].m == NULL)
3199 continue;
3200 bus_dmamap_unload(ss->rx_big.dmat,
3201 ss->rx_big.info[i].map);
3202 m_freem(ss->rx_big.info[i].m);
3203 ss->rx_big.info[i].m = NULL;
3204 }
3205
3206 for (i = 0; i <= ss->rx_small.mask; i++) {
3207 if (ss->rx_small.info[i].m == NULL)
3208 continue;
3209 bus_dmamap_unload(ss->rx_small.dmat,
3210 ss->rx_small.info[i].map);
3211 m_freem(ss->rx_small.info[i].m);
3212 ss->rx_small.info[i].m = NULL;
3213 }
3214
3215 /* transmit ring used only on the first slice */
3216 if (ss->tx.info == NULL)
3217 return;
3218
3219 for (i = 0; i <= ss->tx.mask; i++) {
3220 ss->tx.info[i].flag = 0;
3221 if (ss->tx.info[i].m == NULL)
3222 continue;
3223 bus_dmamap_unload(ss->tx.dmat,
3224 ss->tx.info[i].map);
3225 m_freem(ss->tx.info[i].m);
3226 ss->tx.info[i].m = NULL;
3227 }
3228}
3229
3230static void
3231mxge_free_mbufs(mxge_softc_t *sc)
3232{
3233 int slice;
3234
3235 for (slice = 0; slice < sc->num_slices; slice++)
3236 mxge_free_slice_mbufs(&sc->ss[slice]);
3237}
3238
3239static void
3240mxge_free_slice_rings(struct mxge_slice_state *ss)
3241{
3242 int i;
3243
3244
3245 if (ss->rx_done.entry != NULL)
3246 mxge_dma_free(&ss->rx_done.dma);
3247 ss->rx_done.entry = NULL;
3248
3249 if (ss->tx.req_bytes != NULL)
3250 free(ss->tx.req_bytes, M_DEVBUF);
3251 ss->tx.req_bytes = NULL;
3252
3253 if (ss->tx.seg_list != NULL)
3254 free(ss->tx.seg_list, M_DEVBUF);
3255 ss->tx.seg_list = NULL;
3256
3257 if (ss->rx_small.shadow != NULL)
3258 free(ss->rx_small.shadow, M_DEVBUF);
3259 ss->rx_small.shadow = NULL;
3260
3261 if (ss->rx_big.shadow != NULL)
3262 free(ss->rx_big.shadow, M_DEVBUF);
3263 ss->rx_big.shadow = NULL;
3264
3265 if (ss->tx.info != NULL) {
3266 if (ss->tx.dmat != NULL) {
3267 for (i = 0; i <= ss->tx.mask; i++) {
3268 bus_dmamap_destroy(ss->tx.dmat,
3269 ss->tx.info[i].map);
3270 }
3271 bus_dma_tag_destroy(ss->tx.dmat);
3272 }
3273 free(ss->tx.info, M_DEVBUF);
3274 }
3275 ss->tx.info = NULL;
3276
3277 if (ss->rx_small.info != NULL) {
3278 if (ss->rx_small.dmat != NULL) {
3279 for (i = 0; i <= ss->rx_small.mask; i++) {
3280 bus_dmamap_destroy(ss->rx_small.dmat,
3281 ss->rx_small.info[i].map);
3282 }
3283 bus_dmamap_destroy(ss->rx_small.dmat,
3284 ss->rx_small.extra_map);
3285 bus_dma_tag_destroy(ss->rx_small.dmat);
3286 }
3287 free(ss->rx_small.info, M_DEVBUF);
3288 }
3289 ss->rx_small.info = NULL;
3290
3291 if (ss->rx_big.info != NULL) {
3292 if (ss->rx_big.dmat != NULL) {
3293 for (i = 0; i <= ss->rx_big.mask; i++) {
3294 bus_dmamap_destroy(ss->rx_big.dmat,
3295 ss->rx_big.info[i].map);
3296 }
3297 bus_dmamap_destroy(ss->rx_big.dmat,
3298 ss->rx_big.extra_map);
3299 bus_dma_tag_destroy(ss->rx_big.dmat);
3300 }
3301 free(ss->rx_big.info, M_DEVBUF);
3302 }
3303 ss->rx_big.info = NULL;
3304}
3305
3306static void
3307mxge_free_rings(mxge_softc_t *sc)
3308{
3309 int slice;
3310
3311 for (slice = 0; slice < sc->num_slices; slice++)
3312 mxge_free_slice_rings(&sc->ss[slice]);
3313}
3314
3315static int
3316mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3317 int tx_ring_entries)
3318{
3319 mxge_softc_t *sc = ss->sc;
3320 size_t bytes;
3321 int err, i;
3322
3323 /* allocate per-slice receive resources */
3324
3325 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3326 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3327
3328 /* allocate the rx shadow rings */
3329 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3330 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3331
3332 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3333 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3334
3335 /* allocate the rx host info rings */
3336 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3337 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3338
3339 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3340 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3341
3342 /* allocate the rx busdma resources */
3343 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3344 1, /* alignment */
3345 4096, /* boundary */
3346 BUS_SPACE_MAXADDR, /* low */
3347 BUS_SPACE_MAXADDR, /* high */
3348 NULL, NULL, /* filter */
3349 MHLEN, /* maxsize */
3350 1, /* num segs */
3351 MHLEN, /* maxsegsize */
3352 BUS_DMA_ALLOCNOW, /* flags */
3353 NULL, NULL, /* lock */
3354 &ss->rx_small.dmat); /* tag */
3355 if (err != 0) {
3356 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3357 err);
3358 return err;
3359 }
3360
3361 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3362 1, /* alignment */
3363#if MXGE_VIRT_JUMBOS
3364 4096, /* boundary */
3365#else
3366 0, /* boundary */
3367#endif
3368 BUS_SPACE_MAXADDR, /* low */
3369 BUS_SPACE_MAXADDR, /* high */
3370 NULL, NULL, /* filter */
3371 3*4096, /* maxsize */
3372#if MXGE_VIRT_JUMBOS
3373 3, /* num segs */
3374				 4096, /* maxsegsize */
3375#else
3376 1, /* num segs */
3377				 MJUM9BYTES, /* maxsegsize */
3378#endif
3379 BUS_DMA_ALLOCNOW, /* flags */
3380 NULL, NULL, /* lock */
3381 &ss->rx_big.dmat); /* tag */
3382 if (err != 0) {
3383 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3384 err);
3385 return err;
3386 }
3387 for (i = 0; i <= ss->rx_small.mask; i++) {
3388 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3389 &ss->rx_small.info[i].map);
3390 if (err != 0) {
3391 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3392 err);
3393 return err;
3394 }
3395 }
3396 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3397 &ss->rx_small.extra_map);
3398 if (err != 0) {
3399 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3400 err);
3401 return err;
3402 }
3403
3404 for (i = 0; i <= ss->rx_big.mask; i++) {
3405 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3406 &ss->rx_big.info[i].map);
3407 if (err != 0) {
3408 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3409 err);
3410 return err;
3411 }
3412 }
3413 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3414 &ss->rx_big.extra_map);
3415 if (err != 0) {
3416 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3417 err);
3418 return err;
3419 }
3420
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/linker.h>
36#include <sys/firmware.h>
37#include <sys/endian.h>
38#include <sys/sockio.h>
39#include <sys/mbuf.h>
40#include <sys/malloc.h>
41#include <sys/kdb.h>
42#include <sys/kernel.h>
43#include <sys/lock.h>
44#include <sys/module.h>
45#include <sys/socket.h>
46#include <sys/sysctl.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49
50#include <net/if.h>
51#include <net/if_arp.h>
52#include <net/ethernet.h>
53#include <net/if_dl.h>
54#include <net/if_media.h>
55
56#include <net/bpf.h>
57
58#include <net/if_types.h>
59#include <net/if_vlan_var.h>
60#include <net/zlib.h>
61
62#include <netinet/in_systm.h>
63#include <netinet/in.h>
64#include <netinet/ip.h>
65#include <netinet/ip6.h>
66#include <netinet/tcp.h>
67#include <netinet/tcp_lro.h>
68#include <netinet6/ip6_var.h>
69
70#include <machine/bus.h>
71#include <machine/in_cksum.h>
72#include <machine/resource.h>
73#include <sys/bus.h>
74#include <sys/rman.h>
75#include <sys/smp.h>
76
77#include <dev/pci/pcireg.h>
78#include <dev/pci/pcivar.h>
79#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
80
81#include <vm/vm.h> /* for pmap_mapdev() */
82#include <vm/pmap.h>
83
84#if defined(__i386) || defined(__amd64)
85#include <machine/specialreg.h>
86#endif
87
88#include <dev/mxge/mxge_mcp.h>
89#include <dev/mxge/mcp_gen_header.h>
90/*#define MXGE_FAKE_IFP*/
91#include <dev/mxge/if_mxge_var.h>
92#ifdef IFNET_BUF_RING
93#include <sys/buf_ring.h>
94#endif
95
96#include "opt_inet.h"
97#include "opt_inet6.h"
98
99/* tunable params */
100static int mxge_nvidia_ecrc_enable = 1;
101static int mxge_force_firmware = 0;
102static int mxge_intr_coal_delay = 30;
103static int mxge_deassert_wait = 1;
104static int mxge_flow_control = 1;
105static int mxge_verbose = 0;
106static int mxge_ticks;
107static int mxge_max_slices = 1;
108static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
109static int mxge_always_promisc = 0;
110static int mxge_initial_mtu = ETHERMTU_JUMBO;
111static int mxge_throttle = 0;
112static char *mxge_fw_unaligned = "mxge_ethp_z8e";
113static char *mxge_fw_aligned = "mxge_eth_z8e";
114static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
115static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
116
117static int mxge_probe(device_t dev);
118static int mxge_attach(device_t dev);
119static int mxge_detach(device_t dev);
120static int mxge_shutdown(device_t dev);
121static void mxge_intr(void *arg);
122
123static device_method_t mxge_methods[] =
124{
125 /* Device interface */
126 DEVMETHOD(device_probe, mxge_probe),
127 DEVMETHOD(device_attach, mxge_attach),
128 DEVMETHOD(device_detach, mxge_detach),
129 DEVMETHOD(device_shutdown, mxge_shutdown),
130
131 DEVMETHOD_END
132};
133
134static driver_t mxge_driver =
135{
136 "mxge",
137 mxge_methods,
138 sizeof(mxge_softc_t),
139};
140
141static devclass_t mxge_devclass;
142
143/* Declare ourselves to be a child of the PCI bus.*/
144DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
145MODULE_DEPEND(mxge, firmware, 1, 1, 1);
146MODULE_DEPEND(mxge, zlib, 1, 1, 1);
147
148static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
149static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
150static int mxge_close(mxge_softc_t *sc, int down);
151static int mxge_open(mxge_softc_t *sc);
152static void mxge_tick(void *arg);
153
154static int
155mxge_probe(device_t dev)
156{
157 int rev;
158
159
160 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
161 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
162 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
163 rev = pci_get_revid(dev);
164 switch (rev) {
165 case MXGE_PCI_REV_Z8E:
166 device_set_desc(dev, "Myri10G-PCIE-8A");
167 break;
168 case MXGE_PCI_REV_Z8ES:
169 device_set_desc(dev, "Myri10G-PCIE-8B");
170 break;
171 default:
172 device_set_desc(dev, "Myri10G-PCIE-8??");
173 device_printf(dev, "Unrecognized rev %d NIC\n",
174 rev);
175 break;
176 }
177 return 0;
178 }
179 return ENXIO;
180}
181
182static void
183mxge_enable_wc(mxge_softc_t *sc)
184{
185#if defined(__i386) || defined(__amd64)
186 vm_offset_t len;
187 int err;
188
189 sc->wc = 1;
190 len = rman_get_size(sc->mem_res);
191 err = pmap_change_attr((vm_offset_t) sc->sram,
192 len, PAT_WRITE_COMBINING);
193 if (err != 0) {
194 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
195 err);
196 sc->wc = 0;
197 }
198#endif
199}
200
201
202/* callback to get our DMA address */
203static void
204mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
205 int error)
206{
207 if (error == 0) {
208 *(bus_addr_t *) arg = segs->ds_addr;
209 }
210}
211
212static int
213mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
214 bus_size_t alignment)
215{
216 int err;
217 device_t dev = sc->dev;
218 bus_size_t boundary, maxsegsize;
219
220 if (bytes > 4096 && alignment == 4096) {
221 boundary = 0;
222 maxsegsize = bytes;
223 } else {
224 boundary = 4096;
225 maxsegsize = 4096;
226 }
227
228 /* allocate DMAable memory tags */
229 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
230 alignment, /* alignment */
231 boundary, /* boundary */
232 BUS_SPACE_MAXADDR, /* low */
233 BUS_SPACE_MAXADDR, /* high */
234 NULL, NULL, /* filter */
235 bytes, /* maxsize */
236 1, /* num segs */
237 maxsegsize, /* maxsegsize */
238 BUS_DMA_COHERENT, /* flags */
239 NULL, NULL, /* lock */
240 &dma->dmat); /* tag */
241 if (err != 0) {
242 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
243 return err;
244 }
245
246 /* allocate DMAable memory & map */
247 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
248 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
249 | BUS_DMA_ZERO), &dma->map);
250 if (err != 0) {
251 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
252 goto abort_with_dmat;
253 }
254
255 /* load the memory */
256 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
257 mxge_dmamap_callback,
258 (void *)&dma->bus_addr, 0);
259 if (err != 0) {
260 device_printf(dev, "couldn't load map (err = %d)\n", err);
261 goto abort_with_mem;
262 }
263 return 0;
264
265abort_with_mem:
266 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
267abort_with_dmat:
268 (void)bus_dma_tag_destroy(dma->dmat);
269 return err;
270}
271
272
273static void
274mxge_dma_free(mxge_dma_t *dma)
275{
276 bus_dmamap_unload(dma->dmat, dma->map);
277 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
278 (void)bus_dma_tag_destroy(dma->dmat);
279}
280
281/*
282 * The eeprom strings on the lanaiX have the format
283 * SN=x\0
284 * MAC=x:x:x:x:x:x\0
285 * PC=text\0
286 */
287
288static int
289mxge_parse_strings(mxge_softc_t *sc)
290{
291 char *ptr;
292 int i, found_mac, found_sn2;
293 char *endptr;
294
295 ptr = sc->eeprom_strings;
296 found_mac = 0;
297 found_sn2 = 0;
298 while (*ptr != '\0') {
299 if (strncmp(ptr, "MAC=", 4) == 0) {
300 ptr += 4;
301 for (i = 0;;) {
302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
303 if (endptr - ptr != 2)
304 goto abort;
305 ptr = endptr;
306 if (++i == 6)
307 break;
308 if (*ptr++ != ':')
309 goto abort;
310 }
311 found_mac = 1;
312 } else if (strncmp(ptr, "PC=", 3) == 0) {
313 ptr += 3;
314 strlcpy(sc->product_code_string, ptr,
315 sizeof(sc->product_code_string));
316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
317 ptr += 3;
318 strlcpy(sc->serial_number_string, ptr,
319 sizeof(sc->serial_number_string));
320 } else if (strncmp(ptr, "SN2=", 4) == 0) {
321 /* SN2 takes precedence over SN */
322 ptr += 4;
323 found_sn2 = 1;
324 strlcpy(sc->serial_number_string, ptr,
325 sizeof(sc->serial_number_string));
326 }
327 while (*ptr++ != '\0') {}
328 }
329
330 if (found_mac)
331 return 0;
332
333 abort:
334 device_printf(sc->dev, "failed to parse eeprom_strings\n");
335
336 return ENXIO;
337}
338
339#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
340static void
341mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
342{
343 uint32_t val;
344 unsigned long base, off;
345 char *va, *cfgptr;
346 device_t pdev, mcp55;
347 uint16_t vendor_id, device_id, word;
348 uintptr_t bus, slot, func, ivend, idev;
349 uint32_t *ptr32;
350
351
352 if (!mxge_nvidia_ecrc_enable)
353 return;
354
355 pdev = device_get_parent(device_get_parent(sc->dev));
356 if (pdev == NULL) {
357 device_printf(sc->dev, "could not find parent?\n");
358 return;
359 }
360 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
361 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
362
363 if (vendor_id != 0x10de)
364 return;
365
366 base = 0;
367
368 if (device_id == 0x005d) {
369 /* ck804, base address is magic */
370 base = 0xe0000000UL;
371 } else if (device_id >= 0x0374 && device_id <= 0x378) {
372 /* mcp55, base address stored in chipset */
373 mcp55 = pci_find_bsf(0, 0, 0);
374 if (mcp55 &&
375 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
376 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
377 word = pci_read_config(mcp55, 0x90, 2);
378 base = ((unsigned long)word & 0x7ffeU) << 25;
379 }
380 }
381 if (!base)
382 return;
383
384 /* XXXX
385 Test below is commented because it is believed that doing
386 config read/write beyond 0xff will access the config space
387 for the next larger function. Uncomment this and remove
388 the hacky pmap_mapdev() way of accessing config space when
389 FreeBSD grows support for extended pcie config space access
390 */
391#if 0
392 /* See if we can, by some miracle, access the extended
393 config space */
394 val = pci_read_config(pdev, 0x178, 4);
395 if (val != 0xffffffff) {
396 val |= 0x40;
397 pci_write_config(pdev, 0x178, val, 4);
398 return;
399 }
400#endif
401 /* Rather than using normal pci config space writes, we must
402 * map the Nvidia config space ourselves. This is because on
403 * opteron/nvidia class machine the 0xe000000 mapping is
404 * handled by the nvidia chipset, that means the internal PCI
405 * device (the on-chip northbridge), or the amd-8131 bridge
406 * and things behind them are not visible by this method.
407 */
408
409 BUS_READ_IVAR(device_get_parent(pdev), pdev,
410 PCI_IVAR_BUS, &bus);
411 BUS_READ_IVAR(device_get_parent(pdev), pdev,
412 PCI_IVAR_SLOT, &slot);
413 BUS_READ_IVAR(device_get_parent(pdev), pdev,
414 PCI_IVAR_FUNCTION, &func);
415 BUS_READ_IVAR(device_get_parent(pdev), pdev,
416 PCI_IVAR_VENDOR, &ivend);
417 BUS_READ_IVAR(device_get_parent(pdev), pdev,
418 PCI_IVAR_DEVICE, &idev);
419
420 off = base
421 + 0x00100000UL * (unsigned long)bus
422 + 0x00001000UL * (unsigned long)(func
423 + 8 * slot);
424
425 /* map it into the kernel */
426 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
427
428
429 if (va == NULL) {
430 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
431 return;
432 }
433 /* get a pointer to the config space mapped into the kernel */
434 cfgptr = va + (off & PAGE_MASK);
435
436 /* make sure that we can really access it */
437 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
438 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
439 if (! (vendor_id == ivend && device_id == idev)) {
440 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
441 vendor_id, device_id);
442 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
443 return;
444 }
445
446 ptr32 = (uint32_t*)(cfgptr + 0x178);
447 val = *ptr32;
448
449 if (val == 0xffffffff) {
450 device_printf(sc->dev, "extended mapping failed\n");
451 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
452 return;
453 }
454 *ptr32 = val | 0x40;
455 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
456 if (mxge_verbose)
457 device_printf(sc->dev,
458 "Enabled ECRC on upstream Nvidia bridge "
459 "at %d:%d:%d\n",
460 (int)bus, (int)slot, (int)func);
461 return;
462}
463#else
464static void
465mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
466{
467 device_printf(sc->dev,
468 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
469 return;
470}
471#endif
472
473
474static int
475mxge_dma_test(mxge_softc_t *sc, int test_type)
476{
477 mxge_cmd_t cmd;
478 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
479 int status;
480 uint32_t len;
481 char *test = " ";
482
483
484 /* Run a small DMA test.
485 * The magic multipliers to the length tell the firmware
486 * to do DMA read, write, or read+write tests. The
487 * results are returned in cmd.data0. The upper 16
488 * bits of the return is the number of transfers completed.
489 * The lower 16 bits is the time in 0.5us ticks that the
490 * transfers took to complete.
491 */
492
493 len = sc->tx_boundary;
494
495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
497 cmd.data2 = len * 0x10000;
498 status = mxge_send_cmd(sc, test_type, &cmd);
499 if (status != 0) {
500 test = "read";
501 goto abort;
502 }
503 sc->read_dma = ((cmd.data0>>16) * len * 2) /
504 (cmd.data0 & 0xffff);
505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
507 cmd.data2 = len * 0x1;
508 status = mxge_send_cmd(sc, test_type, &cmd);
509 if (status != 0) {
510 test = "write";
511 goto abort;
512 }
513 sc->write_dma = ((cmd.data0>>16) * len * 2) /
514 (cmd.data0 & 0xffff);
515
516 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
517 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
518 cmd.data2 = len * 0x10001;
519 status = mxge_send_cmd(sc, test_type, &cmd);
520 if (status != 0) {
521 test = "read/write";
522 goto abort;
523 }
524 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
525 (cmd.data0 & 0xffff);
526
527abort:
528 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
529 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
530 test, status);
531
532 return status;
533}
534
535/*
536 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
537 * when the PCI-E Completion packets are aligned on an 8-byte
538 * boundary. Some PCI-E chip sets always align Completion packets; on
539 * the ones that do not, the alignment can be enforced by enabling
540 * ECRC generation (if supported).
541 *
542 * When PCI-E Completion packets are not aligned, it is actually more
543 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
544 *
545 * If the driver can neither enable ECRC nor verify that it has
546 * already been enabled, then it must use a firmware image which works
547 * around unaligned completion packets (ethp_z8e.dat), and it should
548 * also ensure that it never gives the device a Read-DMA which is
549 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
550 * enabled, then the driver should use the aligned (eth_z8e.dat)
551 * firmware image, and set tx_boundary to 4KB.
552 */
553
554static int
555mxge_firmware_probe(mxge_softc_t *sc)
556{
557 device_t dev = sc->dev;
558 int reg, status;
559 uint16_t pectl;
560
561 sc->tx_boundary = 4096;
562 /*
563 * Verify the max read request size was set to 4KB
564 * before trying the test with 4KB.
565 */
566 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
567 pectl = pci_read_config(dev, reg + 0x8, 2);
568 if ((pectl & (5 << 12)) != (5 << 12)) {
569 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
570 pectl);
571 sc->tx_boundary = 2048;
572 }
573 }
574
575 /*
576 * load the optimized firmware (which assumes aligned PCIe
577 * completions) in order to see if it works on this host.
578 */
579 sc->fw_name = mxge_fw_aligned;
580 status = mxge_load_firmware(sc, 1);
581 if (status != 0) {
582 return status;
583 }
584
585 /*
586 * Enable ECRC if possible
587 */
588 mxge_enable_nvidia_ecrc(sc);
589
590 /*
591 * Run a DMA test which watches for unaligned completions and
592 * aborts on the first one seen. Not required on Z8ES or newer.
593 */
594 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
595 return 0;
596 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
597 if (status == 0)
598 return 0; /* keep the aligned firmware */
599
600 if (status != E2BIG)
601 device_printf(dev, "DMA test failed: %d\n", status);
602 if (status == ENOSYS)
603 device_printf(dev, "Falling back to ethp! "
604 "Please install up to date fw\n");
605 return status;
606}
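
#if 0
/*
 * Illustrative sketch (hypothetical helper, not driver code) of the
 * PCIe Device Control decode in mxge_firmware_probe() above.
 * Max_Read_Request_Size is bits 14:12 of the register at cap + 0x8
 * and encodes 128 << field bytes, so field 5 is the 4096-byte setting
 * the probe insists on; the (5 << 12) mask test above accepts exactly
 * the encodings with bits 12 and 14 set.
 */
static int
sketch_mrrs_bytes(uint16_t pectl)
{
	int field = (pectl >> 12) & 0x7;	/* bits 14:12 */

	return (128 << field);			/* 0 -> 128 ... 5 -> 4096 */
}
#endif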
607
608static int
609mxge_select_firmware(mxge_softc_t *sc)
610{
611 int aligned = 0;
612 int force_firmware = mxge_force_firmware;
613
614 if (sc->throttle)
615 force_firmware = sc->throttle;
616
617 if (force_firmware != 0) {
618 if (force_firmware == 1)
619 aligned = 1;
620 else
621 aligned = 0;
622 if (mxge_verbose)
623 device_printf(sc->dev,
624 "Assuming %s completions (forced)\n",
625 aligned ? "aligned" : "unaligned");
626 goto abort;
627 }
628
629 /* if the PCIe link width is 4 or less, we can use the aligned
630 firmware and skip any checks */
631 if (sc->link_width != 0 && sc->link_width <= 4) {
632 device_printf(sc->dev,
633 "PCIe x%d Link, expect reduced performance\n",
634 sc->link_width);
635 aligned = 1;
636 goto abort;
637 }
638
639 if (0 == mxge_firmware_probe(sc))
640 return 0;
641
642abort:
643 if (aligned) {
644 sc->fw_name = mxge_fw_aligned;
645 sc->tx_boundary = 4096;
646 } else {
647 sc->fw_name = mxge_fw_unaligned;
648 sc->tx_boundary = 2048;
649 }
650 return (mxge_load_firmware(sc, 0));
651}
652
653static int
654mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
655{
656
657
658 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
659 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
660 be32toh(hdr->mcp_type));
661 return EIO;
662 }
663
664 /* save firmware version for sysctl */
665 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
666 if (mxge_verbose)
667 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
668
669 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
670 &sc->fw_ver_minor, &sc->fw_ver_tiny);
671
672 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
673 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
674 device_printf(sc->dev, "Found firmware version %s\n",
675 sc->fw_version);
676 device_printf(sc->dev, "Driver needs %d.%d\n",
677 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
678 return EINVAL;
679 }
680 return 0;
681
682}
683
684static void *
685z_alloc(void *nil, u_int items, u_int size)
686{
687 void *ptr;
688
689 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
690 return ptr;
691}
692
693static void
694z_free(void *nil, void *ptr)
695{
696 free(ptr, M_TEMP);
697}
698
699
700static int
701mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
702{
703 z_stream zs;
704 char *inflate_buffer;
705 const struct firmware *fw;
706 const mcp_gen_header_t *hdr;
707 unsigned hdr_offset;
708 int status;
709 unsigned int i;
710 char dummy;
711 size_t fw_len;
712
713 fw = firmware_get(sc->fw_name);
714 if (fw == NULL) {
715 device_printf(sc->dev, "Could not find firmware image %s\n",
716 sc->fw_name);
717 return ENOENT;
718 }
719
720
721
722 /* setup zlib and decompress f/w */
723 bzero(&zs, sizeof (zs));
724 zs.zalloc = z_alloc;
725 zs.zfree = z_free;
726 status = inflateInit(&zs);
727 if (status != Z_OK) {
728 status = EIO;
729 goto abort_with_fw;
730 }
731
732 /* the uncompressed size is stored as the firmware version,
733 which would otherwise go unused */
734 fw_len = (size_t) fw->version;
735 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
736 if (inflate_buffer == NULL) {
737 status = ENOMEM;	/* don't return the stale Z_OK as success */
738 goto abort_with_zs;
739 }
738 zs.avail_in = fw->datasize;
739 zs.next_in = __DECONST(char *, fw->data);
740 zs.avail_out = fw_len;
741 zs.next_out = inflate_buffer;
742 status = inflate(&zs, Z_FINISH);
743 if (status != Z_STREAM_END) {
744 device_printf(sc->dev, "zlib %d\n", status);
745 status = EIO;
746 goto abort_with_buffer;
747 }
748
749 /* check id */
750 hdr_offset = htobe32(*(const uint32_t *)
751 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
752 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
753 device_printf(sc->dev, "Bad firmware file");
754 status = EIO;
755 goto abort_with_buffer;
756 }
757 hdr = (const void*)(inflate_buffer + hdr_offset);
758
759 status = mxge_validate_firmware(sc, hdr);
760 if (status != 0)
761 goto abort_with_buffer;
762
763 /* Copy the inflated firmware to NIC SRAM. */
764 for (i = 0; i < fw_len; i += 256) {
765 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
766 inflate_buffer + i,
767 min(256U, (unsigned)(fw_len - i)));
768 wmb();
769 dummy = *sc->sram;
770 wmb();
771 }
772
773 *limit = fw_len;
774 status = 0;
775abort_with_buffer:
776 free(inflate_buffer, M_TEMP);
777abort_with_zs:
778 inflateEnd(&zs);
779abort_with_fw:
780 firmware_put(fw, FIRMWARE_UNLOAD);
781 return status;
782}
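
#if 0
/*
 * Standalone userland sketch (assumptions: zlib linked, caller knows
 * the uncompressed size, names are hypothetical) of the inflate
 * pattern used above: install zalloc/zfree callbacks, decompress in a
 * single Z_FINISH call, and always tear the stream down.
 */
#include <stdlib.h>
#include <string.h>
#include <zlib.h>

static void *
sketch_zalloc(void *opaque, unsigned items, unsigned size)
{
	return (malloc((size_t)items * size));
}

static void
sketch_zfree(void *opaque, void *ptr)
{
	free(ptr);
}

static int
sketch_inflate(const void *comp, size_t comp_len, void *out, size_t out_len)
{
	z_stream zs;

	memset(&zs, 0, sizeof(zs));
	zs.zalloc = sketch_zalloc;
	zs.zfree = sketch_zfree;
	if (inflateInit(&zs) != Z_OK)
		return (-1);
	zs.next_in = (Bytef *)comp;
	zs.avail_in = (uInt)comp_len;
	zs.next_out = (Bytef *)out;
	zs.avail_out = (uInt)out_len;
	if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
		inflateEnd(&zs);
		return (-1);
	}
	return (inflateEnd(&zs) == Z_OK ? 0 : -1);
}
#endif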
783
784/*
785 * Enable or disable periodic RDMAs from the host to make certain
786 * chipsets resend dropped PCIe messages
787 */
788
789static void
790mxge_dummy_rdma(mxge_softc_t *sc, int enable)
791{
792 char buf_bytes[72];
793 volatile uint32_t *confirm;
794 volatile char *submit;
795 uint32_t *buf, dma_low, dma_high;
796 int i;
797
798 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
799
800 /* clear confirmation addr */
801 confirm = (volatile uint32_t *)sc->cmd;
802 *confirm = 0;
803 wmb();
804
805 /* send an rdma command to the PCIe engine, and wait for the
806 response in the confirmation address. The firmware should
807 write a -1 there to indicate it is alive and well
808 */
809
810 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
811 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
812 buf[0] = htobe32(dma_high); /* confirm addr MSW */
813 buf[1] = htobe32(dma_low); /* confirm addr LSW */
814 buf[2] = htobe32(0xffffffff); /* confirm data */
815 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
816 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
817 buf[3] = htobe32(dma_high); /* dummy addr MSW */
818 buf[4] = htobe32(dma_low); /* dummy addr LSW */
819 buf[5] = htobe32(enable); /* enable? */
820
821
822 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
823
824 mxge_pio_copy(submit, buf, 64);
825 wmb();
826 DELAY(1000);
827 wmb();
828 i = 0;
829 while (*confirm != 0xffffffff && i < 20) {
830 DELAY(1000);
831 i++;
832 }
833 if (*confirm != 0xffffffff) {
834 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
835 (enable ? "enable" : "disable"), confirm,
836 *confirm);
837 }
838 return;
839}
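
#if 0
/*
 * Illustrative sketch (hypothetical helper, not driver code): the
 * pointer arithmetic used above to carve an 8-byte-aligned window out
 * of a plain char array.  Rounding (p + 7) down with & ~7 gives the
 * first 8-byte boundary at or after p, which is why buf_bytes is
 * declared a few bytes larger than the data actually needs.
 */
static uint32_t *
sketch_align8(char *raw)
{
	return ((uint32_t *)(((unsigned long)raw + 7) & ~7UL));
}
/* e.g. raw == 0x1003: 0x1003 + 7 == 0x100a, & ~7 -> 0x1008 */
#endif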
840
841static int
842mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
843{
844 mcp_cmd_t *buf;
845 char buf_bytes[sizeof(*buf) + 8];
846 volatile mcp_cmd_response_t *response = sc->cmd;
847 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
848 uint32_t dma_low, dma_high;
849 int err, sleep_total = 0;
850
851 /* ensure buf is aligned to 8 bytes */
852 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
853
854 buf->data0 = htobe32(data->data0);
855 buf->data1 = htobe32(data->data1);
856 buf->data2 = htobe32(data->data2);
857 buf->cmd = htobe32(cmd);
858 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
859 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
860
861 buf->response_addr.low = htobe32(dma_low);
862 buf->response_addr.high = htobe32(dma_high);
863 mtx_lock(&sc->cmd_mtx);
864 response->result = 0xffffffff;
865 wmb();
866 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
867
868 /* wait up to 20ms */
869 err = EAGAIN;
870 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
871 bus_dmamap_sync(sc->cmd_dma.dmat,
872 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
873 wmb();
874 switch (be32toh(response->result)) {
875 case 0:
876 data->data0 = be32toh(response->data);
877 err = 0;
878 break;
879 case 0xffffffff:
880 DELAY(1000);
881 break;
882 case MXGEFW_CMD_UNKNOWN:
883 err = ENOSYS;
884 break;
885 case MXGEFW_CMD_ERROR_UNALIGNED:
886 err = E2BIG;
887 break;
888 case MXGEFW_CMD_ERROR_BUSY:
889 err = EBUSY;
890 break;
891 case MXGEFW_CMD_ERROR_I2C_ABSENT:
892 err = ENXIO;
893 break;
894 default:
895 device_printf(sc->dev,
896 "mxge: command %d "
897 "failed, result = %d\n",
898 cmd, be32toh(response->result));
899 err = ENXIO;
900 break;
901 }
902 if (err != EAGAIN)
903 break;
904 }
905 if (err == EAGAIN)
906 device_printf(sc->dev, "mxge: command %d timed out"
907 "result = %d\n",
908 cmd, be32toh(response->result));
909 mtx_unlock(&sc->cmd_mtx);
910 return err;
911}
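
#if 0
/*
 * Illustrative sketch (hypothetical helper, not driver code) of the
 * bounded-poll loop above: the host watches a completion word that the
 * firmware DMAs back, sleeping 1ms per attempt and giving up after
 * roughly 20ms, with EAGAIN meaning the firmware never answered.
 */
static int
sketch_poll_response(volatile uint32_t *result)
{
	int i;

	for (i = 0; i < 20; i++) {
		if (*result != 0xffffffff)	/* firmware wrote an answer */
			return (0);
		DELAY(1000);			/* wait 1ms and retry */
	}
	return (EAGAIN);
}
#endif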
912
913static int
914mxge_adopt_running_firmware(mxge_softc_t *sc)
915{
916 struct mcp_gen_header *hdr;
917 const size_t bytes = sizeof (struct mcp_gen_header);
918 size_t hdr_offset;
919 int status;
920
921 /* find running firmware header */
922 hdr_offset = htobe32(*(volatile uint32_t *)
923 (sc->sram + MCP_HEADER_PTR_OFFSET));
924
925 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
926 device_printf(sc->dev,
927 "Running firmware has bad header offset (%d)\n",
928 (int)hdr_offset);
929 return EIO;
930 }
931
932 /* copy header of running firmware from SRAM to host memory to
933 * validate firmware */
934 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
935 if (hdr == NULL) {
936 device_printf(sc->dev, "could not malloc firmware hdr\n");
937 return ENOMEM;
938 }
939 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
940 rman_get_bushandle(sc->mem_res),
941 hdr_offset, (char *)hdr, bytes);
942 status = mxge_validate_firmware(sc, hdr);
943 free(hdr, M_DEVBUF);
944
945 /*
946 * check to see if the adopted firmware has a bug where adopting
947 * it will cause broadcasts to be filtered unless the NIC
948 * is kept in ALLMULTI mode
949 */
950 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
951 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
952 sc->adopted_rx_filter_bug = 1;
953 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
954 "working around rx filter bug\n",
955 sc->fw_ver_major, sc->fw_ver_minor,
956 sc->fw_ver_tiny);
957 }
958
959 return status;
960}
961
962
963static int
964mxge_load_firmware(mxge_softc_t *sc, int adopt)
965{
966 volatile uint32_t *confirm;
967 volatile char *submit;
968 char buf_bytes[72];
969 uint32_t *buf, size, dma_low, dma_high;
970 int status, i;
971
972 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
973
974 size = sc->sram_size;
975 status = mxge_load_firmware_helper(sc, &size);
976 if (status) {
977 if (!adopt)
978 return status;
979 /* Try to use the currently running firmware, if
980 it is new enough */
981 status = mxge_adopt_running_firmware(sc);
982 if (status) {
983 device_printf(sc->dev,
984 "failed to adopt running firmware\n");
985 return status;
986 }
987 device_printf(sc->dev,
988 "Successfully adopted running firmware\n");
989 if (sc->tx_boundary == 4096) {
990 device_printf(sc->dev,
991 "Using firmware currently running on NIC"
992 ". For optimal\n");
993 device_printf(sc->dev,
994 "performance consider loading optimized "
995 "firmware\n");
996 }
997 sc->fw_name = mxge_fw_unaligned;
998 sc->tx_boundary = 2048;
999 return 0;
1000 }
1001 /* clear confirmation addr */
1002 confirm = (volatile uint32_t *)sc->cmd;
1003 *confirm = 0;
1004 wmb();
1005 /* send a reload command to the bootstrap MCP, and wait for the
1006 response in the confirmation address. The firmware should
1007 write a -1 there to indicate it is alive and well
1008 */
1009
1010 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1011 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1012
1013 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1014 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1015 buf[2] = htobe32(0xffffffff); /* confirm data */
1016
1017 /* FIX: All newest firmware should un-protect the bottom of
1018 the sram before handoff. However, the very first interfaces
1019 do not. Therefore the handoff copy must skip the first 8 bytes
1020 */
1021 /* where the code starts */
1022 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1023 buf[4] = htobe32(size - 8); /* length of code */
1024 buf[5] = htobe32(8); /* where to copy to */
1025 buf[6] = htobe32(0); /* where to jump to */
1026
1027 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1028 mxge_pio_copy(submit, buf, 64);
1029 wmb();
1030 DELAY(1000);
1031 wmb();
1032 i = 0;
1033 while (*confirm != 0xffffffff && i < 20) {
1034 DELAY(1000*10);
1035 i++;
1036 bus_dmamap_sync(sc->cmd_dma.dmat,
1037 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1038 }
1039 if (*confirm != 0xffffffff) {
1040 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1041 confirm, *confirm);
1042
1043 return ENXIO;
1044 }
1045 return 0;
1046}
1047
1048static int
1049mxge_update_mac_address(mxge_softc_t *sc)
1050{
1051 mxge_cmd_t cmd;
1052 uint8_t *addr = sc->mac_addr;
1053 int status;
1054
1055
1056 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1057 | (addr[2] << 8) | addr[3]);
1058
1059 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1060
1061 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1062 return status;
1063}
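
#if 0
/*
 * Illustrative sketch (hypothetical helper, not driver code) of the
 * packing above: the first four octets of the MAC address fill data0
 * most-significant first, and the last two fill the low 16 bits of
 * data1, e.g. 00:60:dd:aa:bb:cc -> data0 0x0060ddaa, data1 0x0000bbcc.
 */
static void
sketch_pack_mac(const uint8_t addr[6], uint32_t *data0, uint32_t *data1)
{
	*data0 = ((uint32_t)addr[0] << 24) | ((uint32_t)addr[1] << 16) |
	    ((uint32_t)addr[2] << 8) | addr[3];
	*data1 = ((uint32_t)addr[4] << 8) | addr[5];
}
#endif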
1064
1065static int
1066mxge_change_pause(mxge_softc_t *sc, int pause)
1067{
1068 mxge_cmd_t cmd;
1069 int status;
1070
1071 if (pause)
1072 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1073 &cmd);
1074 else
1075 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1076 &cmd);
1077
1078 if (status) {
1079 device_printf(sc->dev, "Failed to set flow control mode\n");
1080 return ENXIO;
1081 }
1082 sc->pause = pause;
1083 return 0;
1084}
1085
1086static void
1087mxge_change_promisc(mxge_softc_t *sc, int promisc)
1088{
1089 mxge_cmd_t cmd;
1090 int status;
1091
1092 if (mxge_always_promisc)
1093 promisc = 1;
1094
1095 if (promisc)
1096 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1097 &cmd);
1098 else
1099 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1100 &cmd);
1101
1102 if (status) {
1103 device_printf(sc->dev, "Failed to set promisc mode\n");
1104 }
1105}
1106
1107static void
1108mxge_set_multicast_list(mxge_softc_t *sc)
1109{
1110 mxge_cmd_t cmd;
1111 struct ifmultiaddr *ifma;
1112 struct ifnet *ifp = sc->ifp;
1113 int err;
1114
1115 /* This firmware is known to not support multicast */
1116 if (!sc->fw_multicast_support)
1117 return;
1118
1119 /* Disable multicast filtering while we play with the lists*/
1120 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1121 if (err != 0) {
1122 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1123 " error status: %d\n", err);
1124 return;
1125 }
1126
1127 if (sc->adopted_rx_filter_bug)
1128 return;
1129
1130 if (ifp->if_flags & IFF_ALLMULTI)
1131 /* request to disable multicast filtering, so quit here */
1132 return;
1133
1134 /* Flush all the filters */
1135
1136 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1137 if (err != 0) {
1138 device_printf(sc->dev,
1139 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1140 ", error status: %d\n", err);
1141 return;
1142 }
1143
1144 /* Walk the multicast list, and add each address */
1145
1146 if_maddr_rlock(ifp);
1147 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1148 if (ifma->ifma_addr->sa_family != AF_LINK)
1149 continue;
1150 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1151 &cmd.data0, 4);
1152 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1153 &cmd.data1, 2);
1154 cmd.data0 = htonl(cmd.data0);
1155 cmd.data1 = htonl(cmd.data1);
1156 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1157 if (err != 0) {
1158 device_printf(sc->dev, "Failed "
1159 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1160 "%d\t", err);
1161 /* abort, leaving multicast filtering off */
1162 if_maddr_runlock(ifp);
1163 return;
1164 }
1165 }
1166 if_maddr_runlock(ifp);
1167 /* Enable multicast filtering */
1168 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1169 if (err != 0) {
1170 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1171 ", error status: %d\n", err);
1172 }
1173}
1174
1175static int
1176mxge_max_mtu(mxge_softc_t *sc)
1177{
1178 mxge_cmd_t cmd;
1179 int status;
1180
1181 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1182 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1183
1184 /* try to set nbufs to see if we can
1185 use virtually contiguous jumbos */
1186 cmd.data0 = 0;
1187 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1188 &cmd);
1189 if (status == 0)
1190 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1191
1192 /* otherwise, we're limited to MJUMPAGESIZE */
1193 return MJUMPAGESIZE - MXGEFW_PAD;
1194}
1195
1196static int
1197mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1198{
1199 struct mxge_slice_state *ss;
1200 mxge_rx_done_t *rx_done;
1201 volatile uint32_t *irq_claim;
1202 mxge_cmd_t cmd;
1203 int slice, status;
1204
1205 /* try to send a reset command to the card to see if it
1206 is alive */
1207 memset(&cmd, 0, sizeof (cmd));
1208 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1209 if (status != 0) {
1210 device_printf(sc->dev, "failed reset\n");
1211 return ENXIO;
1212 }
1213
1214 mxge_dummy_rdma(sc, 1);
1215
1216
1217 /* set the intrq size */
1218 cmd.data0 = sc->rx_ring_size;
1219 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1220
1221 /*
1222 * Even though we already know how many slices are supported
1223 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1224 * has magic side effects, and must be called after a reset.
1225 * It must be called prior to calling any RSS related cmds,
1226 * including assigning an interrupt queue for anything but
1227 * slice 0. It must also be called *after*
1228 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1229 * the firmware to compute offsets.
1230 */
1231
1232 if (sc->num_slices > 1) {
1233 /* ask the maximum number of slices it supports */
1234 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1235 &cmd);
1236 if (status != 0) {
1237 device_printf(sc->dev,
1238 "failed to get number of slices\n");
1239 return status;
1240 }
1241 /*
1242 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1243 * to setting up the interrupt queue DMA
1244 */
1245 cmd.data0 = sc->num_slices;
1246 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1247#ifdef IFNET_BUF_RING
1248 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1249#endif
1250 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1251 &cmd);
1252 if (status != 0) {
1253 device_printf(sc->dev,
1254 "failed to set number of slices\n");
1255 return status;
1256 }
1257 }
1258
1259
1260 if (interrupts_setup) {
1261 /* Now exchange information about interrupts */
1262 for (slice = 0; slice < sc->num_slices; slice++) {
1263 rx_done = &sc->ss[slice].rx_done;
1264 memset(rx_done->entry, 0, sc->rx_ring_size);
1265 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1266 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1267 cmd.data2 = slice;
1268 status |= mxge_send_cmd(sc,
1269 MXGEFW_CMD_SET_INTRQ_DMA,
1270 &cmd);
1271 }
1272 }
1273
1274 status |= mxge_send_cmd(sc,
1275 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1276
1277
1278 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1279
1280 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1281 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1282
1283
1284 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1285 &cmd);
1286 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1287 if (status != 0) {
1288 device_printf(sc->dev, "failed set interrupt parameters\n");
1289 return status;
1290 }
1291
1292
1293 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1294
1295
1296 /* run a DMA benchmark */
1297 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1298
1299 for (slice = 0; slice < sc->num_slices; slice++) {
1300 ss = &sc->ss[slice];
1301
1302 ss->irq_claim = irq_claim + (2 * slice);
1303 /* reset mcp/driver shared state back to 0 */
1304 ss->rx_done.idx = 0;
1305 ss->rx_done.cnt = 0;
1306 ss->tx.req = 0;
1307 ss->tx.done = 0;
1308 ss->tx.pkt_done = 0;
1309 ss->tx.queue_active = 0;
1310 ss->tx.activate = 0;
1311 ss->tx.deactivate = 0;
1312 ss->tx.wake = 0;
1313 ss->tx.defrag = 0;
1314 ss->tx.stall = 0;
1315 ss->rx_big.cnt = 0;
1316 ss->rx_small.cnt = 0;
1317 ss->lc.lro_bad_csum = 0;
1318 ss->lc.lro_queued = 0;
1319 ss->lc.lro_flushed = 0;
1320 if (ss->fw_stats != NULL) {
1321 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1322 }
1323 }
1324 sc->rdma_tags_available = 15;
1325 status = mxge_update_mac_address(sc);
1326 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1327 mxge_change_pause(sc, sc->pause);
1328 mxge_set_multicast_list(sc);
1329 if (sc->throttle) {
1330 cmd.data0 = sc->throttle;
1331 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1332 &cmd)) {
1333 device_printf(sc->dev,
1334 "can't enable throttle\n");
1335 }
1336 }
1337 return status;
1338}
1339
1340static int
1341mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1342{
1343 mxge_cmd_t cmd;
1344 mxge_softc_t *sc;
1345 int err;
1346 unsigned int throttle;
1347
1348 sc = arg1;
1349 throttle = sc->throttle;
1350 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1351 if (err != 0) {
1352 return err;
1353 }
1354
1355 if (throttle == sc->throttle)
1356 return 0;
1357
1358 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1359 return EINVAL;
1360
1361 mtx_lock(&sc->driver_mtx);
1362 cmd.data0 = throttle;
1363 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1364 if (err == 0)
1365 sc->throttle = throttle;
1366 mtx_unlock(&sc->driver_mtx);
1367 return err;
1368}
1369
1370static int
1371mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1372{
1373 mxge_softc_t *sc;
1374 unsigned int intr_coal_delay;
1375 int err;
1376
1377 sc = arg1;
1378 intr_coal_delay = sc->intr_coal_delay;
1379 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1380 if (err != 0) {
1381 return err;
1382 }
1383 if (intr_coal_delay == sc->intr_coal_delay)
1384 return 0;
1385
1386 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1387 return EINVAL;
1388
1389 mtx_lock(&sc->driver_mtx);
1390 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1391 sc->intr_coal_delay = intr_coal_delay;
1392
1393 mtx_unlock(&sc->driver_mtx);
1394 return err;
1395}
1396
1397static int
1398mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1399{
1400 mxge_softc_t *sc;
1401 unsigned int enabled;
1402 int err;
1403
1404 sc = arg1;
1405 enabled = sc->pause;
1406 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1407 if (err != 0) {
1408 return err;
1409 }
1410 if (enabled == sc->pause)
1411 return 0;
1412
1413 mtx_lock(&sc->driver_mtx);
1414 err = mxge_change_pause(sc, enabled);
1415 mtx_unlock(&sc->driver_mtx);
1416 return err;
1417}
1418
1419static int
1420mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1421{
1422 int err;
1423
1424 if (arg1 == NULL)
1425 return EFAULT;
1426 arg2 = be32toh(*(int *)arg1);
1427 arg1 = NULL;
1428 err = sysctl_handle_int(oidp, arg1, arg2, req);
1429
1430 return err;
1431}
1432
1433static void
1434mxge_rem_sysctls(mxge_softc_t *sc)
1435{
1436 struct mxge_slice_state *ss;
1437 int slice;
1438
1439 if (sc->slice_sysctl_tree == NULL)
1440 return;
1441
1442 for (slice = 0; slice < sc->num_slices; slice++) {
1443 ss = &sc->ss[slice];
1444 if (ss == NULL || ss->sysctl_tree == NULL)
1445 continue;
1446 sysctl_ctx_free(&ss->sysctl_ctx);
1447 ss->sysctl_tree = NULL;
1448 }
1449 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1450 sc->slice_sysctl_tree = NULL;
1451}
1452
1453static void
1454mxge_add_sysctls(mxge_softc_t *sc)
1455{
1456 struct sysctl_ctx_list *ctx;
1457 struct sysctl_oid_list *children;
1458 mcp_irq_data_t *fw;
1459 struct mxge_slice_state *ss;
1460 int slice;
1461 char slice_num[8];
1462
1463 ctx = device_get_sysctl_ctx(sc->dev);
1464 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1465 fw = sc->ss[0].fw_stats;
1466
1467 /* random information */
1468 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1469 "firmware_version",
1470 CTLFLAG_RD, &sc->fw_version,
1471 0, "firmware version");
1472 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1473 "serial_number",
1474 CTLFLAG_RD, &sc->serial_number_string,
1475 0, "serial number");
1476 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1477 "product_code",
1478 CTLFLAG_RD, &sc->product_code_string,
1479 0, "product_code");
1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1481 "pcie_link_width",
1482 CTLFLAG_RD, &sc->link_width,
1483 0, "tx_boundary");
1484 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1485 "tx_boundary",
1486 CTLFLAG_RD, &sc->tx_boundary,
1487 0, "tx_boundary");
1488 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1489 "write_combine",
1490 CTLFLAG_RD, &sc->wc,
1491 0, "write combining PIO?");
1492 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1493 "read_dma_MBs",
1494 CTLFLAG_RD, &sc->read_dma,
1495 0, "DMA Read speed in MB/s");
1496 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1497 "write_dma_MBs",
1498 CTLFLAG_RD, &sc->write_dma,
1499 0, "DMA Write speed in MB/s");
1500 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1501 "read_write_dma_MBs",
1502 CTLFLAG_RD, &sc->read_write_dma,
1503 0, "DMA concurrent Read/Write speed in MB/s");
1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1505 "watchdog_resets",
1506 CTLFLAG_RD, &sc->watchdog_resets,
1507 0, "Number of times NIC was reset");
1508
1509
1510 /* performance related tunables */
1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1512 "intr_coal_delay",
1513 CTLTYPE_INT|CTLFLAG_RW, sc,
1514 0, mxge_change_intr_coal,
1515 "I", "interrupt coalescing delay in usecs");
1516
1517 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1518 "throttle",
1519 CTLTYPE_INT|CTLFLAG_RW, sc,
1520 0, mxge_change_throttle,
1521 "I", "transmit throttling");
1522
1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1524 "flow_control_enabled",
1525 CTLTYPE_INT|CTLFLAG_RW, sc,
1526 0, mxge_change_flow_control,
1527 "I", "interrupt coalescing delay in usecs");
1528
1529 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1530 "deassert_wait",
1531 CTLFLAG_RW, &mxge_deassert_wait,
1532 0, "Wait for IRQ line to go low in ihandler");
1533
1534 /* stats block from firmware is in network byte order.
1535 Need to swap it */
1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1537 "link_up",
1538 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1539 0, mxge_handle_be32,
1540 "I", "link up");
1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1542 "rdma_tags_available",
1543 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1544 0, mxge_handle_be32,
1545 "I", "rdma_tags_available");
1546 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1547 "dropped_bad_crc32",
1548 CTLTYPE_INT|CTLFLAG_RD,
1549 &fw->dropped_bad_crc32,
1550 0, mxge_handle_be32,
1551 "I", "dropped_bad_crc32");
1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1553 "dropped_bad_phy",
1554 CTLTYPE_INT|CTLFLAG_RD,
1555 &fw->dropped_bad_phy,
1556 0, mxge_handle_be32,
1557 "I", "dropped_bad_phy");
1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1559 "dropped_link_error_or_filtered",
1560 CTLTYPE_INT|CTLFLAG_RD,
1561 &fw->dropped_link_error_or_filtered,
1562 0, mxge_handle_be32,
1563 "I", "dropped_link_error_or_filtered");
1564 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1565 "dropped_link_overflow",
1566 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1567 0, mxge_handle_be32,
1568 "I", "dropped_link_overflow");
1569 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1570 "dropped_multicast_filtered",
1571 CTLTYPE_INT|CTLFLAG_RD,
1572 &fw->dropped_multicast_filtered,
1573 0, mxge_handle_be32,
1574 "I", "dropped_multicast_filtered");
1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1576 "dropped_no_big_buffer",
1577 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1578 0, mxge_handle_be32,
1579 "I", "dropped_no_big_buffer");
1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1581 "dropped_no_small_buffer",
1582 CTLTYPE_INT|CTLFLAG_RD,
1583 &fw->dropped_no_small_buffer,
1584 0, mxge_handle_be32,
1585 "I", "dropped_no_small_buffer");
1586 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1587 "dropped_overrun",
1588 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1589 0, mxge_handle_be32,
1590 "I", "dropped_overrun");
1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1592 "dropped_pause",
1593 CTLTYPE_INT|CTLFLAG_RD,
1594 &fw->dropped_pause,
1595 0, mxge_handle_be32,
1596 "I", "dropped_pause");
1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1598 "dropped_runt",
1599 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1600 0, mxge_handle_be32,
1601 "I", "dropped_runt");
1602
1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1604 "dropped_unicast_filtered",
1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1606 0, mxge_handle_be32,
1607 "I", "dropped_unicast_filtered");
1608
1609 /* verbose printing? */
1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1611 "verbose",
1612 CTLFLAG_RW, &mxge_verbose,
1613 0, "verbose printing");
1614
1615 /* add counters exported for debugging from all slices */
1616 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1617 sc->slice_sysctl_tree =
1618 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1619 "slice", CTLFLAG_RD, 0, "");
1620
1621 for (slice = 0; slice < sc->num_slices; slice++) {
1622 ss = &sc->ss[slice];
1623 sysctl_ctx_init(&ss->sysctl_ctx);
1624 ctx = &ss->sysctl_ctx;
1625 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1626 sprintf(slice_num, "%d", slice);
1627 ss->sysctl_tree =
1628 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1629 CTLFLAG_RD, 0, "");
1630 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1631 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1632 "rx_small_cnt",
1633 CTLFLAG_RD, &ss->rx_small.cnt,
1634 0, "rx_small_cnt");
1635 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1636 "rx_big_cnt",
1637 CTLFLAG_RD, &ss->rx_big.cnt,
1638 0, "rx_small_cnt");
1639 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1640 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
1641 0, "number of lro merge queues flushed");
1642
1643 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1644 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
1645 0, "number of bad csums preventing LRO");
1646
1647 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1648 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
1649 0, "number of frames appended to lro merge"
1650 "queues");
1651
1652#ifndef IFNET_BUF_RING
1653 /* only transmit from slice 0 for now */
1654 if (slice > 0)
1655 continue;
1656#endif
1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1658 "tx_req",
1659 CTLFLAG_RD, &ss->tx.req,
1660 0, "tx_req");
1661
1662 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1663 "tx_done",
1664 CTLFLAG_RD, &ss->tx.done,
1665 0, "tx_done");
1666 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1667 "tx_pkt_done",
1668 CTLFLAG_RD, &ss->tx.pkt_done,
1669 0, "tx_done");
1670 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1671 "tx_stall",
1672 CTLFLAG_RD, &ss->tx.stall,
1673 0, "tx_stall");
1674 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1675 "tx_wake",
1676 CTLFLAG_RD, &ss->tx.wake,
1677 0, "tx_wake");
1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1679 "tx_defrag",
1680 CTLFLAG_RD, &ss->tx.defrag,
1681 0, "tx_defrag");
1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1683 "tx_queue_active",
1684 CTLFLAG_RD, &ss->tx.queue_active,
1685 0, "tx_queue_active");
1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1687 "tx_activate",
1688 CTLFLAG_RD, &ss->tx.activate,
1689 0, "tx_activate");
1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1691 "tx_deactivate",
1692 CTLFLAG_RD, &ss->tx.deactivate,
1693 0, "tx_deactivate");
1694 }
1695}
1696
1697/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1698 backwards one at a time and handle ring wraps */
1699
1700static inline void
1701mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1702 mcp_kreq_ether_send_t *src, int cnt)
1703{
1704 int idx, starting_slot;
1705 starting_slot = tx->req;
1706 while (cnt > 1) {
1707 cnt--;
1708 idx = (starting_slot + cnt) & tx->mask;
1709 mxge_pio_copy(&tx->lanai[idx],
1710 &src[cnt], sizeof(*src));
1711 wmb();
1712 }
1713}
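
#if 0
/*
 * Illustrative sketch (hypothetical helper, not driver code): the ring
 * arithmetic above relies on the ring size being a power of two, so
 * tx->mask == size - 1 and "index & mask" is a cheap modulo that wraps
 * (starting_slot + cnt) cleanly past the end of the ring.
 */
static unsigned int
sketch_ring_slot(unsigned int start, unsigned int offset,
		 unsigned int size_pow2)
{
	return ((start + offset) & (size_pow2 - 1));
}
/* e.g. size 256, start 250, offset 10 -> (260 & 255) == slot 4 */
#endif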
1714
1715/*
1716 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1717 * at most 32 bytes at a time, so as to avoid involving the software
1718 * pio handler in the nic. We re-write the first segment's flags
1719 * to mark them valid only after writing the entire chain
1720 */
1721
1722static inline void
1723mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1724 int cnt)
1725{
1726 int idx, i;
1727 uint32_t *src_ints;
1728 volatile uint32_t *dst_ints;
1729 mcp_kreq_ether_send_t *srcp;
1730 volatile mcp_kreq_ether_send_t *dstp, *dst;
1731 uint8_t last_flags;
1732
1733 idx = tx->req & tx->mask;
1734
1735 last_flags = src->flags;
1736 src->flags = 0;
1737 wmb();
1738 dst = dstp = &tx->lanai[idx];
1739 srcp = src;
1740
1741 if ((idx + cnt) < tx->mask) {
1742 for (i = 0; i < (cnt - 1); i += 2) {
1743 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1744 wmb(); /* force write every 32 bytes */
1745 srcp += 2;
1746 dstp += 2;
1747 }
1748 } else {
1749 /* submit all but the first request, and ensure
1750 that it is submitted below */
1751 mxge_submit_req_backwards(tx, src, cnt);
1752 i = 0;
1753 }
1754 if (i < cnt) {
1755 /* submit the first request */
1756 mxge_pio_copy(dstp, srcp, sizeof(*src));
1757 wmb(); /* barrier before setting valid flag */
1758 }
1759
1760 /* re-write the last 32-bits with the valid flags */
1761 src->flags = last_flags;
1762 src_ints = (uint32_t *)src;
1763 src_ints+=3;
1764 dst_ints = (volatile uint32_t *)dst;
1765 dst_ints+=3;
1766 *dst_ints = *src_ints;
1767 tx->req += cnt;
1768 wmb();
1769}
1770
1771static int
1772mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
1773 struct mxge_pkt_info *pi)
1774{
1775 struct ether_vlan_header *eh;
1776 uint16_t etype;
1777 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
1778#if IFCAP_TSO6 && defined(INET6)
1779 int nxt;
1780#endif
1781
1782 eh = mtod(m, struct ether_vlan_header *);
1783 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1784 etype = ntohs(eh->evl_proto);
1785 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1786 } else {
1787 etype = ntohs(eh->evl_encap_proto);
1788 pi->ip_off = ETHER_HDR_LEN;
1789 }
1790
1791 switch (etype) {
1792 case ETHERTYPE_IP:
1793 /*
1794 * ensure ip header is in first mbuf, copy it to a
1795 * scratch buffer if not
1796 */
1797 pi->ip = (struct ip *)(m->m_data + pi->ip_off);
1798 pi->ip6 = NULL;
1799 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
1800 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
1801 ss->scratch);
1802 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1803 }
1804 pi->ip_hlen = pi->ip->ip_hl << 2;
1805 if (!tso)
1806 return 0;
1807
1808 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1809 sizeof(struct tcphdr))) {
1810 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1811 sizeof(struct tcphdr), ss->scratch);
1812 pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
1813 }
1814 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
1815 break;
1816#if IFCAP_TSO6 && defined(INET6)
1817 case ETHERTYPE_IPV6:
1818 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
1819 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
1820 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
1821 ss->scratch);
1822 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1823 }
1824 nxt = 0;
1825 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
1826 pi->ip_hlen -= pi->ip_off;
1827 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
1828 return EINVAL;
1829
1830 if (!tso)
1831 return 0;
1832
1833 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
1834 return EINVAL;
1835
1836 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
1837 sizeof(struct tcphdr))) {
1838 m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
1839 sizeof(struct tcphdr), ss->scratch);
1840 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
1841 }
1842 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
1843 break;
1844#endif
1845 default:
1846 return EINVAL;
1847 }
1848 return 0;
1849}
1850
1851#if IFCAP_TSO4
1852
1853static void
1854mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1855 int busdma_seg_cnt, struct mxge_pkt_info *pi)
1856{
1857 mxge_tx_ring_t *tx;
1858 mcp_kreq_ether_send_t *req;
1859 bus_dma_segment_t *seg;
1860 uint32_t low, high_swapped;
1861 int len, seglen, cum_len, cum_len_next;
1862 int next_is_first, chop, cnt, rdma_count, small;
1863 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
1864 uint8_t flags, flags_next;
1865 static int once;
1866
1867 mss = m->m_pkthdr.tso_segsz;
1868
1869 /* negative cum_len signifies to the
1870 * send loop that we are still in the
1871 * header portion of the TSO packet.
1872 */
1873
1874 cksum_offset = pi->ip_off + pi->ip_hlen;
1875 cum_len = -(cksum_offset + (pi->tcp->th_off << 2));
1876
1877 /* TSO implies checksum offload on this hardware */
1878 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
1879 /*
1880 * If packet has full TCP csum, replace it with pseudo hdr
1881 * sum that the NIC expects, otherwise the NIC will emit
1882 * packets with bad TCP checksums.
1883 */
1884 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1885 if (pi->ip6) {
1886#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
1887 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
1888 sum = in6_cksum_pseudo(pi->ip6,
1889 m->m_pkthdr.len - cksum_offset,
1890 IPPROTO_TCP, 0);
1891#endif
1892 } else {
1893#ifdef INET
1894 m->m_pkthdr.csum_flags |= CSUM_TCP;
1895 sum = in_pseudo(pi->ip->ip_src.s_addr,
1896 pi->ip->ip_dst.s_addr,
1897 htons(IPPROTO_TCP + (m->m_pkthdr.len -
1898 cksum_offset)));
1899#endif
1900 }
1901 m_copyback(m, offsetof(struct tcphdr, th_sum) +
1902 cksum_offset, sizeof(sum), (caddr_t)&sum);
1903 }
1904 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1905
1906
1907 /* for TSO, pseudo_hdr_offset holds mss.
1908 * The firmware figures out where to put
1909 * the checksum by parsing the header. */
1910 pseudo_hdr_offset = htobe16(mss);
1911
1912 if (pi->ip6) {
1913 /*
1914 * for IPv6 TSO, the "checksum offset" is re-purposed
1915 * to store the TCP header len
1916 */
1917 cksum_offset = (pi->tcp->th_off << 2);
1918 }
1919
1920 tx = &ss->tx;
1921 req = tx->req_list;
1922 seg = tx->seg_list;
1923 cnt = 0;
1924 rdma_count = 0;
1925 /* "rdma_count" is the number of RDMAs belonging to the
1926 * current packet BEFORE the current send request. For
1927 * non-TSO packets, this is equal to "count".
1928 * For TSO packets, rdma_count needs to be reset
1929 * to 0 after a segment cut.
1930 *
1931 * The rdma_count field of the send request is
1932 * the number of RDMAs of the packet starting at
1933 * that request. For TSO send requests with one or more cuts
1934 * in the middle, this is the number of RDMAs starting
1935 * after the last cut in the request. All previous
1936 * segments before the last cut implicitly have 1 RDMA.
1937 *
1938 * Since the number of RDMAs is not known beforehand,
1939 * it must be filled-in retroactively - after each
1940 * segmentation cut or at the end of the entire packet.
1941 */
1942
1943 while (busdma_seg_cnt) {
1944 /* Break the busdma segment up into pieces*/
1945 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1946 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1947 len = seg->ds_len;
1948
1949 while (len) {
1950 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1951 seglen = len;
1952 cum_len_next = cum_len + seglen;
1953 (req-rdma_count)->rdma_count = rdma_count + 1;
1954 if (__predict_true(cum_len >= 0)) {
1955 /* payload */
1956 chop = (cum_len_next > mss);
1957 cum_len_next = cum_len_next % mss;
1958 next_is_first = (cum_len_next == 0);
1959 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1960 flags_next |= next_is_first *
1961 MXGEFW_FLAGS_FIRST;
1962 rdma_count |= -(chop | next_is_first);
1963 rdma_count += chop & !next_is_first;
1964 } else if (cum_len_next >= 0) {
1965 /* header ends */
1966 rdma_count = -1;
1967 cum_len_next = 0;
1968 seglen = -cum_len;
1969 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1970 flags_next = MXGEFW_FLAGS_TSO_PLD |
1971 MXGEFW_FLAGS_FIRST |
1972 (small * MXGEFW_FLAGS_SMALL);
1973 }
1974
1975 req->addr_high = high_swapped;
1976 req->addr_low = htobe32(low);
1977 req->pseudo_hdr_offset = pseudo_hdr_offset;
1978 req->pad = 0;
1979 req->rdma_count = 1;
1980 req->length = htobe16(seglen);
1981 req->cksum_offset = cksum_offset;
1982 req->flags = flags | ((cum_len & 1) *
1983 MXGEFW_FLAGS_ALIGN_ODD);
1984 low += seglen;
1985 len -= seglen;
1986 cum_len = cum_len_next;
1987 flags = flags_next;
1988 req++;
1989 cnt++;
1990 rdma_count++;
1991 if (cksum_offset != 0 && !pi->ip6) {
1992 if (__predict_false(cksum_offset > seglen))
1993 cksum_offset -= seglen;
1994 else
1995 cksum_offset = 0;
1996 }
1997 if (__predict_false(cnt > tx->max_desc))
1998 goto drop;
1999 }
2000 busdma_seg_cnt--;
2001 seg++;
2002 }
2003 (req-rdma_count)->rdma_count = rdma_count;
2004
2005 do {
2006 req--;
2007 req->flags |= MXGEFW_FLAGS_TSO_LAST;
2008 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
2009
2010 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2011 mxge_submit_req(tx, tx->req_list, cnt);
2012#ifdef IFNET_BUF_RING
2013 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2014 /* tell the NIC to start polling this slice */
2015 *tx->send_go = 1;
2016 tx->queue_active = 1;
2017 tx->activate++;
2018 wmb();
2019 }
2020#endif
2021 return;
2022
2023drop:
2024 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
2025 m_freem(m);
2026 ss->oerrors++;
2027 if (!once) {
2028 printf("tx->max_desc exceeded via TSO!\n");
2029 printf("mss = %d, %ld, %d!\n", mss,
2030 (long)seg - (long)tx->seg_list, tx->max_desc);
2031 once = 1;
2032 }
2033 return;
2034
2035}
2036
2037#endif /* IFCAP_TSO4 */
2038
2039#ifdef MXGE_NEW_VLAN_API
2040/*
2041 * We reproduce the software vlan tag insertion from
2042 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
2043 * vlan tag insertion. We need to advertise this in order to have the
2044 * vlan interface respect our csum offload flags.
2045 */
2046static struct mbuf *
2047mxge_vlan_tag_insert(struct mbuf *m)
2048{
2049 struct ether_vlan_header *evl;
2050
2051 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
2052 if (__predict_false(m == NULL))
2053 return NULL;
2054 if (m->m_len < sizeof(*evl)) {
2055 m = m_pullup(m, sizeof(*evl));
2056 if (__predict_false(m == NULL))
2057 return NULL;
2058 }
2059 /*
2060 * Transform the Ethernet header into an Ethernet header
2061 * with 802.1Q encapsulation.
2062 */
2063 evl = mtod(m, struct ether_vlan_header *);
2064 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2065 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2066 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2067 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2068 m->m_flags &= ~M_VLANTAG;
2069 return m;
2070}
2071#endif /* MXGE_NEW_VLAN_API */
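
#if 0
/*
 * Standalone sketch (userland, hypothetical names) of the header
 * surgery in mxge_vlan_tag_insert() above.  M_PREPEND opens 4 bytes of
 * room, the two 6-byte MAC addresses slide to the front, and the freed
 * 4 bytes become the 802.1Q TPID (0x8100) plus the tag, leaving the
 * original ethertype in place:
 *
 *   before: pad[4] dst[6] src[6] type[2] payload...
 *   after:  dst[6] src[6] 8100 tag[2] type[2] payload...
 */
#include <stdint.h>
#include <string.h>

static void
sketch_insert_vlan(uint8_t *frame, uint16_t tag)
{
	memmove(frame, frame + 4, 12);		/* slide dst + src MACs up */
	frame[12] = 0x81;			/* TPID, network order */
	frame[13] = 0x00;
	frame[14] = tag >> 8;			/* tag, network order */
	frame[15] = tag & 0xff;
}
#endif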
2072
2073static void
2074mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2075{
2076 struct mxge_pkt_info pi = {0,0,0,0};
2077 mxge_softc_t *sc;
2078 mcp_kreq_ether_send_t *req;
2079 bus_dma_segment_t *seg;
2080 struct mbuf *m_tmp;
2081 struct ifnet *ifp;
2082 mxge_tx_ring_t *tx;
2083 int cnt, cum_len, err, i, idx, odd_flag;
2084 uint16_t pseudo_hdr_offset;
2085 uint8_t flags, cksum_offset;
2086
2087
2088 sc = ss->sc;
2089 ifp = sc->ifp;
2090 tx = &ss->tx;
2091
2092#ifdef MXGE_NEW_VLAN_API
2093 if (m->m_flags & M_VLANTAG) {
2094 m = mxge_vlan_tag_insert(m);
2095 if (__predict_false(m == NULL))
2096 goto drop_without_m;
2097 }
2098#endif
2099 if (m->m_pkthdr.csum_flags &
2100 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2101 if (mxge_parse_tx(ss, m, &pi))
2102 goto drop;
2103 }
2104
2105 /* (try to) map the frame for DMA */
2106 idx = tx->req & tx->mask;
2107 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2108 m, tx->seg_list, &cnt,
2109 BUS_DMA_NOWAIT);
2110 if (__predict_false(err == EFBIG)) {
2111 /* Too many segments in the chain. Try
2112 to defrag */
2113 m_tmp = m_defrag(m, M_NOWAIT);
2114 if (m_tmp == NULL) {
2115 goto drop;
2116 }
2117 ss->tx.defrag++;
2118 m = m_tmp;
2119 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2120 tx->info[idx].map,
2121 m, tx->seg_list, &cnt,
2122 BUS_DMA_NOWAIT);
2123 }
2124 if (__predict_false(err != 0)) {
2125 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2126 " packet len = %d\n", err, m->m_pkthdr.len);
2127 goto drop;
2128 }
2129 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2130 BUS_DMASYNC_PREWRITE);
2131 tx->info[idx].m = m;
2132
2133#if IFCAP_TSO4
2134 /* TSO is different enough, we handle it in another routine */
2135 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2136 mxge_encap_tso(ss, m, cnt, &pi);
2137 return;
2138 }
2139#endif
2140
2141 req = tx->req_list;
2142 cksum_offset = 0;
2143 pseudo_hdr_offset = 0;
2144 flags = MXGEFW_FLAGS_NO_TSO;
2145
2146 /* checksum offloading? */
2147 if (m->m_pkthdr.csum_flags &
2148 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
2149 /* ensure ip header is in first mbuf, copy
2150 it to a scratch buffer if not */
2151 cksum_offset = pi.ip_off + pi.ip_hlen;
2152 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2153 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2154 req->cksum_offset = cksum_offset;
2155 flags |= MXGEFW_FLAGS_CKSUM;
2156 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2157 } else {
2158 odd_flag = 0;
2159 }
2160 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2161 flags |= MXGEFW_FLAGS_SMALL;
2162
2163 /* convert segments into a request list */
2164 cum_len = 0;
2165 seg = tx->seg_list;
2166 req->flags = MXGEFW_FLAGS_FIRST;
2167 for (i = 0; i < cnt; i++) {
2168 req->addr_low =
2169 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2170 req->addr_high =
2171 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2172 req->length = htobe16(seg->ds_len);
2173 req->cksum_offset = cksum_offset;
2174 if (cksum_offset > seg->ds_len)
2175 cksum_offset -= seg->ds_len;
2176 else
2177 cksum_offset = 0;
2178 req->pseudo_hdr_offset = pseudo_hdr_offset;
2179 req->pad = 0; /* complete solid 16-byte block */
2180 req->rdma_count = 1;
2181 req->flags |= flags | ((cum_len & 1) * odd_flag);
2182 cum_len += seg->ds_len;
2183 seg++;
2184 req++;
2185 req->flags = 0;
2186 }
2187 req--;
2188 /* pad runts to 60 bytes */
2189 if (cum_len < 60) {
2190 req++;
2191 req->addr_low =
2192 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2193 req->addr_high =
2194 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2195 req->length = htobe16(60 - cum_len);
2196 req->cksum_offset = 0;
2197 req->pseudo_hdr_offset = pseudo_hdr_offset;
2198 req->pad = 0; /* complete solid 16-byte block */
2199 req->rdma_count = 1;
2200 req->flags |= flags | ((cum_len & 1) * odd_flag);
2201 cnt++;
2202 }
2203
2204 tx->req_list[0].rdma_count = cnt;
2205#if 0
2206 /* print what the firmware will see */
2207 for (i = 0; i < cnt; i++) {
2208 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2209 "cso:%d, flags:0x%x, rdma:%d\n",
2210 i, (int)ntohl(tx->req_list[i].addr_high),
2211 (int)ntohl(tx->req_list[i].addr_low),
2212 (int)ntohs(tx->req_list[i].length),
2213 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2214 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2215 tx->req_list[i].rdma_count);
2216 }
2217 printf("--------------\n");
2218#endif
2219 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2220 mxge_submit_req(tx, tx->req_list, cnt);
2221#ifdef IFNET_BUF_RING
2222 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2223 /* tell the NIC to start polling this slice */
2224 *tx->send_go = 1;
2225 tx->queue_active = 1;
2226 tx->activate++;
2227 wmb();
2228 }
2229#endif
2230 return;
2231
2232drop:
2233 m_freem(m);
2234drop_without_m:
2235 ss->oerrors++;
2236 return;
2237}
2238
2239#ifdef IFNET_BUF_RING
2240static void
2241mxge_qflush(struct ifnet *ifp)
2242{
2243 mxge_softc_t *sc = ifp->if_softc;
2244 mxge_tx_ring_t *tx;
2245 struct mbuf *m;
2246 int slice;
2247
2248 for (slice = 0; slice < sc->num_slices; slice++) {
2249 tx = &sc->ss[slice].tx;
2250 mtx_lock(&tx->mtx);
2251 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2252 m_freem(m);
2253 mtx_unlock(&tx->mtx);
2254 }
2255 if_qflush(ifp);
2256}
2257
2258static inline void
2259mxge_start_locked(struct mxge_slice_state *ss)
2260{
2261 mxge_softc_t *sc;
2262 struct mbuf *m;
2263 struct ifnet *ifp;
2264 mxge_tx_ring_t *tx;
2265
2266 sc = ss->sc;
2267 ifp = sc->ifp;
2268 tx = &ss->tx;
2269
2270 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2271 m = drbr_dequeue(ifp, tx->br);
2272 if (m == NULL) {
2273 return;
2274 }
2275 /* let BPF see it */
2276 BPF_MTAP(ifp, m);
2277
2278 /* give it to the nic */
2279 mxge_encap(ss, m);
2280 }
2281 /* ran out of transmit slots */
2282 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2283 && (!drbr_empty(ifp, tx->br))) {
2284 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2285 tx->stall++;
2286 }
2287}
2288
2289static int
2290mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2291{
2292 mxge_softc_t *sc;
2293 struct ifnet *ifp;
2294 mxge_tx_ring_t *tx;
2295 int err;
2296
2297 sc = ss->sc;
2298 ifp = sc->ifp;
2299 tx = &ss->tx;
2300
2301 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2302 IFF_DRV_RUNNING) {
2303 err = drbr_enqueue(ifp, tx->br, m);
2304 return (err);
2305 }
2306
2307 if (!drbr_needs_enqueue(ifp, tx->br) &&
2308 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2309 /* let BPF see it */
2310 BPF_MTAP(ifp, m);
2311 /* give it to the nic */
2312 mxge_encap(ss, m);
2313 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2314 return (err);
2315 }
2316 if (!drbr_empty(ifp, tx->br))
2317 mxge_start_locked(ss);
2318 return (0);
2319}
2320
2321static int
2322mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2323{
2324 mxge_softc_t *sc = ifp->if_softc;
2325 struct mxge_slice_state *ss;
2326 mxge_tx_ring_t *tx;
2327 int err = 0;
2328 int slice;
2329
2330 slice = m->m_pkthdr.flowid;
2331 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2332
2333 ss = &sc->ss[slice];
2334 tx = &ss->tx;
2335
2336 if (mtx_trylock(&tx->mtx)) {
2337 err = mxge_transmit_locked(ss, m);
2338 mtx_unlock(&tx->mtx);
2339 } else {
2340 err = drbr_enqueue(ifp, tx->br, m);
2341 }
2342
2343 return (err);
2344}
2345
2346#else
2347
2348static inline void
2349mxge_start_locked(struct mxge_slice_state *ss)
2350{
2351 mxge_softc_t *sc;
2352 struct mbuf *m;
2353 struct ifnet *ifp;
2354 mxge_tx_ring_t *tx;
2355
2356 sc = ss->sc;
2357 ifp = sc->ifp;
2358 tx = &ss->tx;
2359 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2360 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2361 if (m == NULL) {
2362 return;
2363 }
2364 /* let BPF see it */
2365 BPF_MTAP(ifp, m);
2366
2367 /* give it to the nic */
2368 mxge_encap(ss, m);
2369 }
2370 /* ran out of transmit slots */
2371 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2372 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2373 tx->stall++;
2374 }
2375}
2376#endif
2377static void
2378mxge_start(struct ifnet *ifp)
2379{
2380 mxge_softc_t *sc = ifp->if_softc;
2381 struct mxge_slice_state *ss;
2382
2383 /* only use the first slice for now */
2384 ss = &sc->ss[0];
2385 mtx_lock(&ss->tx.mtx);
2386 mxge_start_locked(ss);
2387 mtx_unlock(&ss->tx.mtx);
2388}
2389
2390/*
2391 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2392 * at most 32 bytes at a time, so as to avoid involving the software
2393 * pio handler in the nic. We re-write the first segment's low
2394 * DMA address to mark it valid only after we write the entire chunk
2395 * in a burst
2396 */
2397static inline void
2398mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2399 mcp_kreq_ether_recv_t *src)
2400{
2401 uint32_t low;
2402
2403 low = src->addr_low;
2404 src->addr_low = 0xffffffff;
2405 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2406 wmb();
2407 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2408 wmb();
2409 src->addr_low = low;
2410 dst->addr_low = low;
2411 wmb();
2412}
2413
2414static int
2415mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2416{
2417 bus_dma_segment_t seg;
2418 struct mbuf *m;
2419 mxge_rx_ring_t *rx = &ss->rx_small;
2420 int cnt, err;
2421
2422 m = m_gethdr(M_NOWAIT, MT_DATA);
2423 if (m == NULL) {
2424 rx->alloc_fail++;
2425 err = ENOBUFS;
2426 goto done;
2427 }
2428 m->m_len = MHLEN;
2429 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2430 &seg, &cnt, BUS_DMA_NOWAIT);
2431 if (err != 0) {
2432 m_free(m);
2433 goto done;
2434 }
2435 rx->info[idx].m = m;
2436 rx->shadow[idx].addr_low =
2437 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2438 rx->shadow[idx].addr_high =
2439 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2440
2441done:
2442 if ((idx & 7) == 7)
2443 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2444 return err;
2445}
2446
2447static int
2448mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2449{
2450 bus_dma_segment_t seg[3];
2451 struct mbuf *m;
2452 mxge_rx_ring_t *rx = &ss->rx_big;
2453 int cnt, err, i;
2454
2455 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2456 if (m == NULL) {
2457 rx->alloc_fail++;
2458 err = ENOBUFS;
2459 goto done;
2460 }
2461 m->m_len = rx->mlen;
2462 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2463 seg, &cnt, BUS_DMA_NOWAIT);
2464 if (err != 0) {
2465 m_free(m);
2466 goto done;
2467 }
2468 rx->info[idx].m = m;
2469 rx->shadow[idx].addr_low =
2470 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2471 rx->shadow[idx].addr_high =
2472 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2473
2474#if MXGE_VIRT_JUMBOS
2475 for (i = 1; i < cnt; i++) {
2476 rx->shadow[idx + i].addr_low =
2477 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2478 rx->shadow[idx + i].addr_high =
2479 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2480 }
2481#endif
2482
2483done:
2484 for (i = 0; i < rx->nbufs; i++) {
2485 if ((idx & 7) == 7) {
2486 mxge_submit_8rx(&rx->lanai[idx - 7],
2487 &rx->shadow[idx - 7]);
2488 }
2489 idx++;
2490 }
2491 return err;
2492}
2493
2494#ifdef INET6
2495
2496static uint16_t
2497mxge_csum_generic(uint16_t *raw, int len)
2498{
2499 uint32_t csum;
2500
2501
2502 csum = 0;
2503 while (len > 0) {
2504 csum += *raw;
2505 raw++;
2506 len -= 2;
2507 }
2508 csum = (csum >> 16) + (csum & 0xffff);
2509 csum = (csum >> 16) + (csum & 0xffff);
2510 return (uint16_t)csum;
2511}
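
#if 0
/*
 * Illustrative sketch (hypothetical helper, not driver code): the
 * double fold used above reduces a 32-bit sum of 16-bit words to the
 * 16-bit ones-complement sum.  Each fold adds the carries back in;
 * two folds suffice because the first can produce at most one new
 * carry, e.g. 0x1fffe -> 0xffff -> 0xffff.
 */
static uint16_t
sketch_csum_fold(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);	/* add carries back in */
	sum = (sum >> 16) + (sum & 0xffff);	/* absorb the final carry */
	return ((uint16_t)sum);
}
#endif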
2512
2513static inline uint16_t
2514mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
2515{
2516 uint32_t partial;
2517 int nxt, cksum_offset;
2518 struct ip6_hdr *ip6 = p;
2519 uint16_t c;
2520
2521 nxt = ip6->ip6_nxt;
2522 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
2523 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2524 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
2525 IPPROTO_IPV6, &nxt);
2526 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
2527 return (1);
2528 }
2529
2530 /*
2531 * IPv6 headers do not contain a checksum, and hence
2532 * do not checksum to zero, so they don't "fall out"
2533 * of the partial checksum calculation like IPv4
2534 * headers do. We need to fix the partial checksum by
2535 * subtracting the checksum of the IPv6 header.
2536 */
2537
2538 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset -
2539 ETHER_HDR_LEN);
2540 csum += ~partial;
2541 csum += (csum < ~partial);
2542 csum = (csum >> 16) + (csum & 0xFFFF);
2543 csum = (csum >> 16) + (csum & 0xFFFF);
2544 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt,
2545 csum);
2546 c ^= 0xffff;
2547 return (c);
2548}
2549#endif /* INET6 */
2550/*
2551 * Myri10GE hardware checksums are not valid if the sender
2552 * padded the frame with non-zero padding. This is because
2553 * the firmware just does a simple 16-bit 1s complement
2554 * checksum across the entire frame, excluding the first 14
2555 * bytes. It is best to simply check the checksum and
2556 * tell the stack about it only if the checksum is good
2557 */
2558
2559static inline uint16_t
2560mxge_rx_csum(struct mbuf *m, int csum)
2561{
2562 struct ether_header *eh;
2563#ifdef INET
2564 struct ip *ip;
2565#endif
2566#if defined(INET) || defined(INET6)
2567 int cap = m->m_pkthdr.rcvif->if_capenable;
2568#endif
2569 uint16_t c, etype;
2570
2571
2572 eh = mtod(m, struct ether_header *);
2573 etype = ntohs(eh->ether_type);
2574 switch (etype) {
2575#ifdef INET
2576 case ETHERTYPE_IP:
2577 if ((cap & IFCAP_RXCSUM) == 0)
2578 return (1);
2579 ip = (struct ip *)(eh + 1);
2580 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
2581 return (1);
2582 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2583 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2584 (ip->ip_hl << 2) + ip->ip_p));
2585 c ^= 0xffff;
2586 break;
2587#endif
2588#ifdef INET6
2589 case ETHERTYPE_IPV6:
2590 if ((cap & IFCAP_RXCSUM_IPV6) == 0)
2591 return (1);
2592 c = mxge_rx_csum6((eh + 1), m, csum);
2593 break;
2594#endif
2595 default:
2596 c = 1;
2597 }
2598 return (c);
2599}
2600
2601static void
2602mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2603{
2604 struct ether_vlan_header *evl;
2605 struct ether_header *eh;
2606 uint32_t partial;
2607
2608 evl = mtod(m, struct ether_vlan_header *);
2609 eh = mtod(m, struct ether_header *);
2610
2611 /*
2612	 * fix the checksum by subtracting the checksum of the
2613	 * ETHER_VLAN_ENCAP_LEN bytes that follow what the firmware
2614	 * thought was the end of the ethernet header.
2615 */
2616
2617 /* put checksum into host byte order */
2618 *csum = ntohs(*csum);
2619 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2620 (*csum) += ~partial;
2621 (*csum) += ((*csum) < ~partial);
2622 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2624
2625 /* restore checksum to network byte order;
2626 later consumers expect this */
2627 *csum = htons(*csum);
2628
2629 /* save the tag */
2630#ifdef MXGE_NEW_VLAN_API
2631 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2632#else
2633 {
2634 struct m_tag *mtag;
2635 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2636 M_NOWAIT);
2637 if (mtag == NULL)
2638 return;
2639 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2640 m_tag_prepend(m, mtag);
2641 }
2642
2643#endif
2644 m->m_flags |= M_VLANTAG;
2645
2646 /*
2647 * Remove the 802.1q header by copying the Ethernet
2648 * addresses over it and adjusting the beginning of
2649 * the data in the mbuf. The encapsulated Ethernet
2650 * type field is already in place.
2651 */
2652 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2653 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2654 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2655}
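
/*
 * Editorial sketch (standard 802.1q frame layout assumed, not driver
 * code): the bcopy()/m_adj() pair above strips the 4-byte VLAN header
 * by sliding the 12 address bytes forward over it and then trimming
 * the dead bytes off the front of the mbuf:
 *
 *  before: | dst(6) | src(6) | 0x8100(2) | tag(2) | type(2) | data |
 *  after:           | dst(6) | src(6)    |         type(2)  | data |
 */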
2656
2657
2658static inline void
2659mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
2660 uint32_t csum, int lro)
2661{
2662 mxge_softc_t *sc;
2663 struct ifnet *ifp;
2664 struct mbuf *m;
2665 struct ether_header *eh;
2666 mxge_rx_ring_t *rx;
2667 bus_dmamap_t old_map;
2668 int idx;
2669
2670 sc = ss->sc;
2671 ifp = sc->ifp;
2672 rx = &ss->rx_big;
2673 idx = rx->cnt & rx->mask;
2674 rx->cnt += rx->nbufs;
2675 /* save a pointer to the received mbuf */
2676 m = rx->info[idx].m;
2677 /* try to replace the received mbuf */
2678 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2679 /* drop the frame -- the old mbuf is re-cycled */
2680 ifp->if_ierrors++;
2681 return;
2682 }
2683
2684 /* unmap the received buffer */
2685 old_map = rx->info[idx].map;
2686 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2687 bus_dmamap_unload(rx->dmat, old_map);
2688
2689 /* swap the bus_dmamap_t's */
2690 rx->info[idx].map = rx->extra_map;
2691 rx->extra_map = old_map;
2692
2693 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2694 * aligned */
2695 m->m_data += MXGEFW_PAD;
2696
2697 m->m_pkthdr.rcvif = ifp;
2698 m->m_len = m->m_pkthdr.len = len;
2699 ss->ipackets++;
2700 eh = mtod(m, struct ether_header *);
2701 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2702 mxge_vlan_tag_remove(m, &csum);
2703 }
2704 /* if the checksum is valid, mark it in the mbuf header */
2705
2706 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2707 (0 == mxge_rx_csum(m, csum))) {
2708 /* Tell the stack that the checksum is good */
2709 m->m_pkthdr.csum_data = 0xffff;
2710 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2711 CSUM_DATA_VALID;
2712
2713#if defined(INET) || defined (INET6)
2714 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
2715 return;
2716#endif
2717 }
2718 /* flowid only valid if RSS hashing is enabled */
2719 if (sc->num_slices > 1) {
2720 m->m_pkthdr.flowid = (ss - sc->ss);
2721 m->m_flags |= M_FLOWID;
2722 }
2723 /* pass the frame up the stack */
2724 (*ifp->if_input)(ifp, m);
2725}
2726
2727static inline void
2728mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
2729 uint32_t csum, int lro)
2730{
2731 mxge_softc_t *sc;
2732 struct ifnet *ifp;
2733 struct ether_header *eh;
2734 struct mbuf *m;
2735 mxge_rx_ring_t *rx;
2736 bus_dmamap_t old_map;
2737 int idx;
2738
2739 sc = ss->sc;
2740 ifp = sc->ifp;
2741 rx = &ss->rx_small;
2742 idx = rx->cnt & rx->mask;
2743 rx->cnt++;
2744 /* save a pointer to the received mbuf */
2745 m = rx->info[idx].m;
2746 /* try to replace the received mbuf */
2747 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2748 /* drop the frame -- the old mbuf is re-cycled */
2749 ifp->if_ierrors++;
2750 return;
2751 }
2752
2753 /* unmap the received buffer */
2754 old_map = rx->info[idx].map;
2755 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2756 bus_dmamap_unload(rx->dmat, old_map);
2757
2758 /* swap the bus_dmamap_t's */
2759 rx->info[idx].map = rx->extra_map;
2760 rx->extra_map = old_map;
2761
2762 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2763 * aligned */
2764 m->m_data += MXGEFW_PAD;
2765
2766 m->m_pkthdr.rcvif = ifp;
2767 m->m_len = m->m_pkthdr.len = len;
2768 ss->ipackets++;
2769 eh = mtod(m, struct ether_header *);
2770 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2771 mxge_vlan_tag_remove(m, &csum);
2772 }
2773 /* if the checksum is valid, mark it in the mbuf header */
2774 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
2775 (0 == mxge_rx_csum(m, csum))) {
2776 /* Tell the stack that the checksum is good */
2777 m->m_pkthdr.csum_data = 0xffff;
2778 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2779 CSUM_DATA_VALID;
2780
2781#if defined(INET) || defined (INET6)
2782 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
2783 return;
2784#endif
2785 }
2786 /* flowid only valid if RSS hashing is enabled */
2787 if (sc->num_slices > 1) {
2788 m->m_pkthdr.flowid = (ss - sc->ss);
2789 m->m_flags |= M_FLOWID;
2790 }
2791 /* pass the frame up the stack */
2792 (*ifp->if_input)(ifp, m);
2793}
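
/*
 * Editorial note, not driver code: mxge_rx_done_small() and
 * mxge_rx_done_big() both use the classic "replace before receive"
 * pattern: a fresh mbuf is mapped into the ring slot first, and only
 * if that succeeds is the old mbuf handed up the stack.  On
 * allocation failure the old mbuf stays in the ring and the frame is
 * counted as an input error, so the receive ring never runs dry.
 */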
2794
2795static inline void
2796mxge_clean_rx_done(struct mxge_slice_state *ss)
2797{
2798 mxge_rx_done_t *rx_done = &ss->rx_done;
2799 int limit = 0;
2800 uint16_t length;
2801 uint16_t checksum;
2802 int lro;
2803
2804 lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
2805 while (rx_done->entry[rx_done->idx].length != 0) {
2806 length = ntohs(rx_done->entry[rx_done->idx].length);
2807 rx_done->entry[rx_done->idx].length = 0;
2808 checksum = rx_done->entry[rx_done->idx].checksum;
2809 if (length <= (MHLEN - MXGEFW_PAD))
2810 mxge_rx_done_small(ss, length, checksum, lro);
2811 else
2812 mxge_rx_done_big(ss, length, checksum, lro);
2813 rx_done->cnt++;
2814 rx_done->idx = rx_done->cnt & rx_done->mask;
2815
2816 /* limit potential for livelock */
2817 if (__predict_false(++limit > rx_done->mask / 2))
2818 break;
2819 }
2820#if defined(INET) || defined (INET6)
2821 while (!SLIST_EMPTY(&ss->lc.lro_active)) {
2822 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
2823 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
2824 tcp_lro_flush(&ss->lc, lro);
2825 }
2826#endif
2827}
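
/*
 * Editorial sketch, not driver code: the cnt/idx/mask pattern above
 * depends on the completion ring size being a power of two, so a
 * free-running counter masked with (size - 1) wraps to a valid slot
 * without a division:
 */
#if 0
#include <stdint.h>

#define RING_SIZE	256		/* must be a power of two */
#define RING_MASK	(RING_SIZE - 1)

static unsigned int
ring_slot(uint32_t cnt)
{
	return (cnt & RING_MASK);	/* same as cnt % RING_SIZE */
}
#endif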
2828
2829
2830static inline void
2831mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2832{
2833 struct ifnet *ifp;
2834 mxge_tx_ring_t *tx;
2835 struct mbuf *m;
2836 bus_dmamap_t map;
2837 int idx;
2838 int *flags;
2839
2840 tx = &ss->tx;
2841 ifp = ss->sc->ifp;
2842 while (tx->pkt_done != mcp_idx) {
2843 idx = tx->done & tx->mask;
2844 tx->done++;
2845 m = tx->info[idx].m;
2846 /* mbuf and DMA map only attached to the first
2847 segment per-mbuf */
2848 if (m != NULL) {
2849 ss->obytes += m->m_pkthdr.len;
2850 if (m->m_flags & M_MCAST)
2851 ss->omcasts++;
2852 ss->opackets++;
2853 tx->info[idx].m = NULL;
2854 map = tx->info[idx].map;
2855 bus_dmamap_unload(tx->dmat, map);
2856 m_freem(m);
2857 }
2858 if (tx->info[idx].flag) {
2859 tx->info[idx].flag = 0;
2860 tx->pkt_done++;
2861 }
2862 }
2863
2864 /* If we have space, clear IFF_OACTIVE to tell the stack that
2865	 it's OK to send packets */
2866#ifdef IFNET_BUF_RING
2867 flags = &ss->if_drv_flags;
2868#else
2869 flags = &ifp->if_drv_flags;
2870#endif
2871 mtx_lock(&ss->tx.mtx);
2872 if ((*flags) & IFF_DRV_OACTIVE &&
2873 tx->req - tx->done < (tx->mask + 1)/4) {
2874 *(flags) &= ~IFF_DRV_OACTIVE;
2875 ss->tx.wake++;
2876 mxge_start_locked(ss);
2877 }
2878#ifdef IFNET_BUF_RING
2879 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2880 /* let the NIC stop polling this queue, since there
2881 * are no more transmits pending */
2882 if (tx->req == tx->done) {
2883 *tx->send_stop = 1;
2884 tx->queue_active = 0;
2885 tx->deactivate++;
2886 wmb();
2887 }
2888 }
2889#endif
2890 mtx_unlock(&ss->tx.mtx);
2891
2892}
2893
2894static struct mxge_media_type mxge_xfp_media_types[] =
2895{
2896 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2897 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2898 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2899 {0, (1 << 5), "10GBASE-ER"},
2900 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2901 {0, (1 << 3), "10GBASE-SW"},
2902 {0, (1 << 2), "10GBASE-LW"},
2903 {0, (1 << 1), "10GBASE-EW"},
2904 {0, (1 << 0), "Reserved"}
2905};
2906static struct mxge_media_type mxge_sfp_media_types[] =
2907{
2908 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2909 {0, (1 << 7), "Reserved"},
2910 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2911 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2912 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2913 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2914};
2915
2916static void
2917mxge_media_set(mxge_softc_t *sc, int media_type)
2918{
2919
2920
2921 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type,
2922 0, NULL);
2923 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2924 sc->current_media = media_type;
2925 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
2926}
2927
2928static void
2929mxge_media_init(mxge_softc_t *sc)
2930{
2931 char *ptr;
2932 int i;
2933
2934 ifmedia_removeall(&sc->media);
2935 mxge_media_set(sc, IFM_AUTO);
2936
2937 /*
2938	 * parse the product code to determine the interface type
2939 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2940 * after the 3rd dash in the driver's cached copy of the
2941 * EEPROM's product code string.
2942 */
2943 ptr = sc->product_code_string;
2944 if (ptr == NULL) {
2945 device_printf(sc->dev, "Missing product code\n");
2946 return;
2947 }
2948
2949 for (i = 0; i < 3; i++, ptr++) {
2950 ptr = strchr(ptr, '-');
2951 if (ptr == NULL) {
2952 device_printf(sc->dev,
2953 "only %d dashes in PC?!?\n", i);
2954 return;
2955 }
2956 }
2957 if (*ptr == 'C' || *(ptr +1) == 'C') {
2958 /* -C is CX4 */
2959 sc->connector = MXGE_CX4;
2960 mxge_media_set(sc, IFM_10G_CX4);
2961 } else if (*ptr == 'Q') {
2962 /* -Q is Quad Ribbon Fiber */
2963 sc->connector = MXGE_QRF;
2964 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2965 /* FreeBSD has no media type for Quad ribbon fiber */
2966 } else if (*ptr == 'R') {
2967 /* -R is XFP */
2968 sc->connector = MXGE_XFP;
2969 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2970 /* -S or -2S is SFP+ */
2971 sc->connector = MXGE_SFP;
2972 } else {
2973 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2974 }
2975}
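
/*
 * Editorial sketch (the product code below is only illustrative): for
 * a string of the form "10G-PCIE2-8B2-2S", the strchr() loop above
 * leaves ptr on "2S" after skipping three dashes, so the
 * *(ptr + 1) == 'S' test selects the SFP+ connector.
 */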
2976
2977/*
2978 * Determine the media type for a NIC. Some XFPs will identify
2979 * themselves only when their link is up, so this is initiated via a
2980 * link up interrupt. However, this can potentially take up to
2981 * several milliseconds, so it is run via the watchdog routine, rather
2982 * than in the interrupt handler itself.
2983 */
2984static void
2985mxge_media_probe(mxge_softc_t *sc)
2986{
2987 mxge_cmd_t cmd;
2988 char *cage_type;
2989
2990 struct mxge_media_type *mxge_media_types = NULL;
2991 int i, err, ms, mxge_media_type_entries;
2992 uint32_t byte;
2993
2994 sc->need_media_probe = 0;
2995
2996 if (sc->connector == MXGE_XFP) {
2997 /* -R is XFP */
2998 mxge_media_types = mxge_xfp_media_types;
2999 mxge_media_type_entries =
3000 sizeof (mxge_xfp_media_types) /
3001 sizeof (mxge_xfp_media_types[0]);
3002 byte = MXGE_XFP_COMPLIANCE_BYTE;
3003 cage_type = "XFP";
3004 } else if (sc->connector == MXGE_SFP) {
3005 /* -S or -2S is SFP+ */
3006 mxge_media_types = mxge_sfp_media_types;
3007 mxge_media_type_entries =
3008 sizeof (mxge_sfp_media_types) /
3009 sizeof (mxge_sfp_media_types[0]);
3010 cage_type = "SFP+";
3011 byte = 3;
3012 } else {
3013 /* nothing to do; media type cannot change */
3014 return;
3015 }
3016
3017 /*
3018 * At this point we know the NIC has an XFP cage, so now we
3019 * try to determine what is in the cage by using the
3020	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
3021	 * register. We read just one byte, which may take over
3022	 * a millisecond.
3023 */
3024
3025 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
3026 cmd.data1 = byte;
3027 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
3028 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
3029 device_printf(sc->dev, "failed to read XFP\n");
3030 }
3031 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
3032 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
3033 }
3034 if (err != MXGEFW_CMD_OK) {
3035 return;
3036 }
3037
3038 /* now we wait for the data to be cached */
3039 cmd.data0 = byte;
3040 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3041 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
3042 DELAY(1000);
3043 cmd.data0 = byte;
3044 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
3045 }
3046 if (err != MXGEFW_CMD_OK) {
3047 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
3048 cage_type, err, ms);
3049 return;
3050 }
3051
3052 if (cmd.data0 == mxge_media_types[0].bitmask) {
3053 if (mxge_verbose)
3054 device_printf(sc->dev, "%s:%s\n", cage_type,
3055 mxge_media_types[0].name);
3056 if (sc->current_media != mxge_media_types[0].flag) {
3057 mxge_media_init(sc);
3058 mxge_media_set(sc, mxge_media_types[0].flag);
3059 }
3060 return;
3061 }
3062 for (i = 1; i < mxge_media_type_entries; i++) {
3063 if (cmd.data0 & mxge_media_types[i].bitmask) {
3064 if (mxge_verbose)
3065 device_printf(sc->dev, "%s:%s\n",
3066 cage_type,
3067 mxge_media_types[i].name);
3068
3069 if (sc->current_media != mxge_media_types[i].flag) {
3070 mxge_media_init(sc);
3071 mxge_media_set(sc, mxge_media_types[i].flag);
3072 }
3073 return;
3074 }
3075 }
3076 if (mxge_verbose)
3077 device_printf(sc->dev, "%s media 0x%x unknown\n",
3078 cage_type, cmd.data0);
3079
3080 return;
3081}
3082
3083static void
3084mxge_intr(void *arg)
3085{
3086 struct mxge_slice_state *ss = arg;
3087 mxge_softc_t *sc = ss->sc;
3088 mcp_irq_data_t *stats = ss->fw_stats;
3089 mxge_tx_ring_t *tx = &ss->tx;
3090 mxge_rx_done_t *rx_done = &ss->rx_done;
3091 uint32_t send_done_count;
3092 uint8_t valid;
3093
3094
3095#ifndef IFNET_BUF_RING
3096 /* an interrupt on a non-zero slice is implicitly valid
3097 since MSI-X irqs are not shared */
3098 if (ss != sc->ss) {
3099 mxge_clean_rx_done(ss);
3100 *ss->irq_claim = be32toh(3);
3101 return;
3102 }
3103#endif
3104
3105 /* make sure the DMA has finished */
3106 if (!stats->valid) {
3107 return;
3108 }
3109 valid = stats->valid;
3110
3111 if (sc->legacy_irq) {
3112 /* lower legacy IRQ */
3113 *sc->irq_deassert = 0;
3114 if (!mxge_deassert_wait)
3115			/* don't wait for confirmation that irq is low */
3116 stats->valid = 0;
3117 } else {
3118 stats->valid = 0;
3119 }
3120
3121 /* loop while waiting for legacy irq deassertion */
3122 do {
3123 /* check for transmit completes and receives */
3124 send_done_count = be32toh(stats->send_done_count);
3125 while ((send_done_count != tx->pkt_done) ||
3126 (rx_done->entry[rx_done->idx].length != 0)) {
3127 if (send_done_count != tx->pkt_done)
3128 mxge_tx_done(ss, (int)send_done_count);
3129 mxge_clean_rx_done(ss);
3130 send_done_count = be32toh(stats->send_done_count);
3131 }
3132 if (sc->legacy_irq && mxge_deassert_wait)
3133 wmb();
3134 } while (*((volatile uint8_t *) &stats->valid));
3135
3136 /* fw link & error stats meaningful only on the first slice */
3137 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
3138 if (sc->link_state != stats->link_up) {
3139 sc->link_state = stats->link_up;
3140 if (sc->link_state) {
3141 if_link_state_change(sc->ifp, LINK_STATE_UP);
3142 if_initbaudrate(sc->ifp, IF_Gbps(10));
3143 if (mxge_verbose)
3144 device_printf(sc->dev, "link up\n");
3145 } else {
3146 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3147 sc->ifp->if_baudrate = 0;
3148 if (mxge_verbose)
3149 device_printf(sc->dev, "link down\n");
3150 }
3151 sc->need_media_probe = 1;
3152 }
3153 if (sc->rdma_tags_available !=
3154 be32toh(stats->rdma_tags_available)) {
3155 sc->rdma_tags_available =
3156 be32toh(stats->rdma_tags_available);
3157 device_printf(sc->dev, "RDMA timed out! %d tags "
3158 "left\n", sc->rdma_tags_available);
3159 }
3160
3161 if (stats->link_down) {
3162 sc->down_cnt += stats->link_down;
3163 sc->link_state = 0;
3164 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3165 }
3166 }
3167
3168 /* check to see if we have rx token to pass back */
3169 if (valid & 0x1)
3170 *ss->irq_claim = be32toh(3);
3171 *(ss->irq_claim + 1) = be32toh(3);
3172}
3173
3174static void
3175mxge_init(void *arg)
3176{
3177 mxge_softc_t *sc = arg;
3178 struct ifnet *ifp = sc->ifp;
3179
3180
3181 mtx_lock(&sc->driver_mtx);
3182 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
3183 (void) mxge_open(sc);
3184 mtx_unlock(&sc->driver_mtx);
3185}
3186
3187
3188
3189static void
3190mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3191{
3192 int i;
3193
3194#if defined(INET) || defined(INET6)
3195 tcp_lro_free(&ss->lc);
3196#endif
3197 for (i = 0; i <= ss->rx_big.mask; i++) {
3198 if (ss->rx_big.info[i].m == NULL)
3199 continue;
3200 bus_dmamap_unload(ss->rx_big.dmat,
3201 ss->rx_big.info[i].map);
3202 m_freem(ss->rx_big.info[i].m);
3203 ss->rx_big.info[i].m = NULL;
3204 }
3205
3206 for (i = 0; i <= ss->rx_small.mask; i++) {
3207 if (ss->rx_small.info[i].m == NULL)
3208 continue;
3209 bus_dmamap_unload(ss->rx_small.dmat,
3210 ss->rx_small.info[i].map);
3211 m_freem(ss->rx_small.info[i].m);
3212 ss->rx_small.info[i].m = NULL;
3213 }
3214
3215 /* transmit ring used only on the first slice */
3216 if (ss->tx.info == NULL)
3217 return;
3218
3219 for (i = 0; i <= ss->tx.mask; i++) {
3220 ss->tx.info[i].flag = 0;
3221 if (ss->tx.info[i].m == NULL)
3222 continue;
3223 bus_dmamap_unload(ss->tx.dmat,
3224 ss->tx.info[i].map);
3225 m_freem(ss->tx.info[i].m);
3226 ss->tx.info[i].m = NULL;
3227 }
3228}
3229
3230static void
3231mxge_free_mbufs(mxge_softc_t *sc)
3232{
3233 int slice;
3234
3235 for (slice = 0; slice < sc->num_slices; slice++)
3236 mxge_free_slice_mbufs(&sc->ss[slice]);
3237}
3238
3239static void
3240mxge_free_slice_rings(struct mxge_slice_state *ss)
3241{
3242 int i;
3243
3244
3245 if (ss->rx_done.entry != NULL)
3246 mxge_dma_free(&ss->rx_done.dma);
3247 ss->rx_done.entry = NULL;
3248
3249 if (ss->tx.req_bytes != NULL)
3250 free(ss->tx.req_bytes, M_DEVBUF);
3251 ss->tx.req_bytes = NULL;
3252
3253 if (ss->tx.seg_list != NULL)
3254 free(ss->tx.seg_list, M_DEVBUF);
3255 ss->tx.seg_list = NULL;
3256
3257 if (ss->rx_small.shadow != NULL)
3258 free(ss->rx_small.shadow, M_DEVBUF);
3259 ss->rx_small.shadow = NULL;
3260
3261 if (ss->rx_big.shadow != NULL)
3262 free(ss->rx_big.shadow, M_DEVBUF);
3263 ss->rx_big.shadow = NULL;
3264
3265 if (ss->tx.info != NULL) {
3266 if (ss->tx.dmat != NULL) {
3267 for (i = 0; i <= ss->tx.mask; i++) {
3268 bus_dmamap_destroy(ss->tx.dmat,
3269 ss->tx.info[i].map);
3270 }
3271 bus_dma_tag_destroy(ss->tx.dmat);
3272 }
3273 free(ss->tx.info, M_DEVBUF);
3274 }
3275 ss->tx.info = NULL;
3276
3277 if (ss->rx_small.info != NULL) {
3278 if (ss->rx_small.dmat != NULL) {
3279 for (i = 0; i <= ss->rx_small.mask; i++) {
3280 bus_dmamap_destroy(ss->rx_small.dmat,
3281 ss->rx_small.info[i].map);
3282 }
3283 bus_dmamap_destroy(ss->rx_small.dmat,
3284 ss->rx_small.extra_map);
3285 bus_dma_tag_destroy(ss->rx_small.dmat);
3286 }
3287 free(ss->rx_small.info, M_DEVBUF);
3288 }
3289 ss->rx_small.info = NULL;
3290
3291 if (ss->rx_big.info != NULL) {
3292 if (ss->rx_big.dmat != NULL) {
3293 for (i = 0; i <= ss->rx_big.mask; i++) {
3294 bus_dmamap_destroy(ss->rx_big.dmat,
3295 ss->rx_big.info[i].map);
3296 }
3297 bus_dmamap_destroy(ss->rx_big.dmat,
3298 ss->rx_big.extra_map);
3299 bus_dma_tag_destroy(ss->rx_big.dmat);
3300 }
3301 free(ss->rx_big.info, M_DEVBUF);
3302 }
3303 ss->rx_big.info = NULL;
3304}
3305
3306static void
3307mxge_free_rings(mxge_softc_t *sc)
3308{
3309 int slice;
3310
3311 for (slice = 0; slice < sc->num_slices; slice++)
3312 mxge_free_slice_rings(&sc->ss[slice]);
3313}
3314
3315static int
3316mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3317 int tx_ring_entries)
3318{
3319 mxge_softc_t *sc = ss->sc;
3320 size_t bytes;
3321 int err, i;
3322
3323 /* allocate per-slice receive resources */
3324
3325 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3326 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3327
3328 /* allocate the rx shadow rings */
3329 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3330 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3331
3332 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3333 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3334
3335 /* allocate the rx host info rings */
3336 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3337 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3338
3339 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3340 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3341
3342 /* allocate the rx busdma resources */
3343 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3344 1, /* alignment */
3345 4096, /* boundary */
3346 BUS_SPACE_MAXADDR, /* low */
3347 BUS_SPACE_MAXADDR, /* high */
3348 NULL, NULL, /* filter */
3349 MHLEN, /* maxsize */
3350 1, /* num segs */
3351 MHLEN, /* maxsegsize */
3352 BUS_DMA_ALLOCNOW, /* flags */
3353 NULL, NULL, /* lock */
3354 &ss->rx_small.dmat); /* tag */
3355 if (err != 0) {
3356 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3357 err);
3358 return err;
3359 }
3360
3361 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3362 1, /* alignment */
3363#if MXGE_VIRT_JUMBOS
3364 4096, /* boundary */
3365#else
3366 0, /* boundary */
3367#endif
3368 BUS_SPACE_MAXADDR, /* low */
3369 BUS_SPACE_MAXADDR, /* high */
3370 NULL, NULL, /* filter */
3371 3*4096, /* maxsize */
3372#if MXGE_VIRT_JUMBOS
3373 3, /* num segs */
3374 4096, /* maxsegsize*/
3375#else
3376 1, /* num segs */
3377 MJUM9BYTES, /* maxsegsize*/
3378#endif
3379 BUS_DMA_ALLOCNOW, /* flags */
3380 NULL, NULL, /* lock */
3381 &ss->rx_big.dmat); /* tag */
3382 if (err != 0) {
3383 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3384 err);
3385 return err;
3386 }
3387 for (i = 0; i <= ss->rx_small.mask; i++) {
3388 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3389 &ss->rx_small.info[i].map);
3390 if (err != 0) {
3391 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3392 err);
3393 return err;
3394 }
3395 }
3396 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3397 &ss->rx_small.extra_map);
3398 if (err != 0) {
3399 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3400 err);
3401 return err;
3402 }
3403
3404 for (i = 0; i <= ss->rx_big.mask; i++) {
3405 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3406 &ss->rx_big.info[i].map);
3407 if (err != 0) {
3408 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3409 err);
3410 return err;
3411 }
3412 }
3413 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3414 &ss->rx_big.extra_map);
3415 if (err != 0) {
3416 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3417 err);
3418 return err;
3419 }
3420
3421 /* now allocate TX resouces */
3421 /* now allocate TX resources */
3422
3423#ifndef IFNET_BUF_RING
3424 /* only use a single TX ring for now */
3425 if (ss != ss->sc->ss)
3426 return 0;
3427#endif
3428
3429 ss->tx.mask = tx_ring_entries - 1;
3430 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3431
3432
3433 /* allocate the tx request copy block */
3434 bytes = 8 +
3435 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3436 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3437 /* ensure req_list entries are aligned to 8 bytes */
3438 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3439 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3440
3441 /* allocate the tx busdma segment list */
3442 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3443 ss->tx.seg_list = (bus_dma_segment_t *)
3444 malloc(bytes, M_DEVBUF, M_WAITOK);
3445
3446 /* allocate the tx host info ring */
3447 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3448 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3449
3450 /* allocate the tx busdma resources */
3451 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3452 1, /* alignment */
3453 sc->tx_boundary, /* boundary */
3454 BUS_SPACE_MAXADDR, /* low */
3455 BUS_SPACE_MAXADDR, /* high */
3456 NULL, NULL, /* filter */
3457 65536 + 256, /* maxsize */
3458 ss->tx.max_desc - 2, /* num segs */
3459 sc->tx_boundary, /* maxsegsz */
3460 BUS_DMA_ALLOCNOW, /* flags */
3461 NULL, NULL, /* lock */
3462 &ss->tx.dmat); /* tag */
3463
3464 if (err != 0) {
3465 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3466 err);
3467 return err;
3468 }
3469
3470	/* now use these tags to set up dmamaps for each slot
3471 in the ring */
3472 for (i = 0; i <= ss->tx.mask; i++) {
3473 err = bus_dmamap_create(ss->tx.dmat, 0,
3474 &ss->tx.info[i].map);
3475 if (err != 0) {
3476 device_printf(sc->dev, "Err %d tx dmamap\n",
3477 err);
3478 return err;
3479 }
3480 }
3481 return 0;
3482
3483}
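
/*
 * Editorial sketch, not driver code: the req_list alignment above is
 * the usual round-up-and-mask idiom; adding (align - 1) and clearing
 * the low bits yields the first boundary at or past the raw pointer
 * for any power-of-two alignment:
 */
#if 0
#include <stdint.h>

static void *
align_up(void *raw, uintptr_t align)	/* align: power of two */
{
	return ((void *)(((uintptr_t)raw + align - 1) & ~(align - 1)));
}
#endif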
3484
3485static int
3486mxge_alloc_rings(mxge_softc_t *sc)
3487{
3488 mxge_cmd_t cmd;
3489 int tx_ring_size;
3490 int tx_ring_entries, rx_ring_entries;
3491 int err, slice;
3492
3493 /* get ring sizes */
3494 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3495 tx_ring_size = cmd.data0;
3496 if (err != 0) {
3497 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3498 goto abort;
3499 }
3500
3501 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3502 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3503 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3504 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3505 IFQ_SET_READY(&sc->ifp->if_snd);
3506
3507 for (slice = 0; slice < sc->num_slices; slice++) {
3508 err = mxge_alloc_slice_rings(&sc->ss[slice],
3509 rx_ring_entries,
3510 tx_ring_entries);
3511 if (err != 0)
3512 goto abort;
3513 }
3514 return 0;
3515
3516abort:
3517 mxge_free_rings(sc);
3518 return err;
3519
3520}
3521
3522
3523static void
3524mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3525{
3526 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3527
3528 if (bufsize < MCLBYTES) {
3529 /* easy, everything fits in a single buffer */
3530 *big_buf_size = MCLBYTES;
3531 *cl_size = MCLBYTES;
3532 *nbufs = 1;
3533 return;
3534 }
3535
3536 if (bufsize < MJUMPAGESIZE) {
3537 /* still easy, everything still fits in a single buffer */
3538 *big_buf_size = MJUMPAGESIZE;
3539 *cl_size = MJUMPAGESIZE;
3540 *nbufs = 1;
3541 return;
3542 }
3543#if MXGE_VIRT_JUMBOS
3544 /* now we need to use virtually contiguous buffers */
3545 *cl_size = MJUM9BYTES;
3546 *big_buf_size = 4096;
3547 *nbufs = mtu / 4096 + 1;
3548 /* needs to be a power of two, so round up */
3549 if (*nbufs == 3)
3550 *nbufs = 4;
3551#else
3552 *cl_size = MJUM9BYTES;
3553 *big_buf_size = MJUM9BYTES;
3554 *nbufs = 1;
3555#endif
3556}
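
/*
 * Editorial sketch (standard FreeBSD cluster sizes on a 4KB-page
 * platform assumed): for the common cases the code above resolves to
 *
 *	mtu 1500 -> one 2KB MCLBYTES cluster, nbufs = 1
 *	mtu 4000 -> one 4KB MJUMPAGESIZE cluster, nbufs = 1
 *	mtu 9000 -> one 9KB MJUM9BYTES cluster (or, with
 *	            MXGE_VIRT_JUMBOS, three 4KB chunks rounded
 *	            up to nbufs = 4)
 */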
3557
3558static int
3559mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3560{
3561 mxge_softc_t *sc;
3562 mxge_cmd_t cmd;
3563 bus_dmamap_t map;
3564 int err, i, slice;
3565
3566
3567 sc = ss->sc;
3568 slice = ss - sc->ss;
3569
3570#if defined(INET) || defined(INET6)
3571 (void)tcp_lro_init(&ss->lc);
3572#endif
3573 ss->lc.ifp = sc->ifp;
3574
3575 /* get the lanai pointers to the send and receive rings */
3576
3577 err = 0;
3578#ifndef IFNET_BUF_RING
3579 /* We currently only send from the first slice */
3580 if (slice == 0) {
3581#endif
3582 cmd.data0 = slice;
3583 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3584 ss->tx.lanai =
3585 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3586 ss->tx.send_go = (volatile uint32_t *)
3587 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3588 ss->tx.send_stop = (volatile uint32_t *)
3589 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3590#ifndef IFNET_BUF_RING
3591 }
3592#endif
3593 cmd.data0 = slice;
3594 err |= mxge_send_cmd(sc,
3595 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3596 ss->rx_small.lanai =
3597 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3598 cmd.data0 = slice;
3599 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3600 ss->rx_big.lanai =
3601 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3602
3603 if (err != 0) {
3604 device_printf(sc->dev,
3605 "failed to get ring sizes or locations\n");
3606 return EIO;
3607 }
3608
3609 /* stock receive rings */
3610 for (i = 0; i <= ss->rx_small.mask; i++) {
3611 map = ss->rx_small.info[i].map;
3612 err = mxge_get_buf_small(ss, map, i);
3613 if (err) {
3614 device_printf(sc->dev, "alloced %d/%d smalls\n",
3615 i, ss->rx_small.mask + 1);
3616 return ENOMEM;
3617 }
3618 }
3619 for (i = 0; i <= ss->rx_big.mask; i++) {
3620 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3621 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3622 }
3623 ss->rx_big.nbufs = nbufs;
3624 ss->rx_big.cl_size = cl_size;
3625 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3626 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3627 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3628 map = ss->rx_big.info[i].map;
3629 err = mxge_get_buf_big(ss, map, i);
3630 if (err) {
3631 device_printf(sc->dev, "alloced %d/%d bigs\n",
3632 i, ss->rx_big.mask + 1);
3633 return ENOMEM;
3634 }
3635 }
3636 return 0;
3637}
3638
3639static int
3640mxge_open(mxge_softc_t *sc)
3641{
3642 mxge_cmd_t cmd;
3643 int err, big_bytes, nbufs, slice, cl_size, i;
3644 bus_addr_t bus;
3645 volatile uint8_t *itable;
3646 struct mxge_slice_state *ss;
3647
3648 /* Copy the MAC address in case it was overridden */
3649 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3650
3651 err = mxge_reset(sc, 1);
3652 if (err != 0) {
3653 device_printf(sc->dev, "failed to reset\n");
3654 return EIO;
3655 }
3656
3657 if (sc->num_slices > 1) {
3658 /* setup the indirection table */
3659 cmd.data0 = sc->num_slices;
3660 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3661 &cmd);
3662
3663 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3664 &cmd);
3665 if (err != 0) {
3666 device_printf(sc->dev,
3667 "failed to setup rss tables\n");
3668 return err;
3669 }
3670
3671 /* just enable an identity mapping */
3672 itable = sc->sram + cmd.data0;
3673 for (i = 0; i < sc->num_slices; i++)
3674 itable[i] = (uint8_t)i;
3675
3676 cmd.data0 = 1;
3677 cmd.data1 = mxge_rss_hash_type;
3678 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3679 if (err != 0) {
3680 device_printf(sc->dev, "failed to enable slices\n");
3681 return err;
3682 }
3683 }
3684
3685
3686 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3687
3688 cmd.data0 = nbufs;
3689 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3690 &cmd);
3691 /* error is only meaningful if we're trying to set
3692 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3693 if (err && nbufs > 1) {
3694 device_printf(sc->dev,
3695 "Failed to set alway-use-n to %d\n",
3696 nbufs);
3697 return EIO;
3698 }
3699 /* Give the firmware the mtu and the big and small buffer
3700 sizes. The firmware wants the big buf size to be a power
3701 of two. Luckily, FreeBSD's clusters are powers of two */
3702 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3703 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3704 cmd.data0 = MHLEN - MXGEFW_PAD;
3705 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3706 &cmd);
3707 cmd.data0 = big_bytes;
3708 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3709
3710 if (err != 0) {
3711 device_printf(sc->dev, "failed to setup params\n");
3712 goto abort;
3713 }
3714
3715	/* Now give the firmware the pointer to the stats block */
3716 for (slice = 0;
3717#ifdef IFNET_BUF_RING
3718 slice < sc->num_slices;
3719#else
3720 slice < 1;
3721#endif
3722 slice++) {
3723 ss = &sc->ss[slice];
3724 cmd.data0 =
3725 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3726 cmd.data1 =
3727 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3728 cmd.data2 = sizeof(struct mcp_irq_data);
3729 cmd.data2 |= (slice << 16);
3730 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3731 }
3732
3733 if (err != 0) {
3734 bus = sc->ss->fw_stats_dma.bus_addr;
3735 bus += offsetof(struct mcp_irq_data, send_done_count);
3736 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3737 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3738 err = mxge_send_cmd(sc,
3739 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3740 &cmd);
3741 /* Firmware cannot support multicast without STATS_DMA_V2 */
3742 sc->fw_multicast_support = 0;
3743 } else {
3744 sc->fw_multicast_support = 1;
3745 }
3746
3747 if (err != 0) {
3748 device_printf(sc->dev, "failed to setup params\n");
3749 goto abort;
3750 }
3751
3752 for (slice = 0; slice < sc->num_slices; slice++) {
3753 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3754 if (err != 0) {
3755 device_printf(sc->dev, "couldn't open slice %d\n",
3756 slice);
3757 goto abort;
3758 }
3759 }
3760
3761 /* Finally, start the firmware running */
3762 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3763 if (err) {
3764 device_printf(sc->dev, "Couldn't bring up link\n");
3765 goto abort;
3766 }
3767#ifdef IFNET_BUF_RING
3768 for (slice = 0; slice < sc->num_slices; slice++) {
3769 ss = &sc->ss[slice];
3770 ss->if_drv_flags |= IFF_DRV_RUNNING;
3771 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3772 }
3773#endif
3774 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3775 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3776
3777 return 0;
3778
3779
3780abort:
3781 mxge_free_mbufs(sc);
3782
3783 return err;
3784}
3785
3786static int
3787mxge_close(mxge_softc_t *sc, int down)
3788{
3789 mxge_cmd_t cmd;
3790 int err, old_down_cnt;
3791#ifdef IFNET_BUF_RING
3792 struct mxge_slice_state *ss;
3793 int slice;
3794#endif
3795
3796#ifdef IFNET_BUF_RING
3797 for (slice = 0; slice < sc->num_slices; slice++) {
3798 ss = &sc->ss[slice];
3799 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3800 }
3801#endif
3802 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3803 if (!down) {
3804 old_down_cnt = sc->down_cnt;
3805 wmb();
3806 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3807 if (err) {
3808 device_printf(sc->dev,
3809 "Couldn't bring down link\n");
3810 }
3811 if (old_down_cnt == sc->down_cnt) {
3812 /* wait for down irq */
3813 DELAY(10 * sc->intr_coal_delay);
3814 }
3815 wmb();
3816 if (old_down_cnt == sc->down_cnt) {
3817 device_printf(sc->dev, "never got down irq\n");
3818 }
3819 }
3820 mxge_free_mbufs(sc);
3821
3822 return 0;
3823}
3824
3825static void
3826mxge_setup_cfg_space(mxge_softc_t *sc)
3827{
3828 device_t dev = sc->dev;
3829 int reg;
3830 uint16_t cmd, lnk, pectl;
3831
3832 /* find the PCIe link width and set max read request to 4KB*/
3833 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
3834 lnk = pci_read_config(dev, reg + 0x12, 2);
3835 sc->link_width = (lnk >> 4) & 0x3f;
3836
3837 if (sc->pectl == 0) {
3838 pectl = pci_read_config(dev, reg + 0x8, 2);
3839 pectl = (pectl & ~0x7000) | (5 << 12);
3840 pci_write_config(dev, reg + 0x8, pectl, 2);
3841 sc->pectl = pectl;
3842 } else {
3843 /* restore saved pectl after watchdog reset */
3844 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3845 }
3846 }
3847
3848 /* Enable DMA and Memory space access */
3849 pci_enable_busmaster(dev);
3850 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3851 cmd |= PCIM_CMD_MEMEN;
3852 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3853}
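
/*
 * Editorial sketch, not driver code: the pectl update above is a
 * read-modify-write of bits 14:12 of the PCIe Device Control
 * register, the Max Read Request Size field, where the encoding 5
 * selects 4096-byte reads.  A generic helper for that kind of field
 * update:
 */
#if 0
#include <stdint.h>

static uint16_t
set_field(uint16_t reg, int shift, uint16_t mask, uint16_t val)
{
	return ((reg & ~(mask << shift)) | ((val & mask) << shift));
}
/* set_field(pectl, 12, 0x7, 5) mirrors (pectl & ~0x7000) | (5 << 12) */
#endif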
3854
3855static uint32_t
3856mxge_read_reboot(mxge_softc_t *sc)
3857{
3858 device_t dev = sc->dev;
3859 uint32_t vs;
3860
3861 /* find the vendor specific offset */
3862 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3863 device_printf(sc->dev,
3864 "could not find vendor specific offset\n");
3865 return (uint32_t)-1;
3866 }
3867 /* enable read32 mode */
3868 pci_write_config(dev, vs + 0x10, 0x3, 1);
3869 /* tell NIC which register to read */
3870 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3871 return (pci_read_config(dev, vs + 0x14, 4));
3872}
3873
3874static void
3875mxge_watchdog_reset(mxge_softc_t *sc)
3876{
3877 struct pci_devinfo *dinfo;
3878 struct mxge_slice_state *ss;
3879 int err, running, s, num_tx_slices = 1;
3880 uint32_t reboot;
3881 uint16_t cmd;
3882
3883 err = ENXIO;
3884
3885 device_printf(sc->dev, "Watchdog reset!\n");
3886
3887 /*
3888 * check to see if the NIC rebooted. If it did, then all of
3889 * PCI config space has been reset, and things like the
3890 * busmaster bit will be zero. If this is the case, then we
3891 * must restore PCI config space before the NIC can be used
3892 * again
3893 */
3894 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3895 if (cmd == 0xffff) {
3896 /*
3897 * maybe the watchdog caught the NIC rebooting; wait
3898 * up to 100ms for it to finish. If it does not come
3899 * back, then give up
3900 */
3901 DELAY(1000*100);
3902 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3903 if (cmd == 0xffff) {
3904 device_printf(sc->dev, "NIC disappeared!\n");
3905 }
3906 }
3907 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3908 /* print the reboot status */
3909 reboot = mxge_read_reboot(sc);
3910 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3911 reboot);
3912 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3913 if (running) {
3914
3915 /*
3916 * quiesce NIC so that TX routines will not try to
3917 * xmit after restoration of BAR
3918 */
3919
3920 /* Mark the link as down */
3921 if (sc->link_state) {
3922 sc->link_state = 0;
3923 if_link_state_change(sc->ifp,
3924 LINK_STATE_DOWN);
3925 }
3926#ifdef IFNET_BUF_RING
3927 num_tx_slices = sc->num_slices;
3928#endif
3929 /* grab all TX locks to ensure no tx */
3930 for (s = 0; s < num_tx_slices; s++) {
3931 ss = &sc->ss[s];
3932 mtx_lock(&ss->tx.mtx);
3933 }
3934 mxge_close(sc, 1);
3935 }
3936 /* restore PCI configuration space */
3937 dinfo = device_get_ivars(sc->dev);
3938 pci_cfg_restore(sc->dev, dinfo);
3939
3940 /* and redo any changes we made to our config space */
3941 mxge_setup_cfg_space(sc);
3942
3943 /* reload f/w */
3944 err = mxge_load_firmware(sc, 0);
3945 if (err) {
3946 device_printf(sc->dev,
3947 "Unable to re-load f/w\n");
3948 }
3949 if (running) {
3950 if (!err)
3951 err = mxge_open(sc);
3952 /* release all TX locks */
3953 for (s = 0; s < num_tx_slices; s++) {
3954 ss = &sc->ss[s];
3955#ifdef IFNET_BUF_RING
3956 mxge_start_locked(ss);
3957#endif
3958 mtx_unlock(&ss->tx.mtx);
3959 }
3960 }
3961 sc->watchdog_resets++;
3962 } else {
3963 device_printf(sc->dev,
3964 "NIC did not reboot, not resetting\n");
3965 err = 0;
3966 }
3967 if (err) {
3968 device_printf(sc->dev, "watchdog reset failed\n");
3969 } else {
3970 if (sc->dying == 2)
3971 sc->dying = 0;
3972 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3973 }
3974}
3975
3976static void
3977mxge_watchdog_task(void *arg, int pending)
3978{
3979 mxge_softc_t *sc = arg;
3980
3981
3982 mtx_lock(&sc->driver_mtx);
3983 mxge_watchdog_reset(sc);
3984 mtx_unlock(&sc->driver_mtx);
3985}
3986
3987static void
3988mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3989{
3990 tx = &sc->ss[slice].tx;
3991	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3992 device_printf(sc->dev,
3993 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3994 tx->req, tx->done, tx->queue_active);
3995 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3996 tx->activate, tx->deactivate);
3997 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3998 tx->pkt_done,
3999 be32toh(sc->ss->fw_stats->send_done_count));
4000}
4001
4002static int
4003mxge_watchdog(mxge_softc_t *sc)
4004{
4005 mxge_tx_ring_t *tx;
4006 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
4007 int i, err = 0;
4008
4009 /* see if we have outstanding transmits, which
4010 have been pending for more than mxge_ticks */
4011 for (i = 0;
4012#ifdef IFNET_BUF_RING
4013 (i < sc->num_slices) && (err == 0);
4014#else
4015 (i < 1) && (err == 0);
4016#endif
4017 i++) {
4018 tx = &sc->ss[i].tx;
4019 if (tx->req != tx->done &&
4020 tx->watchdog_req != tx->watchdog_done &&
4021 tx->done == tx->watchdog_done) {
4022 /* check for pause blocking before resetting */
4023 if (tx->watchdog_rx_pause == rx_pause) {
4024 mxge_warn_stuck(sc, tx, i);
4025 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4026 return (ENXIO);
4027 }
4028 else
4029 device_printf(sc->dev, "Flow control blocking "
4030 "xmits, check link partner\n");
4031 }
4032
4033 tx->watchdog_req = tx->req;
4034 tx->watchdog_done = tx->done;
4035 tx->watchdog_rx_pause = rx_pause;
4036 }
4037
4038 if (sc->need_media_probe)
4039 mxge_media_probe(sc);
4040 return (err);
4041}
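
/*
 * Editorial note, not driver code: a slice is flagged as stuck only
 * when all three conditions hold across one watchdog interval:
 * transmits are outstanding now (req != done), transmits were already
 * outstanding at the previous tick (watchdog_req != watchdog_done),
 * and no completions landed in between (done == watchdog_done).
 * Pause frames from the link partner are ruled out first, since they
 * legitimately stall transmits without indicating a hang.
 */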
4042
4043static u_long
4044mxge_update_stats(mxge_softc_t *sc)
4045{
4046 struct mxge_slice_state *ss;
4047 u_long pkts = 0;
4048 u_long ipackets = 0;
4049 u_long opackets = 0;
4050#ifdef IFNET_BUF_RING
4051 u_long obytes = 0;
4052 u_long omcasts = 0;
4053 u_long odrops = 0;
4054#endif
4055 u_long oerrors = 0;
4056 int slice;
4057
4058 for (slice = 0; slice < sc->num_slices; slice++) {
4059 ss = &sc->ss[slice];
4060 ipackets += ss->ipackets;
4061 opackets += ss->opackets;
4062#ifdef IFNET_BUF_RING
4063 obytes += ss->obytes;
4064 omcasts += ss->omcasts;
4065 odrops += ss->tx.br->br_drops;
4066#endif
4067 oerrors += ss->oerrors;
4068 }
4069 pkts = (ipackets - sc->ifp->if_ipackets);
4070 pkts += (opackets - sc->ifp->if_opackets);
4071 sc->ifp->if_ipackets = ipackets;
4072 sc->ifp->if_opackets = opackets;
4073#ifdef IFNET_BUF_RING
4074 sc->ifp->if_obytes = obytes;
4075 sc->ifp->if_omcasts = omcasts;
4076 sc->ifp->if_snd.ifq_drops = odrops;
4077#endif
4078 sc->ifp->if_oerrors = oerrors;
4079 return pkts;
4080}
4081
4082static void
4083mxge_tick(void *arg)
4084{
4085 mxge_softc_t *sc = arg;
4086 u_long pkts = 0;
4087 int err = 0;
4088 int running, ticks;
4089 uint16_t cmd;
4090
4091 ticks = mxge_ticks;
4092 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4093 if (running) {
4094 /* aggregate stats from different slices */
4095 pkts = mxge_update_stats(sc);
4096 if (!sc->watchdog_countdown) {
4097 err = mxge_watchdog(sc);
4098 sc->watchdog_countdown = 4;
4099 }
4100 sc->watchdog_countdown--;
4101 }
4102 if (pkts == 0) {
4103 /* ensure NIC did not suffer h/w fault while idle */
4104 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4105 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4106 sc->dying = 2;
4107 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4108 err = ENXIO;
4109 }
4110 /* look less often if NIC is idle */
4111 ticks *= 4;
4112 }
4113
4114 if (err == 0)
4115 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4116
4117}
4118
4119static int
4120mxge_media_change(struct ifnet *ifp)
4121{
4122 return EINVAL;
4123}
4124
4125static int
4126mxge_change_mtu(mxge_softc_t *sc, int mtu)
4127{
4128 struct ifnet *ifp = sc->ifp;
4129 int real_mtu, old_mtu;
4130 int err = 0;
4131
4132
4133 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4134 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4135 return EINVAL;
4136 mtx_lock(&sc->driver_mtx);
4137 old_mtu = ifp->if_mtu;
4138 ifp->if_mtu = mtu;
4139 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4140 mxge_close(sc, 0);
4141 err = mxge_open(sc);
4142 if (err != 0) {
4143 ifp->if_mtu = old_mtu;
4144 mxge_close(sc, 0);
4145 (void) mxge_open(sc);
4146 }
4147 }
4148 mtx_unlock(&sc->driver_mtx);
4149 return err;
4150}
4151
4152static void
4153mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4154{
4155 mxge_softc_t *sc = ifp->if_softc;
4156
4157
4158 if (sc == NULL)
4159 return;
4160 ifmr->ifm_status = IFM_AVALID;
4161 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4162 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4163 ifmr->ifm_active |= sc->current_media;
4164}
4165
4166static int
4167mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4168{
4169 mxge_softc_t *sc = ifp->if_softc;
4170 struct ifreq *ifr = (struct ifreq *)data;
4171 int err, mask;
4172
4173 err = 0;
4174 switch (command) {
4175 case SIOCSIFADDR:
4176 case SIOCGIFADDR:
4177 err = ether_ioctl(ifp, command, data);
4178 break;
4179
4180 case SIOCSIFMTU:
4181 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4182 break;
4183
4184 case SIOCSIFFLAGS:
4185 mtx_lock(&sc->driver_mtx);
4186 if (sc->dying) {
4187 mtx_unlock(&sc->driver_mtx);
4188 return EINVAL;
4189 }
4190 if (ifp->if_flags & IFF_UP) {
4191 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4192 err = mxge_open(sc);
4193 } else {
4194				/* take care of promisc and allmulti
4195				   flag changes */
4196 mxge_change_promisc(sc,
4197 ifp->if_flags & IFF_PROMISC);
4198 mxge_set_multicast_list(sc);
4199 }
4200 } else {
4201 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4202 mxge_close(sc, 0);
4203 }
4204 }
4205 mtx_unlock(&sc->driver_mtx);
4206 break;
4207
4208 case SIOCADDMULTI:
4209 case SIOCDELMULTI:
4210 mtx_lock(&sc->driver_mtx);
4211 mxge_set_multicast_list(sc);
4212 mtx_unlock(&sc->driver_mtx);
4213 break;
4214
4215 case SIOCSIFCAP:
4216 mtx_lock(&sc->driver_mtx);
4217 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4218 if (mask & IFCAP_TXCSUM) {
4219 if (IFCAP_TXCSUM & ifp->if_capenable) {
4220 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4221 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4222 } else {
4223 ifp->if_capenable |= IFCAP_TXCSUM;
4224 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4225 }
4226 } else if (mask & IFCAP_RXCSUM) {
4227 if (IFCAP_RXCSUM & ifp->if_capenable) {
4228 ifp->if_capenable &= ~IFCAP_RXCSUM;
4229 } else {
4230 ifp->if_capenable |= IFCAP_RXCSUM;
4231 }
4232 }
4233 if (mask & IFCAP_TSO4) {
4234 if (IFCAP_TSO4 & ifp->if_capenable) {
4235 ifp->if_capenable &= ~IFCAP_TSO4;
4236 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4237 ifp->if_capenable |= IFCAP_TSO4;
4238 ifp->if_hwassist |= CSUM_TSO;
4239 } else {
4240 printf("mxge requires tx checksum offload"
4241 " be enabled to use TSO\n");
4242 err = EINVAL;
4243 }
4244 }
4245#if IFCAP_TSO6
4246 if (mask & IFCAP_TXCSUM_IPV6) {
4247 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4248 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4249 | IFCAP_TSO6);
4250 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4251 | CSUM_UDP);
4252 } else {
4253 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4254 ifp->if_hwassist |= (CSUM_TCP_IPV6
4255 | CSUM_UDP_IPV6);
4256 }
4257 } else if (mask & IFCAP_RXCSUM_IPV6) {
4258 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4259 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4260 } else {
4261 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4262 }
4263 }
4264 if (mask & IFCAP_TSO6) {
4265 if (IFCAP_TSO6 & ifp->if_capenable) {
4266 ifp->if_capenable &= ~IFCAP_TSO6;
4267 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4268 ifp->if_capenable |= IFCAP_TSO6;
4269 ifp->if_hwassist |= CSUM_TSO;
4270 } else {
4271 printf("mxge requires tx checksum offload"
4272 " be enabled to use TSO\n");
4273 err = EINVAL;
4274 }
4275 }
4276#endif /*IFCAP_TSO6 */
4277
4278 if (mask & IFCAP_LRO)
4279 ifp->if_capenable ^= IFCAP_LRO;
4280 if (mask & IFCAP_VLAN_HWTAGGING)
4281 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4282 if (mask & IFCAP_VLAN_HWTSO)
4283 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4284
4285 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4286 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4287 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4288
4289 mtx_unlock(&sc->driver_mtx);
4290 VLAN_CAPABILITIES(ifp);
4291
4292 break;
4293
4294 case SIOCGIFMEDIA:
4295 mtx_lock(&sc->driver_mtx);
4296 mxge_media_probe(sc);
4297 mtx_unlock(&sc->driver_mtx);
4298 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4299 &sc->media, command);
4300 break;
4301
4302 default:
4303 err = ENOTTY;
4304 }
4305 return err;
4306}
4307
4308static void
4309mxge_fetch_tunables(mxge_softc_t *sc)
4310{
4311
4312 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4313 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4314 &mxge_flow_control);
4315 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4316 &mxge_intr_coal_delay);
4317 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4318 &mxge_nvidia_ecrc_enable);
4319 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4320 &mxge_force_firmware);
4321 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4322 &mxge_deassert_wait);
4323 TUNABLE_INT_FETCH("hw.mxge.verbose",
4324 &mxge_verbose);
4325 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4326 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4327 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4328 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4329 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4330 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4331
4332 if (bootverbose)
4333 mxge_verbose = 1;
4334 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4335 mxge_intr_coal_delay = 30;
4336 if (mxge_ticks == 0)
4337 mxge_ticks = hz / 2;
4338 sc->pause = mxge_flow_control;
4339 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4340 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4341 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4342 }
4343 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4344 mxge_initial_mtu < ETHER_MIN_LEN)
4345 mxge_initial_mtu = ETHERMTU_JUMBO;
4346
4347 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4348 mxge_throttle = MXGE_MAX_THROTTLE;
4349 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4350 mxge_throttle = MXGE_MIN_THROTTLE;
4351 sc->throttle = mxge_throttle;
4352}
4353
4354
4355static void
4356mxge_free_slices(mxge_softc_t *sc)
4357{
4358 struct mxge_slice_state *ss;
4359 int i;
4360
4361
4362 if (sc->ss == NULL)
4363 return;
4364
4365 for (i = 0; i < sc->num_slices; i++) {
4366 ss = &sc->ss[i];
4367 if (ss->fw_stats != NULL) {
4368 mxge_dma_free(&ss->fw_stats_dma);
4369 ss->fw_stats = NULL;
4370#ifdef IFNET_BUF_RING
4371 if (ss->tx.br != NULL) {
4372 drbr_free(ss->tx.br, M_DEVBUF);
4373 ss->tx.br = NULL;
4374 }
4375#endif
4376 mtx_destroy(&ss->tx.mtx);
4377 }
4378 if (ss->rx_done.entry != NULL) {
4379 mxge_dma_free(&ss->rx_done.dma);
4380 ss->rx_done.entry = NULL;
4381 }
4382 }
4383 free(sc->ss, M_DEVBUF);
4384 sc->ss = NULL;
4385}
4386
4387static int
4388mxge_alloc_slices(mxge_softc_t *sc)
4389{
4390 mxge_cmd_t cmd;
4391 struct mxge_slice_state *ss;
4392 size_t bytes;
4393 int err, i, max_intr_slots;
4394
4395 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4396 if (err != 0) {
4397 device_printf(sc->dev, "Cannot determine rx ring size\n");
4398 return err;
4399 }
4400 sc->rx_ring_size = cmd.data0;
4401 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4402
4403 bytes = sizeof (*sc->ss) * sc->num_slices;
4404 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4405 if (sc->ss == NULL)
4406 return (ENOMEM);
4407 for (i = 0; i < sc->num_slices; i++) {
4408 ss = &sc->ss[i];
4409
4410 ss->sc = sc;
4411
4412 /* allocate per-slice rx interrupt queues */
4413
4414 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4415 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4416 if (err != 0)
4417 goto abort;
4418 ss->rx_done.entry = ss->rx_done.dma.addr;
4419 bzero(ss->rx_done.entry, bytes);
4420
4421 /*
4422 * allocate the per-slice firmware stats; stats
4423		 * (including tx) are used only on the first
4424 * slice for now
4425 */
4426#ifndef IFNET_BUF_RING
4427 if (i > 0)
4428 continue;
4429#endif
4430
4431 bytes = sizeof (*ss->fw_stats);
4432 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4433 sizeof (*ss->fw_stats), 64);
4434 if (err != 0)
4435 goto abort;
4436 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4437 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4438 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4439 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4440#ifdef IFNET_BUF_RING
4441 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4442 &ss->tx.mtx);
4443#endif
4444 }
4445
4446 return (0);
4447
4448abort:
4449 mxge_free_slices(sc);
4450 return (ENOMEM);
4451}
4452
4453static void
4454mxge_slice_probe(mxge_softc_t *sc)
4455{
4456 mxge_cmd_t cmd;
4457 char *old_fw;
4458 int msix_cnt, status, max_intr_slots;
4459
4460 sc->num_slices = 1;
4461 /*
4462	 * don't enable multiple slices if they are disabled by the
4463	 * tunable, or if this is not an SMP system
4464 */
4465
4466 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4467 return;
4468
4469 /* see how many MSI-X interrupts are available */
4470 msix_cnt = pci_msix_count(sc->dev);
4471 if (msix_cnt < 2)
4472 return;
4473
4474	/* now load the slice aware firmware to see what it supports */
4475 old_fw = sc->fw_name;
4476 if (old_fw == mxge_fw_aligned)
4477 sc->fw_name = mxge_fw_rss_aligned;
4478 else
4479 sc->fw_name = mxge_fw_rss_unaligned;
4480 status = mxge_load_firmware(sc, 0);
4481 if (status != 0) {
4482 device_printf(sc->dev, "Falling back to a single slice\n");
4483 return;
4484 }
4485
4486 /* try to send a reset command to the card to see if it
4487 is alive */
4488 memset(&cmd, 0, sizeof (cmd));
4489 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4490 if (status != 0) {
4491 device_printf(sc->dev, "failed reset\n");
4492 goto abort_with_fw;
4493 }
4494
4495 /* get rx ring size */
4496 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4497 if (status != 0) {
4498 device_printf(sc->dev, "Cannot determine rx ring size\n");
4499 goto abort_with_fw;
4500 }
4501 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4502
4503 /* tell it the size of the interrupt queues */
4504 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4505 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4506 if (status != 0) {
4507 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4508 goto abort_with_fw;
4509 }
4510
4511 /* ask the maximum number of slices it supports */
4512 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4513 if (status != 0) {
4514 device_printf(sc->dev,
4515 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4516 goto abort_with_fw;
4517 }
4518 sc->num_slices = cmd.data0;
4519 if (sc->num_slices > msix_cnt)
4520 sc->num_slices = msix_cnt;
4521
4522 if (mxge_max_slices == -1) {
4523 /* cap to number of CPUs in system */
4524 if (sc->num_slices > mp_ncpus)
4525 sc->num_slices = mp_ncpus;
4526 } else {
4527 if (sc->num_slices > mxge_max_slices)
4528 sc->num_slices = mxge_max_slices;
4529 }
4530 /* make sure it is a power of two */
4531 while (sc->num_slices & (sc->num_slices - 1))
4532 sc->num_slices--;
4533
4534 if (mxge_verbose)
4535 device_printf(sc->dev, "using %d slices\n",
4536 sc->num_slices);
4537
4538 return;
4539
4540abort_with_fw:
4541 sc->fw_name = old_fw;
4542 (void) mxge_load_firmware(sc, 0);
4543}
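
/*
 * Editorial sketch, not driver code: the decrement loop above rounds
 * num_slices down to a power of two.  x & (x - 1) clears the lowest
 * set bit, so it is nonzero exactly while x still has more than one
 * bit set (i.e. is not yet a power of two):
 */
#if 0
static int
round_down_pow2(int x)			/* assumes x >= 1 */
{
	while (x & (x - 1))
		x--;			/* same loop as mxge_slice_probe() */
	return (x);
}
#endif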
4544
4545static int
4546mxge_add_msix_irqs(mxge_softc_t *sc)
4547{
4548 size_t bytes;
4549 int count, err, i, rid;
4550
4551 rid = PCIR_BAR(2);
4552 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4553 &rid, RF_ACTIVE);
4554
4555 if (sc->msix_table_res == NULL) {
4556 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4557 return ENXIO;
4558 }
4559
4560 count = sc->num_slices;
4561 err = pci_alloc_msix(sc->dev, &count);
4562 if (err != 0) {
4563		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4564			      "err = %d\n", sc->num_slices, err);
4565 goto abort_with_msix_table;
4566 }
4567 if (count < sc->num_slices) {
4568 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4569 count, sc->num_slices);
4570 device_printf(sc->dev,
4571 "Try setting hw.mxge.max_slices to %d\n",
4572 count);
4573 err = ENOSPC;
4574 goto abort_with_msix;
4575 }
4576 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4577 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4578 if (sc->msix_irq_res == NULL) {
4579 err = ENOMEM;
4580 goto abort_with_msix;
4581 }
4582
4583 for (i = 0; i < sc->num_slices; i++) {
4584 rid = i + 1;
4585 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4586 SYS_RES_IRQ,
4587 &rid, RF_ACTIVE);
4588 if (sc->msix_irq_res[i] == NULL) {
4589 device_printf(sc->dev, "couldn't allocate IRQ res"
4590 " for message %d\n", i);
4591 err = ENXIO;
4592 goto abort_with_res;
4593 }
4594 }
4595
4596 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4597 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4598
4599 for (i = 0; i < sc->num_slices; i++) {
4600 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4601 INTR_TYPE_NET | INTR_MPSAFE,
4602#if __FreeBSD_version > 700030
4603 NULL,
4604#endif
4605 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4606 if (err != 0) {
4607 device_printf(sc->dev, "couldn't setup intr for "
4608 "message %d\n", i);
4609 goto abort_with_intr;
4610 }
4611 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4612 sc->msix_ih[i], "s%d", i);
4613 }
4614
4615 if (mxge_verbose) {
4616 device_printf(sc->dev, "using %d msix IRQs:",
4617 sc->num_slices);
4618 for (i = 0; i < sc->num_slices; i++)
4619 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4620 printf("\n");
4621 }
4622 return (0);
4623
4624abort_with_intr:
4625 for (i = 0; i < sc->num_slices; i++) {
4626 if (sc->msix_ih[i] != NULL) {
4627 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4628 sc->msix_ih[i]);
4629 sc->msix_ih[i] = NULL;
4630 }
4631 }
4632 free(sc->msix_ih, M_DEVBUF);
4633
4634
4635abort_with_res:
4636 for (i = 0; i < sc->num_slices; i++) {
4637 rid = i + 1;
4638 if (sc->msix_irq_res[i] != NULL)
4639 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4640 sc->msix_irq_res[i]);
4641 sc->msix_irq_res[i] = NULL;
4642 }
4643 free(sc->msix_irq_res, M_DEVBUF);
4644
4645
4646abort_with_msix:
4647 pci_release_msi(sc->dev);
4648
4649abort_with_msix_table:
4650 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4651 sc->msix_table_res);
4652
4653 return err;
4654}
4655
4656static int
4657mxge_add_single_irq(mxge_softc_t *sc)
4658{
4659 int count, err, rid;
4660
4661 count = pci_msi_count(sc->dev);
4662 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4663 rid = 1;
4664 } else {
4665 rid = 0;
4666 sc->legacy_irq = 1;
4667 }
4668 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4669 1, RF_SHAREABLE | RF_ACTIVE);
4670 if (sc->irq_res == NULL) {
4671 device_printf(sc->dev, "could not alloc interrupt\n");
4672 return ENXIO;
4673 }
4674 if (mxge_verbose)
4675 device_printf(sc->dev, "using %s irq %ld\n",
4676 sc->legacy_irq ? "INTx" : "MSI",
4677 rman_get_start(sc->irq_res));
4678 err = bus_setup_intr(sc->dev, sc->irq_res,
4679 INTR_TYPE_NET | INTR_MPSAFE,
4680#if __FreeBSD_version > 700030
4681 NULL,
4682#endif
4683 mxge_intr, &sc->ss[0], &sc->ih);
4684 if (err != 0) {
4685 bus_release_resource(sc->dev, SYS_RES_IRQ,
4686 sc->legacy_irq ? 0 : 1, sc->irq_res);
4687 if (!sc->legacy_irq)
4688 pci_release_msi(sc->dev);
4689 }
4690 return err;
4691}
4692
4693static void
4694mxge_rem_msix_irqs(mxge_softc_t *sc)
4695{
4696 int i, rid;
4697
4698 for (i = 0; i < sc->num_slices; i++) {
4699 if (sc->msix_ih[i] != NULL) {
4700 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4701 sc->msix_ih[i]);
4702 sc->msix_ih[i] = NULL;
4703 }
4704 }
4705 free(sc->msix_ih, M_DEVBUF);
4706
4707 for (i = 0; i < sc->num_slices; i++) {
4708 rid = i + 1;
4709 if (sc->msix_irq_res[i] != NULL)
4710 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4711 sc->msix_irq_res[i]);
4712 sc->msix_irq_res[i] = NULL;
4713 }
4714 free(sc->msix_irq_res, M_DEVBUF);
4715
4716 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4717 sc->msix_table_res);
4718
4719 pci_release_msi(sc->dev);
4720 return;
4721}
4722
4723static void
4724mxge_rem_single_irq(mxge_softc_t *sc)
4725{
4726 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4727 bus_release_resource(sc->dev, SYS_RES_IRQ,
4728 sc->legacy_irq ? 0 : 1, sc->irq_res);
4729 if (!sc->legacy_irq)
4730 pci_release_msi(sc->dev);
4731}
4732
4733static void
4734mxge_rem_irq(mxge_softc_t *sc)
4735{
4736 if (sc->num_slices > 1)
4737 mxge_rem_msix_irqs(sc);
4738 else
4739 mxge_rem_single_irq(sc);
4740}
4741
4742static int
4743mxge_add_irq(mxge_softc_t *sc)
4744{
4745 int err;
4746
4747 if (sc->num_slices > 1)
4748 err = mxge_add_msix_irqs(sc);
4749 else
4750 err = mxge_add_single_irq(sc);
4751
4752 if (0 && err == 0 && sc->num_slices > 1) {
4753 mxge_rem_msix_irqs(sc);
4754 err = mxge_add_msix_irqs(sc);
4755 }
4756 return err;
4757}
4758
4759
4760static int
4761mxge_attach(device_t dev)
4762{
4763 mxge_cmd_t cmd;
4764 mxge_softc_t *sc = device_get_softc(dev);
4765 struct ifnet *ifp;
4766 int err, rid;
4767
4768 sc->dev = dev;
4769 mxge_fetch_tunables(sc);
4770
4771 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4772 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4773 taskqueue_thread_enqueue, &sc->tq);
4774 if (sc->tq == NULL) {
4775 err = ENOMEM;
4776 goto abort_with_nothing;
4777 }
4778
4779 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4780 1, /* alignment */
4781 0, /* boundary */
4782 BUS_SPACE_MAXADDR, /* low */
4783 BUS_SPACE_MAXADDR, /* high */
4784 NULL, NULL, /* filter */
4785 65536 + 256, /* maxsize */
4786 MXGE_MAX_SEND_DESC, /* num segs */
4787 65536, /* maxsegsize */
4788 0, /* flags */
4789 NULL, NULL, /* lock */
4790 &sc->parent_dmat); /* tag */
4791
4792 if (err != 0) {
4793 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4794 err);
4795 goto abort_with_tq;
4796 }
4797
4798 ifp = sc->ifp = if_alloc(IFT_ETHER);
4799 if (ifp == NULL) {
4800 device_printf(dev, "can not if_alloc()\n");
4801 err = ENOSPC;
4802 goto abort_with_parent_dmat;
4803 }
4804 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4805
4806 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4807 device_get_nameunit(dev));
4808 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4809 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4810 "%s:drv", device_get_nameunit(dev));
4811 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4812 MTX_NETWORK_LOCK, MTX_DEF);
4813
4814 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4815
4816 mxge_setup_cfg_space(sc);
4817
4818 /* Map the board into the kernel */
4819 rid = PCIR_BARS;
4820 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4821 ~0, 1, RF_ACTIVE);
4822 if (sc->mem_res == NULL) {
4823 device_printf(dev, "could not map memory\n");
4824 err = ENXIO;
4825 goto abort_with_lock;
4826 }
4827 sc->sram = rman_get_virtual(sc->mem_res);
4828 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4829 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4830 device_printf(dev, "impossible memory region size %ld\n",
4831 rman_get_size(sc->mem_res));
4832 err = ENXIO;
4833 goto abort_with_mem_res;
4834 }
4835
4836 /* make NULL terminated copy of the EEPROM strings section of
4837 lanai SRAM */
4838 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4839 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4840 rman_get_bushandle(sc->mem_res),
4841 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4842 sc->eeprom_strings,
4843 MXGE_EEPROM_STRINGS_SIZE - 2);
4844 err = mxge_parse_strings(sc);
4845 if (err != 0)
4846 goto abort_with_mem_res;
4847
4848 /* Enable write combining for efficient use of PCIe bus */
4849 mxge_enable_wc(sc);
4850
4851 /* Allocate the out of band dma memory */
4852 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4853 sizeof (mxge_cmd_t), 64);
4854 if (err != 0)
4855 goto abort_with_mem_res;
4856 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4857 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4858 if (err != 0)
4859 goto abort_with_cmd_dma;
4860
4861 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4862 if (err != 0)
4863 goto abort_with_zeropad_dma;
4864
4865 /* select & load the firmware */
4866 err = mxge_select_firmware(sc);
4867 if (err != 0)
4868 goto abort_with_dmabench;
4869 sc->intr_coal_delay = mxge_intr_coal_delay;
4870
4871 mxge_slice_probe(sc);
4872 err = mxge_alloc_slices(sc);
4873 if (err != 0)
4874 goto abort_with_dmabench;
4875
4876 err = mxge_reset(sc, 0);
4877 if (err != 0)
4878 goto abort_with_slices;
4879
4880 err = mxge_alloc_rings(sc);
4881 if (err != 0) {
4882 device_printf(sc->dev, "failed to allocate rings\n");
4883 goto abort_with_slices;
4884 }
4885
4886 err = mxge_add_irq(sc);
4887 if (err != 0) {
4888 device_printf(sc->dev, "failed to add irq\n");
4889 goto abort_with_rings;
4890 }
4891
4892 if_initbaudrate(ifp, IF_Gbps(10));
4893 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4894 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4895 IFCAP_RXCSUM_IPV6;
4896#if defined(INET) || defined(INET6)
4897 ifp->if_capabilities |= IFCAP_LRO;
4898#endif
4899
4900#ifdef MXGE_NEW_VLAN_API
4901 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4902
4903 /* Only FW 1.4.32 and newer can do TSO over vlans */
4904 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4905 sc->fw_ver_tiny >= 32)
4906 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4907#endif
4908 sc->max_mtu = mxge_max_mtu(sc);
4909 if (sc->max_mtu >= 9000)
4910 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4911 else
4912 device_printf(dev, "MTU limited to %d. Install "
4913 "latest firmware for 9000 byte jumbo support\n",
4914 sc->max_mtu - ETHER_HDR_LEN);
4915 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4916 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4917 /* check to see if f/w supports TSO for IPv6 */
4918 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
4919 if (CSUM_TCP_IPV6)
4920 ifp->if_capabilities |= IFCAP_TSO6;
4921 sc->max_tso6_hlen = min(cmd.data0,
4922 sizeof (sc->ss[0].scratch));
4923 }
4924 ifp->if_capenable = ifp->if_capabilities;
4925 if (sc->lro_cnt == 0)
4926 ifp->if_capenable &= ~IFCAP_LRO;
4927 ifp->if_init = mxge_init;
4928 ifp->if_softc = sc;
4929 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4930 ifp->if_ioctl = mxge_ioctl;
4931 ifp->if_start = mxge_start;
4932 /* Initialise the ifmedia structure */
4933 ifmedia_init(&sc->media, 0, mxge_media_change,
4934 mxge_media_status);
4935 mxge_media_init(sc);
4936 mxge_media_probe(sc);
4937 sc->dying = 0;
4938 ether_ifattach(ifp, sc->mac_addr);
4939 /* ether_ifattach sets mtu to ETHERMTU */
4940 if (mxge_initial_mtu != ETHERMTU)
4941 mxge_change_mtu(sc, mxge_initial_mtu);
4942
4943 mxge_add_sysctls(sc);
4944#ifdef IFNET_BUF_RING
4945 ifp->if_transmit = mxge_transmit;
4946 ifp->if_qflush = mxge_qflush;
4947#endif
4948 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4949 device_get_nameunit(sc->dev));
4950 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4951 return 0;
4952
4953abort_with_rings:
4954 mxge_free_rings(sc);
4955abort_with_slices:
4956 mxge_free_slices(sc);
4957abort_with_dmabench:
4958 mxge_dma_free(&sc->dmabench_dma);
4959abort_with_zeropad_dma:
4960 mxge_dma_free(&sc->zeropad_dma);
4961abort_with_cmd_dma:
4962 mxge_dma_free(&sc->cmd_dma);
4963abort_with_mem_res:
4964 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4965abort_with_lock:
4966 pci_disable_busmaster(dev);
4967 mtx_destroy(&sc->cmd_mtx);
4968 mtx_destroy(&sc->driver_mtx);
4969 if_free(ifp);
4970abort_with_parent_dmat:
4971 bus_dma_tag_destroy(sc->parent_dmat);
4972abort_with_tq:
4973 if (sc->tq != NULL) {
4974 taskqueue_drain(sc->tq, &sc->watchdog_task);
4975 taskqueue_free(sc->tq);
4976 sc->tq = NULL;
4977 }
4978abort_with_nothing:
4979 return err;
4980}
4981
4982static int
4983mxge_detach(device_t dev)
4984{
4985 mxge_softc_t *sc = device_get_softc(dev);
4986
4987 if (mxge_vlans_active(sc)) {
4988 device_printf(sc->dev,
4989 "Detach vlans before removing module\n");
4990 return EBUSY;
4991 }
4992 mtx_lock(&sc->driver_mtx);
4993 sc->dying = 1;
4994 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4995 mxge_close(sc, 0);
4996 mtx_unlock(&sc->driver_mtx);
4997 ether_ifdetach(sc->ifp);
4998 if (sc->tq != NULL) {
4999 taskqueue_drain(sc->tq, &sc->watchdog_task);
5000 taskqueue_free(sc->tq);
5001 sc->tq = NULL;
5002 }
5003 callout_drain(&sc->co_hdl);
5004 ifmedia_removeall(&sc->media);
5005 mxge_dummy_rdma(sc, 0);
5006 mxge_rem_sysctls(sc);
5007 mxge_rem_irq(sc);
5008 mxge_free_rings(sc);
5009 mxge_free_slices(sc);
5010 mxge_dma_free(&sc->dmabench_dma);
5011 mxge_dma_free(&sc->zeropad_dma);
5012 mxge_dma_free(&sc->cmd_dma);
5013 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5014 pci_disable_busmaster(dev);
5015 mtx_destroy(&sc->cmd_mtx);
5016 mtx_destroy(&sc->driver_mtx);
5017 if_free(sc->ifp);
5018 bus_dma_tag_destroy(sc->parent_dmat);
5019 return 0;
5020}
5021
5022static int
5023mxge_shutdown(device_t dev)
5024{
5025 return 0;
5026}
5027
5028/*
5029 This file uses Myri10GE driver indentation.
5030
5031 Local Variables:
5032 c-file-style:"linux"
5033 tab-width:8
5034 End:
5035*/
3422
3423#ifndef IFNET_BUF_RING
3424 /* only use a single TX ring for now */
3425 if (ss != ss->sc->ss)
3426 return 0;
3427#endif
3428
3429 ss->tx.mask = tx_ring_entries - 1;
3430 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3431
3432
3433 /* allocate the tx request copy block */
3434 bytes = 8 +
3435 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3436 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3437 /* ensure req_list entries are aligned to 8 bytes */
3438 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3439 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
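	/* the mask arithmetic above rounds req_bytes up to the next
	   8-byte boundary; the extra 8 bytes in the allocation
	   guarantee the aligned pointer still has room for all
	   max_desc + 4 entries */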
3440
3441 /* allocate the tx busdma segment list */
3442 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3443 ss->tx.seg_list = (bus_dma_segment_t *)
3444 malloc(bytes, M_DEVBUF, M_WAITOK);
3445
3446 /* allocate the tx host info ring */
3447 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3448 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3449
3450 /* allocate the tx busdma resources */
3451 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3452 1, /* alignment */
3453 sc->tx_boundary, /* boundary */
3454 BUS_SPACE_MAXADDR, /* low */
3455 BUS_SPACE_MAXADDR, /* high */
3456 NULL, NULL, /* filter */
3457 65536 + 256, /* maxsize */
3458 ss->tx.max_desc - 2, /* num segs */
3459 sc->tx_boundary, /* maxsegsz */
3460 BUS_DMA_ALLOCNOW, /* flags */
3461 NULL, NULL, /* lock */
3462 &ss->tx.dmat); /* tag */
3463
3464 if (err != 0) {
3465 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3466 err);
3467 return err;
3468 }
3469
3470 	/* now use these tags to set up dmamaps for each slot
3471 	   in the ring */
3472 for (i = 0; i <= ss->tx.mask; i++) {
3473 err = bus_dmamap_create(ss->tx.dmat, 0,
3474 &ss->tx.info[i].map);
3475 if (err != 0) {
3476 device_printf(sc->dev, "Err %d tx dmamap\n",
3477 err);
3478 return err;
3479 }
3480 }
3481 return 0;
3482
3483}
3484
3485static int
3486mxge_alloc_rings(mxge_softc_t *sc)
3487{
3488 mxge_cmd_t cmd;
3489 int tx_ring_size;
3490 int tx_ring_entries, rx_ring_entries;
3491 int err, slice;
3492
3493 /* get ring sizes */
3494 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3495 tx_ring_size = cmd.data0;
3496 if (err != 0) {
3497 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3498 goto abort;
3499 }
3500
3501 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3502 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3503 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3504 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3505 IFQ_SET_READY(&sc->ifp->if_snd);
3506
3507 for (slice = 0; slice < sc->num_slices; slice++) {
3508 err = mxge_alloc_slice_rings(&sc->ss[slice],
3509 rx_ring_entries,
3510 tx_ring_entries);
3511 if (err != 0)
3512 goto abort;
3513 }
3514 return 0;
3515
3516abort:
3517 mxge_free_rings(sc);
3518 return err;
3519
3520}
3521
3522
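/*
 * Pick the receive buffer geometry for a given MTU: a standard 2KB
 * cluster when the frame fits, a page-sized cluster for mid-sized
 * jumbo frames, and either a single 9KB cluster or (with
 * MXGE_VIRT_JUMBOS) a power-of-two group of 4KB buffers for
 * full-sized jumbos.
 */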
3523static void
3524mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3525{
3526 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3527
3528 if (bufsize < MCLBYTES) {
3529 /* easy, everything fits in a single buffer */
3530 *big_buf_size = MCLBYTES;
3531 *cl_size = MCLBYTES;
3532 *nbufs = 1;
3533 return;
3534 }
3535
3536 if (bufsize < MJUMPAGESIZE) {
3537 /* still easy, everything still fits in a single buffer */
3538 *big_buf_size = MJUMPAGESIZE;
3539 *cl_size = MJUMPAGESIZE;
3540 *nbufs = 1;
3541 return;
3542 }
3543#if MXGE_VIRT_JUMBOS
3544 /* now we need to use virtually contiguous buffers */
3545 *cl_size = MJUM9BYTES;
3546 *big_buf_size = 4096;
3547 *nbufs = mtu / 4096 + 1;
3548 /* needs to be a power of two, so round up */
3549 if (*nbufs == 3)
3550 *nbufs = 4;
3551#else
3552 *cl_size = MJUM9BYTES;
3553 *big_buf_size = MJUM9BYTES;
3554 *nbufs = 1;
3555#endif
3556}
3557
3558static int
3559mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3560{
3561 mxge_softc_t *sc;
3562 mxge_cmd_t cmd;
3563 bus_dmamap_t map;
3564 int err, i, slice;
3565
3566
3567 sc = ss->sc;
3568 slice = ss - sc->ss;
3569
3570#if defined(INET) || defined(INET6)
3571 (void)tcp_lro_init(&ss->lc);
3572#endif
3573 ss->lc.ifp = sc->ifp;
3574
3575 /* get the lanai pointers to the send and receive rings */
3576
3577 err = 0;
3578#ifndef IFNET_BUF_RING
3579 /* We currently only send from the first slice */
3580 if (slice == 0) {
3581#endif
3582 cmd.data0 = slice;
3583 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3584 ss->tx.lanai =
3585 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3586 ss->tx.send_go = (volatile uint32_t *)
3587 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3588 ss->tx.send_stop = (volatile uint32_t *)
3589 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3590#ifndef IFNET_BUF_RING
3591 }
3592#endif
3593 cmd.data0 = slice;
3594 err |= mxge_send_cmd(sc,
3595 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3596 ss->rx_small.lanai =
3597 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3598 cmd.data0 = slice;
3599 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3600 ss->rx_big.lanai =
3601 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3602
3603 if (err != 0) {
3604 device_printf(sc->dev,
3605 "failed to get ring sizes or locations\n");
3606 return EIO;
3607 }
3608
3609 /* stock receive rings */
3610 for (i = 0; i <= ss->rx_small.mask; i++) {
3611 map = ss->rx_small.info[i].map;
3612 err = mxge_get_buf_small(ss, map, i);
3613 if (err) {
3614 device_printf(sc->dev, "alloced %d/%d smalls\n",
3615 i, ss->rx_small.mask + 1);
3616 return ENOMEM;
3617 }
3618 }
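	/* poison every big-ring shadow slot so unfilled entries are
	   visibly invalid; the loop below only stocks every
	   nbufs-th slot */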
3619 for (i = 0; i <= ss->rx_big.mask; i++) {
3620 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3621 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3622 }
3623 ss->rx_big.nbufs = nbufs;
3624 ss->rx_big.cl_size = cl_size;
3625 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3626 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3627 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3628 map = ss->rx_big.info[i].map;
3629 err = mxge_get_buf_big(ss, map, i);
3630 if (err) {
3631 device_printf(sc->dev, "alloced %d/%d bigs\n",
3632 i, ss->rx_big.mask + 1);
3633 return ENOMEM;
3634 }
3635 }
3636 return 0;
3637}
3638
3639static int
3640mxge_open(mxge_softc_t *sc)
3641{
3642 mxge_cmd_t cmd;
3643 int err, big_bytes, nbufs, slice, cl_size, i;
3644 bus_addr_t bus;
3645 volatile uint8_t *itable;
3646 struct mxge_slice_state *ss;
3647
3648 /* Copy the MAC address in case it was overridden */
3649 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3650
3651 err = mxge_reset(sc, 1);
3652 if (err != 0) {
3653 device_printf(sc->dev, "failed to reset\n");
3654 return EIO;
3655 }
3656
3657 if (sc->num_slices > 1) {
3658 /* setup the indirection table */
3659 cmd.data0 = sc->num_slices;
3660 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3661 &cmd);
3662
3663 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3664 &cmd);
3665 if (err != 0) {
3666 device_printf(sc->dev,
3667 "failed to setup rss tables\n");
3668 return err;
3669 }
3670
3671 /* just enable an identity mapping */
3672 itable = sc->sram + cmd.data0;
3673 for (i = 0; i < sc->num_slices; i++)
3674 itable[i] = (uint8_t)i;
3675
3676 cmd.data0 = 1;
3677 cmd.data1 = mxge_rss_hash_type;
3678 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3679 if (err != 0) {
3680 device_printf(sc->dev, "failed to enable slices\n");
3681 return err;
3682 }
3683 }
3684
3685
3686 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3687
3688 cmd.data0 = nbufs;
3689 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3690 &cmd);
3691 /* error is only meaningful if we're trying to set
3692 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3693 if (err && nbufs > 1) {
3694 device_printf(sc->dev,
3695 			      "Failed to set always-use-n to %d\n",
3696 nbufs);
3697 return EIO;
3698 }
3699 /* Give the firmware the mtu and the big and small buffer
3700 sizes. The firmware wants the big buf size to be a power
3701 of two. Luckily, FreeBSD's clusters are powers of two */
3702 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3703 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3704 cmd.data0 = MHLEN - MXGEFW_PAD;
3705 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3706 &cmd);
3707 cmd.data0 = big_bytes;
3708 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3709
3710 if (err != 0) {
3711 device_printf(sc->dev, "failed to setup params\n");
3712 goto abort;
3713 }
3714
3715 	/* Now give the firmware the pointer to the stats block */
3716 for (slice = 0;
3717#ifdef IFNET_BUF_RING
3718 slice < sc->num_slices;
3719#else
3720 slice < 1;
3721#endif
3722 slice++) {
3723 ss = &sc->ss[slice];
3724 cmd.data0 =
3725 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3726 cmd.data1 =
3727 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3728 cmd.data2 = sizeof(struct mcp_irq_data);
3729 cmd.data2 |= (slice << 16);
3730 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3731 }
3732
3733 if (err != 0) {
3734 bus = sc->ss->fw_stats_dma.bus_addr;
3735 bus += offsetof(struct mcp_irq_data, send_done_count);
3736 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3737 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3738 err = mxge_send_cmd(sc,
3739 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3740 &cmd);
3741 /* Firmware cannot support multicast without STATS_DMA_V2 */
3742 sc->fw_multicast_support = 0;
3743 } else {
3744 sc->fw_multicast_support = 1;
3745 }
3746
3747 if (err != 0) {
3748 device_printf(sc->dev, "failed to setup params\n");
3749 goto abort;
3750 }
3751
3752 for (slice = 0; slice < sc->num_slices; slice++) {
3753 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3754 if (err != 0) {
3755 device_printf(sc->dev, "couldn't open slice %d\n",
3756 slice);
3757 goto abort;
3758 }
3759 }
3760
3761 /* Finally, start the firmware running */
3762 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3763 if (err) {
3764 device_printf(sc->dev, "Couldn't bring up link\n");
3765 goto abort;
3766 }
3767#ifdef IFNET_BUF_RING
3768 for (slice = 0; slice < sc->num_slices; slice++) {
3769 ss = &sc->ss[slice];
3770 ss->if_drv_flags |= IFF_DRV_RUNNING;
3771 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3772 }
3773#endif
3774 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3775 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3776
3777 return 0;
3778
3779
3780abort:
3781 mxge_free_mbufs(sc);
3782
3783 return err;
3784}
3785
3786static int
3787mxge_close(mxge_softc_t *sc, int down)
3788{
3789 mxge_cmd_t cmd;
3790 int err, old_down_cnt;
3791#ifdef IFNET_BUF_RING
3792 struct mxge_slice_state *ss;
3793 int slice;
3794#endif
3795
3796#ifdef IFNET_BUF_RING
3797 for (slice = 0; slice < sc->num_slices; slice++) {
3798 ss = &sc->ss[slice];
3799 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3800 }
3801#endif
3802 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3803 if (!down) {
3804 old_down_cnt = sc->down_cnt;
3805 wmb();
3806 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3807 if (err) {
3808 device_printf(sc->dev,
3809 "Couldn't bring down link\n");
3810 }
3811 if (old_down_cnt == sc->down_cnt) {
3812 /* wait for down irq */
3813 DELAY(10 * sc->intr_coal_delay);
3814 }
3815 wmb();
3816 if (old_down_cnt == sc->down_cnt) {
3817 device_printf(sc->dev, "never got down irq\n");
3818 }
3819 }
3820 mxge_free_mbufs(sc);
3821
3822 return 0;
3823}
3824
3825static void
3826mxge_setup_cfg_space(mxge_softc_t *sc)
3827{
3828 device_t dev = sc->dev;
3829 int reg;
3830 uint16_t cmd, lnk, pectl;
3831
3832 	/* find the PCIe link width and set max read request to 4KB */
3833 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
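		/* in the PCIe capability, offset 0x12 is the Link
		   Status register (bits 9:4 hold the negotiated
		   width) and offset 0x8 is Device Control, whose
		   bits 14:12 encode the max read request size
		   (5 selects 4096 bytes) */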
3834 lnk = pci_read_config(dev, reg + 0x12, 2);
3835 sc->link_width = (lnk >> 4) & 0x3f;
3836
3837 if (sc->pectl == 0) {
3838 pectl = pci_read_config(dev, reg + 0x8, 2);
3839 pectl = (pectl & ~0x7000) | (5 << 12);
3840 pci_write_config(dev, reg + 0x8, pectl, 2);
3841 sc->pectl = pectl;
3842 } else {
3843 /* restore saved pectl after watchdog reset */
3844 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3845 }
3846 }
3847
3848 /* Enable DMA and Memory space access */
3849 pci_enable_busmaster(dev);
3850 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3851 cmd |= PCIM_CMD_MEMEN;
3852 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3853}
3854
3855static uint32_t
3856mxge_read_reboot(mxge_softc_t *sc)
3857{
3858 device_t dev = sc->dev;
3859 uint32_t vs;
3860
3861 /* find the vendor specific offset */
3862 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
3863 device_printf(sc->dev,
3864 "could not find vendor specific offset\n");
3865 return (uint32_t)-1;
3866 }
3867 /* enable read32 mode */
3868 pci_write_config(dev, vs + 0x10, 0x3, 1);
3869 /* tell NIC which register to read */
3870 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
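	/* the reboot status register sits at 0xfffffff0 in NIC
	   address space; read its value back through the data
	   window at vs + 0x14 */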
3871 return (pci_read_config(dev, vs + 0x14, 4));
3872}
3873
3874static void
3875mxge_watchdog_reset(mxge_softc_t *sc)
3876{
3877 struct pci_devinfo *dinfo;
3878 struct mxge_slice_state *ss;
3879 int err, running, s, num_tx_slices = 1;
3880 uint32_t reboot;
3881 uint16_t cmd;
3882
3883 err = ENXIO;
3884
3885 device_printf(sc->dev, "Watchdog reset!\n");
3886
3887 /*
3888 * check to see if the NIC rebooted. If it did, then all of
3889 * PCI config space has been reset, and things like the
3890 * busmaster bit will be zero. If this is the case, then we
3891 * must restore PCI config space before the NIC can be used
3892 * again
3893 */
3894 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3895 if (cmd == 0xffff) {
3896 /*
3897 * maybe the watchdog caught the NIC rebooting; wait
3898 * up to 100ms for it to finish. If it does not come
3899 * back, then give up
3900 */
3901 DELAY(1000*100);
3902 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3903 if (cmd == 0xffff) {
3904 device_printf(sc->dev, "NIC disappeared!\n");
3905 }
3906 }
3907 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3908 /* print the reboot status */
3909 reboot = mxge_read_reboot(sc);
3910 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3911 reboot);
3912 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3913 if (running) {
3914
3915 /*
3916 * quiesce NIC so that TX routines will not try to
3917 * xmit after restoration of BAR
3918 */
3919
3920 /* Mark the link as down */
3921 if (sc->link_state) {
3922 sc->link_state = 0;
3923 if_link_state_change(sc->ifp,
3924 LINK_STATE_DOWN);
3925 }
3926#ifdef IFNET_BUF_RING
3927 num_tx_slices = sc->num_slices;
3928#endif
3929 /* grab all TX locks to ensure no tx */
3930 for (s = 0; s < num_tx_slices; s++) {
3931 ss = &sc->ss[s];
3932 mtx_lock(&ss->tx.mtx);
3933 }
3934 mxge_close(sc, 1);
3935 }
3936 /* restore PCI configuration space */
3937 dinfo = device_get_ivars(sc->dev);
3938 pci_cfg_restore(sc->dev, dinfo);
3939
3940 /* and redo any changes we made to our config space */
3941 mxge_setup_cfg_space(sc);
3942
3943 /* reload f/w */
3944 err = mxge_load_firmware(sc, 0);
3945 if (err) {
3946 device_printf(sc->dev,
3947 "Unable to re-load f/w\n");
3948 }
3949 if (running) {
3950 if (!err)
3951 err = mxge_open(sc);
3952 /* release all TX locks */
3953 for (s = 0; s < num_tx_slices; s++) {
3954 ss = &sc->ss[s];
3955#ifdef IFNET_BUF_RING
3956 mxge_start_locked(ss);
3957#endif
3958 mtx_unlock(&ss->tx.mtx);
3959 }
3960 }
3961 sc->watchdog_resets++;
3962 } else {
3963 device_printf(sc->dev,
3964 "NIC did not reboot, not resetting\n");
3965 err = 0;
3966 }
3967 if (err) {
3968 device_printf(sc->dev, "watchdog reset failed\n");
3969 } else {
3970 if (sc->dying == 2)
3971 sc->dying = 0;
3972 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3973 }
3974}
3975
3976static void
3977mxge_watchdog_task(void *arg, int pending)
3978{
3979 mxge_softc_t *sc = arg;
3980
3981
3982 mtx_lock(&sc->driver_mtx);
3983 mxge_watchdog_reset(sc);
3984 mtx_unlock(&sc->driver_mtx);
3985}
3986
3987static void
3988mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3989{
3990 tx = &sc->ss[slice].tx;
3991 	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3992 device_printf(sc->dev,
3993 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3994 tx->req, tx->done, tx->queue_active);
3995 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3996 tx->activate, tx->deactivate);
3997 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3998 tx->pkt_done,
3999 be32toh(sc->ss->fw_stats->send_done_count));
4000}
4001
4002static int
4003mxge_watchdog(mxge_softc_t *sc)
4004{
4005 mxge_tx_ring_t *tx;
4006 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
4007 int i, err = 0;
4008
4009 /* see if we have outstanding transmits, which
4010 have been pending for more than mxge_ticks */
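	/* a slice counts as stuck when requests were outstanding at
	   the previous tick (watchdog_req != watchdog_done) and no
	   completions have arrived since (done == watchdog_done);
	   an unchanged dropped_pause count rules out flow-control
	   back-pressure as the cause */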
4011 for (i = 0;
4012#ifdef IFNET_BUF_RING
4013 (i < sc->num_slices) && (err == 0);
4014#else
4015 (i < 1) && (err == 0);
4016#endif
4017 i++) {
4018 tx = &sc->ss[i].tx;
4019 if (tx->req != tx->done &&
4020 tx->watchdog_req != tx->watchdog_done &&
4021 tx->done == tx->watchdog_done) {
4022 /* check for pause blocking before resetting */
4023 if (tx->watchdog_rx_pause == rx_pause) {
4024 mxge_warn_stuck(sc, tx, i);
4025 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4026 return (ENXIO);
4027 }
4028 else
4029 device_printf(sc->dev, "Flow control blocking "
4030 "xmits, check link partner\n");
4031 }
4032
4033 tx->watchdog_req = tx->req;
4034 tx->watchdog_done = tx->done;
4035 tx->watchdog_rx_pause = rx_pause;
4036 }
4037
4038 if (sc->need_media_probe)
4039 mxge_media_probe(sc);
4040 return (err);
4041}
4042
4043static u_long
4044mxge_update_stats(mxge_softc_t *sc)
4045{
4046 struct mxge_slice_state *ss;
4047 u_long pkts = 0;
4048 u_long ipackets = 0;
4049 u_long opackets = 0;
4050#ifdef IFNET_BUF_RING
4051 u_long obytes = 0;
4052 u_long omcasts = 0;
4053 u_long odrops = 0;
4054#endif
4055 u_long oerrors = 0;
4056 int slice;
4057
4058 for (slice = 0; slice < sc->num_slices; slice++) {
4059 ss = &sc->ss[slice];
4060 ipackets += ss->ipackets;
4061 opackets += ss->opackets;
4062#ifdef IFNET_BUF_RING
4063 obytes += ss->obytes;
4064 omcasts += ss->omcasts;
4065 odrops += ss->tx.br->br_drops;
4066#endif
4067 oerrors += ss->oerrors;
4068 }
4069 pkts = (ipackets - sc->ifp->if_ipackets);
4070 pkts += (opackets - sc->ifp->if_opackets);
4071 sc->ifp->if_ipackets = ipackets;
4072 sc->ifp->if_opackets = opackets;
4073#ifdef IFNET_BUF_RING
4074 sc->ifp->if_obytes = obytes;
4075 sc->ifp->if_omcasts = omcasts;
4076 sc->ifp->if_snd.ifq_drops = odrops;
4077#endif
4078 sc->ifp->if_oerrors = oerrors;
4079 return pkts;
4080}
4081
4082static void
4083mxge_tick(void *arg)
4084{
4085 mxge_softc_t *sc = arg;
4086 u_long pkts = 0;
4087 int err = 0;
4088 int running, ticks;
4089 uint16_t cmd;
4090
4091 ticks = mxge_ticks;
4092 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
4093 if (running) {
4094 /* aggregate stats from different slices */
4095 pkts = mxge_update_stats(sc);
4096 if (!sc->watchdog_countdown) {
4097 err = mxge_watchdog(sc);
4098 sc->watchdog_countdown = 4;
4099 }
4100 sc->watchdog_countdown--;
4101 }
4102 if (pkts == 0) {
4103 /* ensure NIC did not suffer h/w fault while idle */
4104 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
4105 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
4106 sc->dying = 2;
4107 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
4108 err = ENXIO;
4109 }
4110 /* look less often if NIC is idle */
4111 ticks *= 4;
4112 }
4113
4114 if (err == 0)
4115 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
4116
4117}
4118
4119static int
4120mxge_media_change(struct ifnet *ifp)
4121{
4122 return EINVAL;
4123}
4124
4125static int
4126mxge_change_mtu(mxge_softc_t *sc, int mtu)
4127{
4128 struct ifnet *ifp = sc->ifp;
4129 int real_mtu, old_mtu;
4130 int err = 0;
4131
4132
4133 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4134 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4135 return EINVAL;
4136 mtx_lock(&sc->driver_mtx);
4137 old_mtu = ifp->if_mtu;
4138 ifp->if_mtu = mtu;
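	/* if the interface is running, bounce it so the firmware
	   picks up the new buffer geometry; on failure, restore
	   the old MTU and reopen with the previous settings */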
4139 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4140 mxge_close(sc, 0);
4141 err = mxge_open(sc);
4142 if (err != 0) {
4143 ifp->if_mtu = old_mtu;
4144 mxge_close(sc, 0);
4145 (void) mxge_open(sc);
4146 }
4147 }
4148 mtx_unlock(&sc->driver_mtx);
4149 return err;
4150}
4151
4152static void
4153mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4154{
4155 mxge_softc_t *sc = ifp->if_softc;
4156
4157
4158 if (sc == NULL)
4159 return;
4160 ifmr->ifm_status = IFM_AVALID;
4161 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
4162 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4163 ifmr->ifm_active |= sc->current_media;
4164}
4165
4166static int
4167mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4168{
4169 mxge_softc_t *sc = ifp->if_softc;
4170 struct ifreq *ifr = (struct ifreq *)data;
4171 int err, mask;
4172
4173 err = 0;
4174 switch (command) {
4175 case SIOCSIFADDR:
4176 case SIOCGIFADDR:
4177 err = ether_ioctl(ifp, command, data);
4178 break;
4179
4180 case SIOCSIFMTU:
4181 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4182 break;
4183
4184 case SIOCSIFFLAGS:
4185 mtx_lock(&sc->driver_mtx);
4186 if (sc->dying) {
4187 mtx_unlock(&sc->driver_mtx);
4188 return EINVAL;
4189 }
4190 if (ifp->if_flags & IFF_UP) {
4191 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4192 err = mxge_open(sc);
4193 } else {
4194 				/* take care of promisc and allmulti
4195 				   flag changes */
4196 mxge_change_promisc(sc,
4197 ifp->if_flags & IFF_PROMISC);
4198 mxge_set_multicast_list(sc);
4199 }
4200 } else {
4201 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4202 mxge_close(sc, 0);
4203 }
4204 }
4205 mtx_unlock(&sc->driver_mtx);
4206 break;
4207
4208 case SIOCADDMULTI:
4209 case SIOCDELMULTI:
4210 mtx_lock(&sc->driver_mtx);
4211 mxge_set_multicast_list(sc);
4212 mtx_unlock(&sc->driver_mtx);
4213 break;
4214
4215 case SIOCSIFCAP:
4216 mtx_lock(&sc->driver_mtx);
4217 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4218 if (mask & IFCAP_TXCSUM) {
4219 if (IFCAP_TXCSUM & ifp->if_capenable) {
4220 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4221 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
4222 } else {
4223 ifp->if_capenable |= IFCAP_TXCSUM;
4224 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4225 }
4226 } else if (mask & IFCAP_RXCSUM) {
4227 if (IFCAP_RXCSUM & ifp->if_capenable) {
4228 ifp->if_capenable &= ~IFCAP_RXCSUM;
4229 } else {
4230 ifp->if_capenable |= IFCAP_RXCSUM;
4231 }
4232 }
4233 if (mask & IFCAP_TSO4) {
4234 if (IFCAP_TSO4 & ifp->if_capenable) {
4235 ifp->if_capenable &= ~IFCAP_TSO4;
4236 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4237 ifp->if_capenable |= IFCAP_TSO4;
4238 ifp->if_hwassist |= CSUM_TSO;
4239 } else {
4240 printf("mxge requires tx checksum offload"
4241 				       " to be enabled to use TSO\n");
4242 err = EINVAL;
4243 }
4244 }
4245#if IFCAP_TSO6
4246 if (mask & IFCAP_TXCSUM_IPV6) {
4247 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4248 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
4249 | IFCAP_TSO6);
4250 ifp->if_hwassist &= ~(CSUM_TCP_IPV6
4251 | CSUM_UDP);
4252 } else {
4253 ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
4254 ifp->if_hwassist |= (CSUM_TCP_IPV6
4255 | CSUM_UDP_IPV6);
4256 }
4257 } else if (mask & IFCAP_RXCSUM_IPV6) {
4258 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
4259 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
4260 } else {
4261 ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
4262 }
4263 }
4264 if (mask & IFCAP_TSO6) {
4265 if (IFCAP_TSO6 & ifp->if_capenable) {
4266 ifp->if_capenable &= ~IFCAP_TSO6;
4267 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
4268 ifp->if_capenable |= IFCAP_TSO6;
4269 ifp->if_hwassist |= CSUM_TSO;
4270 } else {
4271 printf("mxge requires tx checksum offload"
4272 				       " to be enabled to use TSO\n");
4273 err = EINVAL;
4274 }
4275 }
4276#endif /*IFCAP_TSO6 */
4277
4278 if (mask & IFCAP_LRO)
4279 ifp->if_capenable ^= IFCAP_LRO;
4280 if (mask & IFCAP_VLAN_HWTAGGING)
4281 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4282 if (mask & IFCAP_VLAN_HWTSO)
4283 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
4284
4285 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) ||
4286 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING))
4287 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO;
4288
4289 mtx_unlock(&sc->driver_mtx);
4290 VLAN_CAPABILITIES(ifp);
4291
4292 break;
4293
4294 case SIOCGIFMEDIA:
4295 mtx_lock(&sc->driver_mtx);
4296 mxge_media_probe(sc);
4297 mtx_unlock(&sc->driver_mtx);
4298 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4299 &sc->media, command);
4300 break;
4301
4302 default:
4303 err = ENOTTY;
4304 }
4305 return err;
4306}
4307
4308static void
4309mxge_fetch_tunables(mxge_softc_t *sc)
4310{
4311
4312 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4313 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4314 &mxge_flow_control);
4315 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4316 &mxge_intr_coal_delay);
4317 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4318 &mxge_nvidia_ecrc_enable);
4319 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4320 &mxge_force_firmware);
4321 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4322 &mxge_deassert_wait);
4323 TUNABLE_INT_FETCH("hw.mxge.verbose",
4324 &mxge_verbose);
4325 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4326 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
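	/* both spellings of the RSS hash tunable are honored */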
4327 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4328 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4329 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4330 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4331
4332 if (bootverbose)
4333 mxge_verbose = 1;
4334 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4335 mxge_intr_coal_delay = 30;
4336 if (mxge_ticks == 0)
4337 mxge_ticks = hz / 2;
4338 sc->pause = mxge_flow_control;
4339 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4340 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4341 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4342 }
4343 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4344 mxge_initial_mtu < ETHER_MIN_LEN)
4345 mxge_initial_mtu = ETHERMTU_JUMBO;
4346
4347 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4348 mxge_throttle = MXGE_MAX_THROTTLE;
4349 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4350 mxge_throttle = MXGE_MIN_THROTTLE;
4351 sc->throttle = mxge_throttle;
4352}
4353
4354
4355static void
4356mxge_free_slices(mxge_softc_t *sc)
4357{
4358 struct mxge_slice_state *ss;
4359 int i;
4360
4361
4362 if (sc->ss == NULL)
4363 return;
4364
4365 for (i = 0; i < sc->num_slices; i++) {
4366 ss = &sc->ss[i];
4367 if (ss->fw_stats != NULL) {
4368 mxge_dma_free(&ss->fw_stats_dma);
4369 ss->fw_stats = NULL;
4370#ifdef IFNET_BUF_RING
4371 if (ss->tx.br != NULL) {
4372 drbr_free(ss->tx.br, M_DEVBUF);
4373 ss->tx.br = NULL;
4374 }
4375#endif
4376 mtx_destroy(&ss->tx.mtx);
4377 }
4378 if (ss->rx_done.entry != NULL) {
4379 mxge_dma_free(&ss->rx_done.dma);
4380 ss->rx_done.entry = NULL;
4381 }
4382 }
4383 free(sc->ss, M_DEVBUF);
4384 sc->ss = NULL;
4385}
4386
4387static int
4388mxge_alloc_slices(mxge_softc_t *sc)
4389{
4390 mxge_cmd_t cmd;
4391 struct mxge_slice_state *ss;
4392 size_t bytes;
4393 int err, i, max_intr_slots;
4394
4395 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4396 if (err != 0) {
4397 device_printf(sc->dev, "Cannot determine rx ring size\n");
4398 return err;
4399 }
4400 sc->rx_ring_size = cmd.data0;
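	/* each slice has two receive rings (small and big) and every
	   receive produces one completion slot, so size the
	   interrupt queue at twice the per-ring entry count */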
4401 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4402
4403 bytes = sizeof (*sc->ss) * sc->num_slices;
4404 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4405 if (sc->ss == NULL)
4406 return (ENOMEM);
4407 for (i = 0; i < sc->num_slices; i++) {
4408 ss = &sc->ss[i];
4409
4410 ss->sc = sc;
4411
4412 /* allocate per-slice rx interrupt queues */
4413
4414 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4415 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4416 if (err != 0)
4417 goto abort;
4418 ss->rx_done.entry = ss->rx_done.dma.addr;
4419 bzero(ss->rx_done.entry, bytes);
4420
4421 /*
4422 * allocate the per-slice firmware stats; stats
4423 		 * (including tx) are used only on the first
4424 * slice for now
4425 */
4426#ifndef IFNET_BUF_RING
4427 if (i > 0)
4428 continue;
4429#endif
4430
4431 bytes = sizeof (*ss->fw_stats);
4432 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4433 sizeof (*ss->fw_stats), 64);
4434 if (err != 0)
4435 goto abort;
4436 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4437 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4438 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4439 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4440#ifdef IFNET_BUF_RING
4441 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4442 &ss->tx.mtx);
4443#endif
4444 }
4445
4446 return (0);
4447
4448abort:
4449 mxge_free_slices(sc);
4450 return (ENOMEM);
4451}
4452
4453static void
4454mxge_slice_probe(mxge_softc_t *sc)
4455{
4456 mxge_cmd_t cmd;
4457 char *old_fw;
4458 int msix_cnt, status, max_intr_slots;
4459
4460 sc->num_slices = 1;
4461 /*
4462 	 * don't enable multiple slices if disabled by the tunable,
4463 	 * or if this is not an SMP system
4464 */
4465
4466 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4467 return;
4468
4469 /* see how many MSI-X interrupts are available */
4470 msix_cnt = pci_msix_count(sc->dev);
4471 if (msix_cnt < 2)
4472 return;
4473
4474 	/* now load the slice-aware firmware to see what it supports */
4475 old_fw = sc->fw_name;
4476 if (old_fw == mxge_fw_aligned)
4477 sc->fw_name = mxge_fw_rss_aligned;
4478 else
4479 sc->fw_name = mxge_fw_rss_unaligned;
4480 status = mxge_load_firmware(sc, 0);
4481 if (status != 0) {
4482 device_printf(sc->dev, "Falling back to a single slice\n");
4483 return;
4484 }
4485
4486 /* try to send a reset command to the card to see if it
4487 is alive */
4488 memset(&cmd, 0, sizeof (cmd));
4489 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4490 if (status != 0) {
4491 device_printf(sc->dev, "failed reset\n");
4492 goto abort_with_fw;
4493 }
4494
4495 /* get rx ring size */
4496 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4497 if (status != 0) {
4498 device_printf(sc->dev, "Cannot determine rx ring size\n");
4499 goto abort_with_fw;
4500 }
4501 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4502
4503 /* tell it the size of the interrupt queues */
4504 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4505 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4506 if (status != 0) {
4507 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4508 goto abort_with_fw;
4509 }
4510
4511 /* ask the maximum number of slices it supports */
4512 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4513 if (status != 0) {
4514 device_printf(sc->dev,
4515 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4516 goto abort_with_fw;
4517 }
4518 sc->num_slices = cmd.data0;
4519 if (sc->num_slices > msix_cnt)
4520 sc->num_slices = msix_cnt;
4521
4522 if (mxge_max_slices == -1) {
4523 /* cap to number of CPUs in system */
4524 if (sc->num_slices > mp_ncpus)
4525 sc->num_slices = mp_ncpus;
4526 } else {
4527 if (sc->num_slices > mxge_max_slices)
4528 sc->num_slices = mxge_max_slices;
4529 }
4530 /* make sure it is a power of two */
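	/* n & (n - 1) is nonzero unless n is a power of two, so
	   decrementing converges on the largest power of two <= n */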
4531 while (sc->num_slices & (sc->num_slices - 1))
4532 sc->num_slices--;
4533
4534 if (mxge_verbose)
4535 device_printf(sc->dev, "using %d slices\n",
4536 sc->num_slices);
4537
4538 return;
4539
4540abort_with_fw:
4541 sc->fw_name = old_fw;
4542 (void) mxge_load_firmware(sc, 0);
4543}
4544
4545static int
4546mxge_add_msix_irqs(mxge_softc_t *sc)
4547{
4548 size_t bytes;
4549 int count, err, i, rid;
4550
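	/* the MSI-X table lives in BAR 2 on this NIC; the memory
	   resource backing it must be allocated before
	   pci_alloc_msix() can set up the vectors */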
4551 rid = PCIR_BAR(2);
4552 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4553 &rid, RF_ACTIVE);
4554
4555 if (sc->msix_table_res == NULL) {
4556 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4557 return ENXIO;
4558 }
4559
4560 count = sc->num_slices;
4561 err = pci_alloc_msix(sc->dev, &count);
4562 if (err != 0) {
4563 		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4564 			      "err = %d\n", sc->num_slices, err);
4565 goto abort_with_msix_table;
4566 }
4567 if (count < sc->num_slices) {
4568 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4569 			      sc->num_slices, count);
4570 device_printf(sc->dev,
4571 "Try setting hw.mxge.max_slices to %d\n",
4572 count);
4573 err = ENOSPC;
4574 goto abort_with_msix;
4575 }
4576 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4577 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4578 if (sc->msix_irq_res == NULL) {
4579 err = ENOMEM;
4580 goto abort_with_msix;
4581 }
4582
4583 for (i = 0; i < sc->num_slices; i++) {
4584 rid = i + 1;
4585 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4586 SYS_RES_IRQ,
4587 &rid, RF_ACTIVE);
4588 if (sc->msix_irq_res[i] == NULL) {
4589 device_printf(sc->dev, "couldn't allocate IRQ res"
4590 " for message %d\n", i);
4591 err = ENXIO;
4592 goto abort_with_res;
4593 }
4594 }
4595
4596 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4597 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4598
4599 for (i = 0; i < sc->num_slices; i++) {
4600 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4601 INTR_TYPE_NET | INTR_MPSAFE,
4602#if __FreeBSD_version > 700030
4603 NULL,
4604#endif
4605 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4606 if (err != 0) {
4607 device_printf(sc->dev, "couldn't setup intr for "
4608 "message %d\n", i);
4609 goto abort_with_intr;
4610 }
4611 bus_describe_intr(sc->dev, sc->msix_irq_res[i],
4612 sc->msix_ih[i], "s%d", i);
4613 }
4614
4615 if (mxge_verbose) {
4616 device_printf(sc->dev, "using %d msix IRQs:",
4617 sc->num_slices);
4618 for (i = 0; i < sc->num_slices; i++)
4619 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4620 printf("\n");
4621 }
4622 return (0);
4623
4624abort_with_intr:
4625 for (i = 0; i < sc->num_slices; i++) {
4626 if (sc->msix_ih[i] != NULL) {
4627 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4628 sc->msix_ih[i]);
4629 sc->msix_ih[i] = NULL;
4630 }
4631 }
4632 free(sc->msix_ih, M_DEVBUF);
4633
4634
4635abort_with_res:
4636 for (i = 0; i < sc->num_slices; i++) {
4637 rid = i + 1;
4638 if (sc->msix_irq_res[i] != NULL)
4639 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4640 sc->msix_irq_res[i]);
4641 sc->msix_irq_res[i] = NULL;
4642 }
4643 free(sc->msix_irq_res, M_DEVBUF);
4644
4645
4646abort_with_msix:
4647 pci_release_msi(sc->dev);
4648
4649abort_with_msix_table:
4650 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4651 sc->msix_table_res);
4652
4653 return err;
4654}
4655
4656static int
4657mxge_add_single_irq(mxge_softc_t *sc)
4658{
4659 int count, err, rid;
4660
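	/* on FreeBSD, SYS_RES_IRQ rid 0 is the legacy INTx line;
	   MSI vectors are numbered starting at rid 1 */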
4661 count = pci_msi_count(sc->dev);
4662 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4663 rid = 1;
4664 } else {
4665 rid = 0;
4666 sc->legacy_irq = 1;
4667 }
4668 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4669 1, RF_SHAREABLE | RF_ACTIVE);
4670 if (sc->irq_res == NULL) {
4671 device_printf(sc->dev, "could not alloc interrupt\n");
4672 return ENXIO;
4673 }
4674 if (mxge_verbose)
4675 device_printf(sc->dev, "using %s irq %ld\n",
4676 sc->legacy_irq ? "INTx" : "MSI",
4677 rman_get_start(sc->irq_res));
4678 err = bus_setup_intr(sc->dev, sc->irq_res,
4679 INTR_TYPE_NET | INTR_MPSAFE,
4680#if __FreeBSD_version > 700030
4681 NULL,
4682#endif
4683 mxge_intr, &sc->ss[0], &sc->ih);
4684 if (err != 0) {
4685 bus_release_resource(sc->dev, SYS_RES_IRQ,
4686 sc->legacy_irq ? 0 : 1, sc->irq_res);
4687 if (!sc->legacy_irq)
4688 pci_release_msi(sc->dev);
4689 }
4690 return err;
4691}
4692
4693static void
4694mxge_rem_msix_irqs(mxge_softc_t *sc)
4695{
4696 int i, rid;
4697
4698 for (i = 0; i < sc->num_slices; i++) {
4699 if (sc->msix_ih[i] != NULL) {
4700 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4701 sc->msix_ih[i]);
4702 sc->msix_ih[i] = NULL;
4703 }
4704 }
4705 free(sc->msix_ih, M_DEVBUF);
4706
4707 for (i = 0; i < sc->num_slices; i++) {
4708 rid = i + 1;
4709 if (sc->msix_irq_res[i] != NULL)
4710 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4711 sc->msix_irq_res[i]);
4712 sc->msix_irq_res[i] = NULL;
4713 }
4714 free(sc->msix_irq_res, M_DEVBUF);
4715
4716 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4717 sc->msix_table_res);
4718
4719 pci_release_msi(sc->dev);
4720 return;
4721}
4722
4723static void
4724mxge_rem_single_irq(mxge_softc_t *sc)
4725{
4726 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4727 bus_release_resource(sc->dev, SYS_RES_IRQ,
4728 sc->legacy_irq ? 0 : 1, sc->irq_res);
4729 if (!sc->legacy_irq)
4730 pci_release_msi(sc->dev);
4731}
4732
4733static void
4734mxge_rem_irq(mxge_softc_t *sc)
4735{
4736 if (sc->num_slices > 1)
4737 mxge_rem_msix_irqs(sc);
4738 else
4739 mxge_rem_single_irq(sc);
4740}
4741
4742static int
4743mxge_add_irq(mxge_softc_t *sc)
4744{
4745 int err;
4746
4747 if (sc->num_slices > 1)
4748 err = mxge_add_msix_irqs(sc);
4749 else
4750 err = mxge_add_single_irq(sc);
4751
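	/* the retry below is compiled out by the leading 0;
	   presumably left in as a debugging aid */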
4752 if (0 && err == 0 && sc->num_slices > 1) {
4753 mxge_rem_msix_irqs(sc);
4754 err = mxge_add_msix_irqs(sc);
4755 }
4756 return err;
4757}
4758
4759
4760static int
4761mxge_attach(device_t dev)
4762{
4763 mxge_cmd_t cmd;
4764 mxge_softc_t *sc = device_get_softc(dev);
4765 struct ifnet *ifp;
4766 int err, rid;
4767
4768 sc->dev = dev;
4769 mxge_fetch_tunables(sc);
4770
4771 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4772 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK,
4773 taskqueue_thread_enqueue, &sc->tq);
4774 if (sc->tq == NULL) {
4775 err = ENOMEM;
4776 goto abort_with_nothing;
4777 }
4778
4779 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4780 1, /* alignment */
4781 0, /* boundary */
4782 BUS_SPACE_MAXADDR, /* low */
4783 BUS_SPACE_MAXADDR, /* high */
4784 NULL, NULL, /* filter */
4785 65536 + 256, /* maxsize */
4786 MXGE_MAX_SEND_DESC, /* num segs */
4787 65536, /* maxsegsize */
4788 0, /* flags */
4789 NULL, NULL, /* lock */
4790 &sc->parent_dmat); /* tag */
4791
4792 if (err != 0) {
4793 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4794 err);
4795 goto abort_with_tq;
4796 }
4797
4798 ifp = sc->ifp = if_alloc(IFT_ETHER);
4799 if (ifp == NULL) {
4800 		device_printf(dev, "cannot if_alloc()\n");
4801 err = ENOSPC;
4802 goto abort_with_parent_dmat;
4803 }
4804 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4805
4806 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4807 device_get_nameunit(dev));
4808 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4809 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4810 "%s:drv", device_get_nameunit(dev));
4811 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4812 MTX_NETWORK_LOCK, MTX_DEF);
4813
4814 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4815
4816 mxge_setup_cfg_space(sc);
4817
4818 /* Map the board into the kernel */
4819 rid = PCIR_BARS;
4820 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4821 ~0, 1, RF_ACTIVE);
4822 if (sc->mem_res == NULL) {
4823 device_printf(dev, "could not map memory\n");
4824 err = ENXIO;
4825 goto abort_with_lock;
4826 }
4827 sc->sram = rman_get_virtual(sc->mem_res);
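	/* 2MB of SRAM, less the regions reserved at the top of the
	   window (presumably firmware scratch space) and a 0x100
	   byte guard */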
4828 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4829 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4830 device_printf(dev, "impossible memory region size %ld\n",
4831 rman_get_size(sc->mem_res));
4832 err = ENXIO;
4833 goto abort_with_mem_res;
4834 }
4835
4836 	/* make a NULL-terminated copy of the EEPROM strings section
4837 	   of lanai SRAM */
4838 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4839 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4840 rman_get_bushandle(sc->mem_res),
4841 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4842 sc->eeprom_strings,
4843 MXGE_EEPROM_STRINGS_SIZE - 2);
4844 err = mxge_parse_strings(sc);
4845 if (err != 0)
4846 goto abort_with_mem_res;
4847
4848 /* Enable write combining for efficient use of PCIe bus */
4849 mxge_enable_wc(sc);
4850
4851 /* Allocate the out of band dma memory */
4852 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4853 sizeof (mxge_cmd_t), 64);
4854 if (err != 0)
4855 goto abort_with_mem_res;
4856 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4857 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4858 if (err != 0)
4859 goto abort_with_cmd_dma;
4860
4861 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4862 if (err != 0)
4863 goto abort_with_zeropad_dma;
4864
4865 /* select & load the firmware */
4866 err = mxge_select_firmware(sc);
4867 if (err != 0)
4868 goto abort_with_dmabench;
4869 sc->intr_coal_delay = mxge_intr_coal_delay;
4870
4871 mxge_slice_probe(sc);
4872 err = mxge_alloc_slices(sc);
4873 if (err != 0)
4874 goto abort_with_dmabench;
4875
4876 err = mxge_reset(sc, 0);
4877 if (err != 0)
4878 goto abort_with_slices;
4879
4880 err = mxge_alloc_rings(sc);
4881 if (err != 0) {
4882 device_printf(sc->dev, "failed to allocate rings\n");
4883 goto abort_with_slices;
4884 }
4885
4886 err = mxge_add_irq(sc);
4887 if (err != 0) {
4888 device_printf(sc->dev, "failed to add irq\n");
4889 goto abort_with_rings;
4890 }
4891
4892 if_initbaudrate(ifp, IF_Gbps(10));
4893 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4894 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
4895 IFCAP_RXCSUM_IPV6;
4896#if defined(INET) || defined(INET6)
4897 ifp->if_capabilities |= IFCAP_LRO;
4898#endif
4899
4900#ifdef MXGE_NEW_VLAN_API
4901 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4902
4903 /* Only FW 1.4.32 and newer can do TSO over vlans */
4904 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
4905 sc->fw_ver_tiny >= 32)
4906 ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
4907#endif
4908 sc->max_mtu = mxge_max_mtu(sc);
4909 if (sc->max_mtu >= 9000)
4910 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4911 else
4912 device_printf(dev, "MTU limited to %d. Install "
4913 "latest firmware for 9000 byte jumbo support\n",
4914 sc->max_mtu - ETHER_HDR_LEN);
4915 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4916 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
4917 /* check to see if f/w supports TSO for IPv6 */
4918 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) {
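		/* CSUM_TCP_IPV6 is a compile-time constant; this
		   presumably guards against headers that define it
		   to 0 */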
4919 if (CSUM_TCP_IPV6)
4920 ifp->if_capabilities |= IFCAP_TSO6;
4921 sc->max_tso6_hlen = min(cmd.data0,
4922 sizeof (sc->ss[0].scratch));
4923 }
4924 ifp->if_capenable = ifp->if_capabilities;
4925 if (sc->lro_cnt == 0)
4926 ifp->if_capenable &= ~IFCAP_LRO;
4927 ifp->if_init = mxge_init;
4928 ifp->if_softc = sc;
4929 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4930 ifp->if_ioctl = mxge_ioctl;
4931 ifp->if_start = mxge_start;
4932 /* Initialise the ifmedia structure */
4933 ifmedia_init(&sc->media, 0, mxge_media_change,
4934 mxge_media_status);
4935 mxge_media_init(sc);
4936 mxge_media_probe(sc);
4937 sc->dying = 0;
4938 ether_ifattach(ifp, sc->mac_addr);
4939 /* ether_ifattach sets mtu to ETHERMTU */
4940 if (mxge_initial_mtu != ETHERMTU)
4941 mxge_change_mtu(sc, mxge_initial_mtu);
4942
4943 mxge_add_sysctls(sc);
4944#ifdef IFNET_BUF_RING
4945 ifp->if_transmit = mxge_transmit;
4946 ifp->if_qflush = mxge_qflush;
4947#endif
4948 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4949 device_get_nameunit(sc->dev));
4950 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4951 return 0;
4952
4953abort_with_rings:
4954 mxge_free_rings(sc);
4955abort_with_slices:
4956 mxge_free_slices(sc);
4957abort_with_dmabench:
4958 mxge_dma_free(&sc->dmabench_dma);
4959abort_with_zeropad_dma:
4960 mxge_dma_free(&sc->zeropad_dma);
4961abort_with_cmd_dma:
4962 mxge_dma_free(&sc->cmd_dma);
4963abort_with_mem_res:
4964 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4965abort_with_lock:
4966 pci_disable_busmaster(dev);
4967 mtx_destroy(&sc->cmd_mtx);
4968 mtx_destroy(&sc->driver_mtx);
4969 if_free(ifp);
4970abort_with_parent_dmat:
4971 bus_dma_tag_destroy(sc->parent_dmat);
4972abort_with_tq:
4973 if (sc->tq != NULL) {
4974 taskqueue_drain(sc->tq, &sc->watchdog_task);
4975 taskqueue_free(sc->tq);
4976 sc->tq = NULL;
4977 }
4978abort_with_nothing:
4979 return err;
4980}
4981
4982static int
4983mxge_detach(device_t dev)
4984{
4985 mxge_softc_t *sc = device_get_softc(dev);
4986
4987 if (mxge_vlans_active(sc)) {
4988 device_printf(sc->dev,
4989 "Detach vlans before removing module\n");
4990 return EBUSY;
4991 }
4992 mtx_lock(&sc->driver_mtx);
4993 sc->dying = 1;
4994 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4995 mxge_close(sc, 0);
4996 mtx_unlock(&sc->driver_mtx);
4997 ether_ifdetach(sc->ifp);
4998 if (sc->tq != NULL) {
4999 taskqueue_drain(sc->tq, &sc->watchdog_task);
5000 taskqueue_free(sc->tq);
5001 sc->tq = NULL;
5002 }
5003 callout_drain(&sc->co_hdl);
5004 ifmedia_removeall(&sc->media);
5005 mxge_dummy_rdma(sc, 0);
5006 mxge_rem_sysctls(sc);
5007 mxge_rem_irq(sc);
5008 mxge_free_rings(sc);
5009 mxge_free_slices(sc);
5010 mxge_dma_free(&sc->dmabench_dma);
5011 mxge_dma_free(&sc->zeropad_dma);
5012 mxge_dma_free(&sc->cmd_dma);
5013 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
5014 pci_disable_busmaster(dev);
5015 mtx_destroy(&sc->cmd_mtx);
5016 mtx_destroy(&sc->driver_mtx);
5017 if_free(sc->ifp);
5018 bus_dma_tag_destroy(sc->parent_dmat);
5019 return 0;
5020}
5021
5022static int
5023mxge_shutdown(device_t dev)
5024{
5025 return 0;
5026}
5027
5028/*
5029 This file uses Myri10GE driver indentation.
5030
5031 Local Variables:
5032 c-file-style:"linux"
5033 tab-width:8
5034 End:
5035*/