if_mxge.c (202121) if_mxge.c (203834)
1/******************************************************************************
2
3Copyright (c) 2006-2009, Myricom Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 202121 2010-01-11 22:28:40Z gallatin $");
31__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 203834 2010-02-13 16:04:58Z mlaier $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/linker.h>
36#include <sys/firmware.h>
37#include <sys/endian.h>
38#include <sys/sockio.h>
39#include <sys/mbuf.h>
40#include <sys/malloc.h>
41#include <sys/kdb.h>
42#include <sys/kernel.h>
43#include <sys/lock.h>
44#include <sys/module.h>
45#include <sys/socket.h>
46#include <sys/sysctl.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49
50/* count xmits ourselves, rather than via drbr */
51#define NO_SLOW_STATS
52#include <net/if.h>
53#include <net/if_arp.h>
54#include <net/ethernet.h>
55#include <net/if_dl.h>
56#include <net/if_media.h>
57
58#include <net/bpf.h>
59
60#include <net/if_types.h>
61#include <net/if_vlan_var.h>
62#include <net/zlib.h>
63
64#include <netinet/in_systm.h>
65#include <netinet/in.h>
66#include <netinet/ip.h>
67#include <netinet/tcp.h>
68
69#include <machine/bus.h>
70#include <machine/in_cksum.h>
71#include <machine/resource.h>
72#include <sys/bus.h>
73#include <sys/rman.h>
74#include <sys/smp.h>
75
76#include <dev/pci/pcireg.h>
77#include <dev/pci/pcivar.h>
78#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
79
80#include <vm/vm.h> /* for pmap_mapdev() */
81#include <vm/pmap.h>
82
83#if defined(__i386) || defined(__amd64)
84#include <machine/specialreg.h>
85#endif
86
87#include <dev/mxge/mxge_mcp.h>
88#include <dev/mxge/mcp_gen_header.h>
89/*#define MXGE_FAKE_IFP*/
90#include <dev/mxge/if_mxge_var.h>
91#ifdef IFNET_BUF_RING
92#include <sys/buf_ring.h>
93#endif
94
95#include "opt_inet.h"
96
97/* tunable params */
98static int mxge_nvidia_ecrc_enable = 1;
99static int mxge_force_firmware = 0;
100static int mxge_intr_coal_delay = 30;
101static int mxge_deassert_wait = 1;
102static int mxge_flow_control = 1;
103static int mxge_verbose = 0;
104static int mxge_lro_cnt = 8;
105static int mxge_ticks;
106static int mxge_max_slices = 1;
107static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
108static int mxge_always_promisc = 0;
109static int mxge_initial_mtu = ETHERMTU_JUMBO;
110static int mxge_throttle = 0;
111static char *mxge_fw_unaligned = "mxge_ethp_z8e";
112static char *mxge_fw_aligned = "mxge_eth_z8e";
113static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
114static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
115
116static int mxge_probe(device_t dev);
117static int mxge_attach(device_t dev);
118static int mxge_detach(device_t dev);
119static int mxge_shutdown(device_t dev);
120static void mxge_intr(void *arg);
121
122static device_method_t mxge_methods[] =
123{
124 /* Device interface */
125 DEVMETHOD(device_probe, mxge_probe),
126 DEVMETHOD(device_attach, mxge_attach),
127 DEVMETHOD(device_detach, mxge_detach),
128 DEVMETHOD(device_shutdown, mxge_shutdown),
129 {0, 0}
130};
131
132static driver_t mxge_driver =
133{
134 "mxge",
135 mxge_methods,
136 sizeof(mxge_softc_t),
137};
138
139static devclass_t mxge_devclass;
140
141/* Declare ourselves to be a child of the PCI bus.*/
142DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
143MODULE_DEPEND(mxge, firmware, 1, 1, 1);
144MODULE_DEPEND(mxge, zlib, 1, 1, 1);
145
146static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
147static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
148static int mxge_close(mxge_softc_t *sc, int down);
149static int mxge_open(mxge_softc_t *sc);
150static void mxge_tick(void *arg);
151
152static int
153mxge_probe(device_t dev)
154{
155 int rev;
156
157
158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
161 rev = pci_get_revid(dev);
162 switch (rev) {
163 case MXGE_PCI_REV_Z8E:
164 device_set_desc(dev, "Myri10G-PCIE-8A");
165 break;
166 case MXGE_PCI_REV_Z8ES:
167 device_set_desc(dev, "Myri10G-PCIE-8B");
168 break;
169 default:
170 device_set_desc(dev, "Myri10G-PCIE-8??");
171 device_printf(dev, "Unrecognized rev %d NIC\n",
172 rev);
173 break;
174 }
175 return 0;
176 }
177 return ENXIO;
178}
179
180static void
181mxge_enable_wc(mxge_softc_t *sc)
182{
183#if defined(__i386) || defined(__amd64)
184 vm_offset_t len;
185 int err;
186
187 sc->wc = 1;
188 len = rman_get_size(sc->mem_res);
189 err = pmap_change_attr((vm_offset_t) sc->sram,
190 len, PAT_WRITE_COMBINING);
191 if (err != 0) {
192 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
193 err);
194 sc->wc = 0;
195 }
196#endif
197}
198
199
200/* callback to get our DMA address */
201static void
202mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
203 int error)
204{
205 if (error == 0) {
206 *(bus_addr_t *) arg = segs->ds_addr;
207 }
208}
209
210static int
211mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
212 bus_size_t alignment)
213{
214 int err;
215 device_t dev = sc->dev;
216 bus_size_t boundary, maxsegsize;
217
218 if (bytes > 4096 && alignment == 4096) {
219 boundary = 0;
220 maxsegsize = bytes;
221 } else {
222 boundary = 4096;
223 maxsegsize = 4096;
224 }
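	/* Note on the choice above: an allocation larger than one page
	 * that must be 4KB-aligned cannot also honor a 4KB boundary,
	 * so the boundary restriction is dropped and a single large
	 * segment is allowed; all other allocations are kept within a
	 * 4KB page. */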
225
226 /* allocate DMAable memory tags */
227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
228 alignment, /* alignment */
229 boundary, /* boundary */
230 BUS_SPACE_MAXADDR, /* low */
231 BUS_SPACE_MAXADDR, /* high */
232 NULL, NULL, /* filter */
233 bytes, /* maxsize */
234 1, /* num segs */
235 maxsegsize, /* maxsegsize */
236 BUS_DMA_COHERENT, /* flags */
237 NULL, NULL, /* lock */
238 &dma->dmat); /* tag */
239 if (err != 0) {
240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
241 return err;
242 }
243
244 /* allocate DMAable memory & map */
245 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
247 | BUS_DMA_ZERO), &dma->map);
248 if (err != 0) {
249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
250 goto abort_with_dmat;
251 }
252
253 /* load the memory */
254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
255 mxge_dmamap_callback,
256 (void *)&dma->bus_addr, 0);
257 if (err != 0) {
258 device_printf(dev, "couldn't load map (err = %d)\n", err);
259 goto abort_with_mem;
260 }
261 return 0;
262
263abort_with_mem:
264 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
265abort_with_dmat:
266 (void)bus_dma_tag_destroy(dma->dmat);
267 return err;
268}
269
270
271static void
272mxge_dma_free(mxge_dma_t *dma)
273{
274 bus_dmamap_unload(dma->dmat, dma->map);
275 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
276 (void)bus_dma_tag_destroy(dma->dmat);
277}
278
279/*
280 * The eeprom strings on the lanaiX have the format
281 * SN=x\0
282 * MAC=x:x:x:x:x:x\0
283 * PC=text\0
284 */
285
286static int
287mxge_parse_strings(mxge_softc_t *sc)
288{
289#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
290
291 char *ptr, *limit;
292 int i, found_mac;
293
294 ptr = sc->eeprom_strings;
295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
296 found_mac = 0;
297 while (ptr < limit && *ptr != '\0') {
298 if (memcmp(ptr, "MAC=", 4) == 0) {
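			/* note: the "+= 1" below, combined with the
			 * loop's leading "ptr += 3", skips the 4-byte
			 * "MAC=" prefix; each later "+= 3" then steps
			 * over one "xx:" pair */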
299 ptr += 1;
300 sc->mac_addr_string = ptr;
301 for (i = 0; i < 6; i++) {
302 ptr += 3;
303 if ((ptr + 2) > limit)
304 goto abort;
305 sc->mac_addr[i] = strtoul(ptr, NULL, 16);
306 found_mac = 1;
307 }
308 } else if (memcmp(ptr, "PC=", 3) == 0) {
309 ptr += 3;
310 strncpy(sc->product_code_string, ptr,
311 sizeof (sc->product_code_string) - 1);
312 } else if (memcmp(ptr, "SN=", 3) == 0) {
313 ptr += 3;
314 strncpy(sc->serial_number_string, ptr,
315 sizeof (sc->serial_number_string) - 1);
316 }
317 MXGE_NEXT_STRING(ptr);
318 }
319
320 if (found_mac)
321 return 0;
322
323 abort:
324 device_printf(sc->dev, "failed to parse eeprom_strings\n");
325
326 return ENXIO;
327}
328
329#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
330static void
331mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
332{
333 uint32_t val;
334 unsigned long base, off;
335 char *va, *cfgptr;
336 device_t pdev, mcp55;
337 uint16_t vendor_id, device_id, word;
338 uintptr_t bus, slot, func, ivend, idev;
339 uint32_t *ptr32;
340
341
342 if (!mxge_nvidia_ecrc_enable)
343 return;
344
345 pdev = device_get_parent(device_get_parent(sc->dev));
346 if (pdev == NULL) {
347 device_printf(sc->dev, "could not find parent?\n");
348 return;
349 }
350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
352
353 if (vendor_id != 0x10de)
354 return;
355
356 base = 0;
357
358 if (device_id == 0x005d) {
359 /* ck804, base address is magic */
360 base = 0xe0000000UL;
361 } else if (device_id >= 0x0374 && device_id <= 0x378) {
362 /* mcp55, base address stored in chipset */
363 mcp55 = pci_find_bsf(0, 0, 0);
364 if (mcp55 &&
365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
367 word = pci_read_config(mcp55, 0x90, 2);
368 base = ((unsigned long)word & 0x7ffeU) << 25;
369 }
370 }
371 if (!base)
372 return;
373
374	/* XXXX
375	   The test below is commented out because it is believed that
376	   config reads/writes beyond 0xff access the config space of
377	   the next larger function.  Uncomment this and remove the
378	   hacky pmap_mapdev() way of accessing config space when
379	   FreeBSD grows support for extended PCIe config space access.
380	*/
381#if 0
382 /* See if we can, by some miracle, access the extended
383 config space */
384 val = pci_read_config(pdev, 0x178, 4);
385 if (val != 0xffffffff) {
386 val |= 0x40;
387 pci_write_config(pdev, 0x178, val, 4);
388 return;
389 }
390#endif
391	/* Rather than using normal pci config space writes, we must
392	 * map the Nvidia config space ourselves.  This is because on
393	 * Opteron/Nvidia class machines the 0xe0000000 mapping is
394	 * handled by the Nvidia chipset; that means the internal PCI
395	 * device (the on-chip northbridge), or the amd-8131 bridge
396	 * and things behind them are not visible via this method.
397	 */
398
399 BUS_READ_IVAR(device_get_parent(pdev), pdev,
400 PCI_IVAR_BUS, &bus);
401 BUS_READ_IVAR(device_get_parent(pdev), pdev,
402 PCI_IVAR_SLOT, &slot);
403 BUS_READ_IVAR(device_get_parent(pdev), pdev,
404 PCI_IVAR_FUNCTION, &func);
405 BUS_READ_IVAR(device_get_parent(pdev), pdev,
406 PCI_IVAR_VENDOR, &ivend);
407 BUS_READ_IVAR(device_get_parent(pdev), pdev,
408 PCI_IVAR_DEVICE, &idev);
409
410 off = base
411 + 0x00100000UL * (unsigned long)bus
412 + 0x00001000UL * (unsigned long)(func
413 + 8 * slot);
414
415 /* map it into the kernel */
416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
417
418
419 if (va == NULL) {
420		device_printf(sc->dev, "pmap_mapdev() failed\n");
421 return;
422 }
423 /* get a pointer to the config space mapped into the kernel */
424 cfgptr = va + (off & PAGE_MASK);
425
426 /* make sure that we can really access it */
427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
429 if (! (vendor_id == ivend && device_id == idev)) {
430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
431 vendor_id, device_id);
432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
433 return;
434 }
435
436 ptr32 = (uint32_t*)(cfgptr + 0x178);
437 val = *ptr32;
438
439 if (val == 0xffffffff) {
440 device_printf(sc->dev, "extended mapping failed\n");
441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
442 return;
443 }
444 *ptr32 = val | 0x40;
445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
446 if (mxge_verbose)
447 device_printf(sc->dev,
448 "Enabled ECRC on upstream Nvidia bridge "
449 "at %d:%d:%d\n",
450 (int)bus, (int)slot, (int)func);
451 return;
452}
453#else
454static void
455mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
456{
457 device_printf(sc->dev,
458 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
459 return;
460}
461#endif
462
463
464static int
465mxge_dma_test(mxge_softc_t *sc, int test_type)
466{
467 mxge_cmd_t cmd;
468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
469 int status;
470 uint32_t len;
471 char *test = " ";
472
473
474 /* Run a small DMA test.
475 * The magic multipliers to the length tell the firmware
476 * to do DMA read, write, or read+write tests. The
477 * results are returned in cmd.data0. The upper 16
478 * bits of the return is the number of transfers completed.
479 * The lower 16 bits is the time in 0.5us ticks that the
480 * transfers took to complete.
481 */
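	/* Worked example of the math below (derived from the comment
	 * above): bytes moved = transfers * len; elapsed time =
	 * ticks * 0.5us; so MB/s = bytes/us = (transfers * len * 2) /
	 * ticks, which is the "* 2" in each expression.  The extra
	 * "* 2" in the read/write case counts each transfer twice. */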
482
483 len = sc->tx_boundary;
484
485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
487 cmd.data2 = len * 0x10000;
488 status = mxge_send_cmd(sc, test_type, &cmd);
489 if (status != 0) {
490 test = "read";
491 goto abort;
492 }
493 sc->read_dma = ((cmd.data0>>16) * len * 2) /
494 (cmd.data0 & 0xffff);
495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
497 cmd.data2 = len * 0x1;
498 status = mxge_send_cmd(sc, test_type, &cmd);
499 if (status != 0) {
500 test = "write";
501 goto abort;
502 }
503 sc->write_dma = ((cmd.data0>>16) * len * 2) /
504 (cmd.data0 & 0xffff);
505
506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
508 cmd.data2 = len * 0x10001;
509 status = mxge_send_cmd(sc, test_type, &cmd);
510 if (status != 0) {
511 test = "read/write";
512 goto abort;
513 }
514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
515 (cmd.data0 & 0xffff);
516
517abort:
518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
520 test, status);
521
522 return status;
523}
524
525/*
526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
527 * when the PCI-E Completion packets are aligned on an 8-byte
528 * boundary.  Some PCI-E chipsets always align Completion packets; on
529 * the ones that do not, the alignment can be enforced by enabling
530 * ECRC generation (if supported).
531 *
532 * When PCI-E Completion packets are not aligned, it is actually more
533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
534 *
535 * If the driver can neither enable ECRC nor verify that it has
536 * already been enabled, then it must use a firmware image which works
537 * around unaligned completion packets (ethp_z8e.dat), and it should
538 * also ensure that it never gives the device a Read-DMA which is
539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
540 * enabled, then the driver should use the aligned (eth_z8e.dat)
541 * firmware image, and set tx_boundary to 4KB.
542 */
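/* In short: aligned completions (verified, or forced by enabling
 * ECRC) -> eth_z8e firmware with a 4KB tx_boundary; anything else ->
 * ethp_z8e firmware with a 2KB tx_boundary. */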
543
544static int
545mxge_firmware_probe(mxge_softc_t *sc)
546{
547 device_t dev = sc->dev;
548 int reg, status;
549 uint16_t pectl;
550
551 sc->tx_boundary = 4096;
552 /*
553 * Verify the max read request size was set to 4KB
554 * before trying the test with 4KB.
555 */
556 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
557 pectl = pci_read_config(dev, reg + 0x8, 2);
558 if ((pectl & (5 << 12)) != (5 << 12)) {
559			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
560 pectl);
561 sc->tx_boundary = 2048;
562 }
563 }
564
565 /*
566 * load the optimized firmware (which assumes aligned PCIe
567 * completions) in order to see if it works on this host.
568 */
569 sc->fw_name = mxge_fw_aligned;
570 status = mxge_load_firmware(sc, 1);
571 if (status != 0) {
572 return status;
573 }
574
575 /*
576 * Enable ECRC if possible
577 */
578 mxge_enable_nvidia_ecrc(sc);
579
580 /*
581 * Run a DMA test which watches for unaligned completions and
582 * aborts on the first one seen.
583 */
584
585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
586 if (status == 0)
587 return 0; /* keep the aligned firmware */
588
589 if (status != E2BIG)
590 device_printf(dev, "DMA test failed: %d\n", status);
591 if (status == ENOSYS)
592 device_printf(dev, "Falling back to ethp! "
593			      "Please install up-to-date firmware\n");
594 return status;
595}
596
597static int
598mxge_select_firmware(mxge_softc_t *sc)
599{
600 int aligned = 0;
601 int force_firmware = mxge_force_firmware;
602
603 if (sc->throttle)
604 force_firmware = sc->throttle;
605
606 if (force_firmware != 0) {
607 if (force_firmware == 1)
608 aligned = 1;
609 else
610 aligned = 0;
611 if (mxge_verbose)
612 device_printf(sc->dev,
613 "Assuming %s completions (forced)\n",
614 aligned ? "aligned" : "unaligned");
615 goto abort;
616 }
617
618 /* if the PCIe link width is 4 or less, we can use the aligned
619 firmware and skip any checks */
620 if (sc->link_width != 0 && sc->link_width <= 4) {
621 device_printf(sc->dev,
622 "PCIe x%d Link, expect reduced performance\n",
623 sc->link_width);
624 aligned = 1;
625 goto abort;
626 }
627
628 if (0 == mxge_firmware_probe(sc))
629 return 0;
630
631abort:
632 if (aligned) {
633 sc->fw_name = mxge_fw_aligned;
634 sc->tx_boundary = 4096;
635 } else {
636 sc->fw_name = mxge_fw_unaligned;
637 sc->tx_boundary = 2048;
638 }
639 return (mxge_load_firmware(sc, 0));
640}
641
642union qualhack
643{
644 const char *ro_char;
645 char *rw_char;
646};
647
648static int
649mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
650{
651
652
653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
654 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
655 be32toh(hdr->mcp_type));
656 return EIO;
657 }
658
659 /* save firmware version for sysctl */
660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
661 if (mxge_verbose)
662 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
663
664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
665 &sc->fw_ver_minor, &sc->fw_ver_tiny);
666
667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
669 device_printf(sc->dev, "Found firmware version %s\n",
670 sc->fw_version);
671 device_printf(sc->dev, "Driver needs %d.%d\n",
672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
673 return EINVAL;
674 }
675 return 0;
676
677}
678
679static void *
680z_alloc(void *nil, u_int items, u_int size)
681{
682 void *ptr;
683
684 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
685 return ptr;
686}
687
688static void
689z_free(void *nil, void *ptr)
690{
691 free(ptr, M_TEMP);
692}
693
694
695static int
696mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
697{
698 z_stream zs;
699 char *inflate_buffer;
700 const struct firmware *fw;
701 const mcp_gen_header_t *hdr;
702 unsigned hdr_offset;
703 int status;
704 unsigned int i;
705 char dummy;
706 size_t fw_len;
707
708 fw = firmware_get(sc->fw_name);
709 if (fw == NULL) {
710 device_printf(sc->dev, "Could not find firmware image %s\n",
711 sc->fw_name);
712 return ENOENT;
713 }
714
715
716
717 /* setup zlib and decompress f/w */
718 bzero(&zs, sizeof (zs));
719 zs.zalloc = z_alloc;
720 zs.zfree = z_free;
721 status = inflateInit(&zs);
722 if (status != Z_OK) {
723 status = EIO;
724 goto abort_with_fw;
725 }
726
727 /* the uncompressed size is stored as the firmware version,
728 which would otherwise go unused */
729 fw_len = (size_t) fw->version;
730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
731 if (inflate_buffer == NULL)
732 goto abort_with_zs;
733 zs.avail_in = fw->datasize;
734 zs.next_in = __DECONST(char *, fw->data);
735 zs.avail_out = fw_len;
736 zs.next_out = inflate_buffer;
737 status = inflate(&zs, Z_FINISH);
738 if (status != Z_STREAM_END) {
739 device_printf(sc->dev, "zlib %d\n", status);
740 status = EIO;
741 goto abort_with_buffer;
742 }
743
744 /* check id */
745 hdr_offset = htobe32(*(const uint32_t *)
746 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
748		device_printf(sc->dev, "Bad firmware file\n");
749 status = EIO;
750 goto abort_with_buffer;
751 }
752 hdr = (const void*)(inflate_buffer + hdr_offset);
753
754 status = mxge_validate_firmware(sc, hdr);
755 if (status != 0)
756 goto abort_with_buffer;
757
758 /* Copy the inflated firmware to NIC SRAM. */
759 for (i = 0; i < fw_len; i += 256) {
760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
761 inflate_buffer + i,
762 min(256U, (unsigned)(fw_len - i)));
763 wmb();
764 dummy = *sc->sram;
765 wmb();
766 }
767
768 *limit = fw_len;
769 status = 0;
770abort_with_buffer:
771 free(inflate_buffer, M_TEMP);
772abort_with_zs:
773 inflateEnd(&zs);
774abort_with_fw:
775 firmware_put(fw, FIRMWARE_UNLOAD);
776 return status;
777}
778
779/*
780 * Enable or disable periodic RDMAs from the host to make certain
781 * chipsets resend dropped PCIe messages
782 */
783
784static void
785mxge_dummy_rdma(mxge_softc_t *sc, int enable)
786{
787 char buf_bytes[72];
788 volatile uint32_t *confirm;
789 volatile char *submit;
790 uint32_t *buf, dma_low, dma_high;
791 int i;
792
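	/* round the stack buffer up to its first 8-byte-aligned
	 * address; adding 7 then masking the low three bits cannot
	 * fall outside the 72-byte buf_bytes array */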
793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
794
795 /* clear confirmation addr */
796 confirm = (volatile uint32_t *)sc->cmd;
797 *confirm = 0;
798 wmb();
799
800 /* send an rdma command to the PCIe engine, and wait for the
801 response in the confirmation address. The firmware should
802 write a -1 there to indicate it is alive and well
803 */
804
805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
807 buf[0] = htobe32(dma_high); /* confirm addr MSW */
808 buf[1] = htobe32(dma_low); /* confirm addr LSW */
809 buf[2] = htobe32(0xffffffff); /* confirm data */
810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
812 buf[3] = htobe32(dma_high); /* dummy addr MSW */
813 buf[4] = htobe32(dma_low); /* dummy addr LSW */
814 buf[5] = htobe32(enable); /* enable? */
815
816
817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
818
819 mxge_pio_copy(submit, buf, 64);
820 wmb();
821 DELAY(1000);
822 wmb();
823 i = 0;
824 while (*confirm != 0xffffffff && i < 20) {
825 DELAY(1000);
826 i++;
827 }
828 if (*confirm != 0xffffffff) {
829		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
830 (enable ? "enable" : "disable"), confirm,
831 *confirm);
832 }
833 return;
834}
835
836static int
837mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
838{
839 mcp_cmd_t *buf;
840 char buf_bytes[sizeof(*buf) + 8];
841 volatile mcp_cmd_response_t *response = sc->cmd;
842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
843 uint32_t dma_low, dma_high;
844 int err, sleep_total = 0;
845
846 /* ensure buf is aligned to 8 bytes */
847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
848
849 buf->data0 = htobe32(data->data0);
850 buf->data1 = htobe32(data->data1);
851 buf->data2 = htobe32(data->data2);
852 buf->cmd = htobe32(cmd);
853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
855
856 buf->response_addr.low = htobe32(dma_low);
857 buf->response_addr.high = htobe32(dma_high);
858 mtx_lock(&sc->cmd_mtx);
859 response->result = 0xffffffff;
860 wmb();
861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
862
863 /* wait up to 20ms */
864 err = EAGAIN;
865 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
866 bus_dmamap_sync(sc->cmd_dma.dmat,
867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
868 wmb();
869 switch (be32toh(response->result)) {
870 case 0:
871 data->data0 = be32toh(response->data);
872 err = 0;
873 break;
874 case 0xffffffff:
875 DELAY(1000);
876 break;
877 case MXGEFW_CMD_UNKNOWN:
878 err = ENOSYS;
879 break;
880 case MXGEFW_CMD_ERROR_UNALIGNED:
881 err = E2BIG;
882 break;
883 case MXGEFW_CMD_ERROR_BUSY:
884 err = EBUSY;
885 break;
886 default:
887 device_printf(sc->dev,
888 "mxge: command %d "
889 "failed, result = %d\n",
890 cmd, be32toh(response->result));
891 err = ENXIO;
892 break;
893 }
894 if (err != EAGAIN)
895 break;
896 }
897 if (err == EAGAIN)
898		device_printf(sc->dev, "mxge: command %d timed out, "
899			      "result = %d\n",
900 cmd, be32toh(response->result));
901 mtx_unlock(&sc->cmd_mtx);
902 return err;
903}
904
905static int
906mxge_adopt_running_firmware(mxge_softc_t *sc)
907{
908 struct mcp_gen_header *hdr;
909 const size_t bytes = sizeof (struct mcp_gen_header);
910 size_t hdr_offset;
911 int status;
912
913 /* find running firmware header */
914 hdr_offset = htobe32(*(volatile uint32_t *)
915 (sc->sram + MCP_HEADER_PTR_OFFSET));
916
917 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
918 device_printf(sc->dev,
919 "Running firmware has bad header offset (%d)\n",
920 (int)hdr_offset);
921 return EIO;
922 }
923
924 /* copy header of running firmware from SRAM to host memory to
925 * validate firmware */
926 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
927 if (hdr == NULL) {
928 device_printf(sc->dev, "could not malloc firmware hdr\n");
929 return ENOMEM;
930 }
931 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
932 rman_get_bushandle(sc->mem_res),
933 hdr_offset, (char *)hdr, bytes);
934 status = mxge_validate_firmware(sc, hdr);
935 free(hdr, M_DEVBUF);
936
937 /*
938 * check to see if adopted firmware has bug where adopting
939 * it will cause broadcasts to be filtered unless the NIC
940 * is kept in ALLMULTI mode
941 */
942 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
943 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
944 sc->adopted_rx_filter_bug = 1;
945 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
946 "working around rx filter bug\n",
947 sc->fw_ver_major, sc->fw_ver_minor,
948 sc->fw_ver_tiny);
949 }
950
951 return status;
952}
953
954
955static int
956mxge_load_firmware(mxge_softc_t *sc, int adopt)
957{
958 volatile uint32_t *confirm;
959 volatile char *submit;
960 char buf_bytes[72];
961 uint32_t *buf, size, dma_low, dma_high;
962 int status, i;
963
964 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
965
966 size = sc->sram_size;
967 status = mxge_load_firmware_helper(sc, &size);
968 if (status) {
969 if (!adopt)
970 return status;
971 /* Try to use the currently running firmware, if
972 it is new enough */
973 status = mxge_adopt_running_firmware(sc);
974 if (status) {
975 device_printf(sc->dev,
976 "failed to adopt running firmware\n");
977 return status;
978 }
979 device_printf(sc->dev,
980 "Successfully adopted running firmware\n");
981 if (sc->tx_boundary == 4096) {
982 device_printf(sc->dev,
983 "Using firmware currently running on NIC"
984 ". For optimal\n");
985 device_printf(sc->dev,
986 "performance consider loading optimized "
987 "firmware\n");
988 }
989 sc->fw_name = mxge_fw_unaligned;
990 sc->tx_boundary = 2048;
991 return 0;
992 }
993 /* clear confirmation addr */
994 confirm = (volatile uint32_t *)sc->cmd;
995 *confirm = 0;
996 wmb();
997 /* send a reload command to the bootstrap MCP, and wait for the
998 response in the confirmation address. The firmware should
999 write a -1 there to indicate it is alive and well
1000 */
1001
1002 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1003 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1004
1005 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1006 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1007 buf[2] = htobe32(0xffffffff); /* confirm data */
1008
1009 /* FIX: All newest firmware should un-protect the bottom of
1010 the sram before handoff. However, the very first interfaces
1011 do not. Therefore the handoff copy must skip the first 8 bytes
1012 */
1013 /* where the code starts*/
1014 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1015 buf[4] = htobe32(size - 8); /* length of code */
1016 buf[5] = htobe32(8); /* where to copy to */
1017 buf[6] = htobe32(0); /* where to jump to */
1018
1019 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1020 mxge_pio_copy(submit, buf, 64);
1021 wmb();
1022 DELAY(1000);
1023 wmb();
1024 i = 0;
1025 while (*confirm != 0xffffffff && i < 20) {
1026 DELAY(1000*10);
1027 i++;
1028 bus_dmamap_sync(sc->cmd_dma.dmat,
1029 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1030 }
1031 if (*confirm != 0xffffffff) {
1032		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
1033 confirm, *confirm);
1034
1035 return ENXIO;
1036 }
1037 return 0;
1038}
1039
1040static int
1041mxge_update_mac_address(mxge_softc_t *sc)
1042{
1043 mxge_cmd_t cmd;
1044 uint8_t *addr = sc->mac_addr;
1045 int status;
1046
1047
1048 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1049 | (addr[2] << 8) | addr[3]);
1050
1051 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1052
1053 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1054 return status;
1055}
1056
1057static int
1058mxge_change_pause(mxge_softc_t *sc, int pause)
1059{
1060 mxge_cmd_t cmd;
1061 int status;
1062
1063 if (pause)
1064 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1065 &cmd);
1066 else
1067 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1068 &cmd);
1069
1070 if (status) {
1071 device_printf(sc->dev, "Failed to set flow control mode\n");
1072 return ENXIO;
1073 }
1074 sc->pause = pause;
1075 return 0;
1076}
1077
1078static void
1079mxge_change_promisc(mxge_softc_t *sc, int promisc)
1080{
1081 mxge_cmd_t cmd;
1082 int status;
1083
1084 if (mxge_always_promisc)
1085 promisc = 1;
1086
1087 if (promisc)
1088 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1089 &cmd);
1090 else
1091 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1092 &cmd);
1093
1094 if (status) {
1095 device_printf(sc->dev, "Failed to set promisc mode\n");
1096 }
1097}
1098
1099static void
1100mxge_set_multicast_list(mxge_softc_t *sc)
1101{
1102 mxge_cmd_t cmd;
1103 struct ifmultiaddr *ifma;
1104 struct ifnet *ifp = sc->ifp;
1105 int err;
1106
1107 /* This firmware is known to not support multicast */
1108 if (!sc->fw_multicast_support)
1109 return;
1110
1111 /* Disable multicast filtering while we play with the lists*/
1112 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1113 if (err != 0) {
1114 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1115 " error status: %d\n", err);
1116 return;
1117 }
1118
1119 if (sc->adopted_rx_filter_bug)
1120 return;
1121
1122 if (ifp->if_flags & IFF_ALLMULTI)
1123 /* request to disable multicast filtering, so quit here */
1124 return;
1125
1126 /* Flush all the filters */
1127
1128 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1129 if (err != 0) {
1130 device_printf(sc->dev,
1131 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1132 ", error status: %d\n", err);
1133 return;
1134 }
1135
1136 /* Walk the multicast list, and add each address */
1137
1138 if_maddr_rlock(ifp);
1139 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1140 if (ifma->ifma_addr->sa_family != AF_LINK)
1141 continue;
1142 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1143 &cmd.data0, 4);
1144 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1145 &cmd.data1, 2);
1146 cmd.data0 = htonl(cmd.data0);
1147 cmd.data1 = htonl(cmd.data1);
1148 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1149 if (err != 0) {
1150 device_printf(sc->dev, "Failed "
1151				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
1152				      "%d\n", err);
1153 /* abort, leaving multicast filtering off */
1154 if_maddr_runlock(ifp);
1155 return;
1156 }
1157 }
1158 if_maddr_runlock(ifp);
1159 /* Enable multicast filtering */
1160 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1161 if (err != 0) {
1162 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1163 ", error status: %d\n", err);
1164 }
1165}
1166
1167static int
1168mxge_max_mtu(mxge_softc_t *sc)
1169{
1170 mxge_cmd_t cmd;
1171 int status;
1172
1173 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1174 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1175
1176	/* try to set nbufs to see if we can
1177	   use virtually contiguous jumbos */
1178 cmd.data0 = 0;
1179 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1180 &cmd);
1181 if (status == 0)
1182 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1183
1184 /* otherwise, we're limited to MJUMPAGESIZE */
1185 return MJUMPAGESIZE - MXGEFW_PAD;
1186}
1187
1188static int
1189mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1190{
1191 struct mxge_slice_state *ss;
1192 mxge_rx_done_t *rx_done;
1193 volatile uint32_t *irq_claim;
1194 mxge_cmd_t cmd;
1195 int slice, status;
1196
1197 /* try to send a reset command to the card to see if it
1198 is alive */
1199 memset(&cmd, 0, sizeof (cmd));
1200 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1201 if (status != 0) {
1202 device_printf(sc->dev, "failed reset\n");
1203 return ENXIO;
1204 }
1205
1206 mxge_dummy_rdma(sc, 1);
1207
1208
1209 /* set the intrq size */
1210 cmd.data0 = sc->rx_ring_size;
1211 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1212
1213 /*
1214 * Even though we already know how many slices are supported
1215 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1216 * has magic side effects, and must be called after a reset.
1217 * It must be called prior to calling any RSS related cmds,
1218 * including assigning an interrupt queue for anything but
1219 * slice 0. It must also be called *after*
1220 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1221 * the firmware to compute offsets.
1222 */
1223
1224 if (sc->num_slices > 1) {
1225 /* ask the maximum number of slices it supports */
1226 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1227 &cmd);
1228 if (status != 0) {
1229 device_printf(sc->dev,
1230 "failed to get number of slices\n");
1231 return status;
1232 }
1233 /*
1234 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1235 * to setting up the interrupt queue DMA
1236 */
1237 cmd.data0 = sc->num_slices;
1238 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1239#ifdef IFNET_BUF_RING
1240 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1241#endif
1242 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1243 &cmd);
1244 if (status != 0) {
1245 device_printf(sc->dev,
1246 "failed to set number of slices\n");
1247 return status;
1248 }
1249 }
1250
1251
1252 if (interrupts_setup) {
1253 /* Now exchange information about interrupts */
1254 for (slice = 0; slice < sc->num_slices; slice++) {
1255 rx_done = &sc->ss[slice].rx_done;
1256 memset(rx_done->entry, 0, sc->rx_ring_size);
1257 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1258 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1259 cmd.data2 = slice;
1260 status |= mxge_send_cmd(sc,
1261 MXGEFW_CMD_SET_INTRQ_DMA,
1262 &cmd);
1263 }
1264 }
1265
1266 status |= mxge_send_cmd(sc,
1267 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1268
1269
1270 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1271
1272 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1273 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1274
1275
1276 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1277 &cmd);
1278 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1279 if (status != 0) {
1280		device_printf(sc->dev, "failed to set interrupt parameters\n");
1281 return status;
1282 }
1283
1284
1285 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1286
1287
1288 /* run a DMA benchmark */
1289 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1290
1291 for (slice = 0; slice < sc->num_slices; slice++) {
1292 ss = &sc->ss[slice];
1293
1294 ss->irq_claim = irq_claim + (2 * slice);
1295 /* reset mcp/driver shared state back to 0 */
1296 ss->rx_done.idx = 0;
1297 ss->rx_done.cnt = 0;
1298 ss->tx.req = 0;
1299 ss->tx.done = 0;
1300 ss->tx.pkt_done = 0;
1301 ss->tx.queue_active = 0;
1302 ss->tx.activate = 0;
1303 ss->tx.deactivate = 0;
1304 ss->tx.wake = 0;
1305 ss->tx.defrag = 0;
1306 ss->tx.stall = 0;
1307 ss->rx_big.cnt = 0;
1308 ss->rx_small.cnt = 0;
1309 ss->lro_bad_csum = 0;
1310 ss->lro_queued = 0;
1311 ss->lro_flushed = 0;
1312 if (ss->fw_stats != NULL) {
1313 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1314 }
1315 }
1316 sc->rdma_tags_available = 15;
1317 status = mxge_update_mac_address(sc);
1318 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1319 mxge_change_pause(sc, sc->pause);
1320 mxge_set_multicast_list(sc);
1321 if (sc->throttle) {
1322 cmd.data0 = sc->throttle;
1323 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1324 &cmd)) {
1325 device_printf(sc->dev,
1326 "can't enable throttle\n");
1327 }
1328 }
1329 return status;
1330}
1331
1332static int
1333mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1334{
1335 mxge_cmd_t cmd;
1336 mxge_softc_t *sc;
1337 int err;
1338 unsigned int throttle;
1339
1340 sc = arg1;
1341 throttle = sc->throttle;
1342 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1343 if (err != 0) {
1344 return err;
1345 }
1346
1347 if (throttle == sc->throttle)
1348 return 0;
1349
1350 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1351 return EINVAL;
1352
1353 mtx_lock(&sc->driver_mtx);
1354 cmd.data0 = throttle;
1355 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1356 if (err == 0)
1357 sc->throttle = throttle;
1358 mtx_unlock(&sc->driver_mtx);
1359 return err;
1360}
1361
1362static int
1363mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1364{
1365 mxge_softc_t *sc;
1366 unsigned int intr_coal_delay;
1367 int err;
1368
1369 sc = arg1;
1370 intr_coal_delay = sc->intr_coal_delay;
1371 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1372 if (err != 0) {
1373 return err;
1374 }
1375 if (intr_coal_delay == sc->intr_coal_delay)
1376 return 0;
1377
1378 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1379 return EINVAL;
1380
1381 mtx_lock(&sc->driver_mtx);
1382 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1383 sc->intr_coal_delay = intr_coal_delay;
1384
1385 mtx_unlock(&sc->driver_mtx);
1386 return err;
1387}
1388
1389static int
1390mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1391{
1392 mxge_softc_t *sc;
1393 unsigned int enabled;
1394 int err;
1395
1396 sc = arg1;
1397 enabled = sc->pause;
1398 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1399 if (err != 0) {
1400 return err;
1401 }
1402 if (enabled == sc->pause)
1403 return 0;
1404
1405 mtx_lock(&sc->driver_mtx);
1406 err = mxge_change_pause(sc, enabled);
1407 mtx_unlock(&sc->driver_mtx);
1408 return err;
1409}
1410
1411static int
1412mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1413{
1414 struct ifnet *ifp;
1415 int err = 0;
1416
1417 ifp = sc->ifp;
1418 if (lro_cnt == 0)
1419 ifp->if_capenable &= ~IFCAP_LRO;
1420 else
1421 ifp->if_capenable |= IFCAP_LRO;
1422 sc->lro_cnt = lro_cnt;
1423 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1424 mxge_close(sc, 0);
1425 err = mxge_open(sc);
1426 }
1427 return err;
1428}
1429
1430static int
1431mxge_change_lro(SYSCTL_HANDLER_ARGS)
1432{
1433 mxge_softc_t *sc;
1434 unsigned int lro_cnt;
1435 int err;
1436
1437 sc = arg1;
1438 lro_cnt = sc->lro_cnt;
1439 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1440 if (err != 0)
1441 return err;
1442
1443 if (lro_cnt == sc->lro_cnt)
1444 return 0;
1445
1446 if (lro_cnt > 128)
1447 return EINVAL;
1448
1449 mtx_lock(&sc->driver_mtx);
1450 err = mxge_change_lro_locked(sc, lro_cnt);
1451 mtx_unlock(&sc->driver_mtx);
1452 return err;
1453}
1454
1455static int
1456mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1457{
1458 int err;
1459
1460 if (arg1 == NULL)
1461 return EFAULT;
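	/* hand the byte-swapped value to sysctl_handle_int() via arg2
	 * with arg1 == NULL, which exports it as a read-only int */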
1462 arg2 = be32toh(*(int *)arg1);
1463 arg1 = NULL;
1464 err = sysctl_handle_int(oidp, arg1, arg2, req);
1465
1466 return err;
1467}
1468
1469static void
1470mxge_rem_sysctls(mxge_softc_t *sc)
1471{
1472 struct mxge_slice_state *ss;
1473 int slice;
1474
1475 if (sc->slice_sysctl_tree == NULL)
1476 return;
1477
1478 for (slice = 0; slice < sc->num_slices; slice++) {
1479 ss = &sc->ss[slice];
1480 if (ss == NULL || ss->sysctl_tree == NULL)
1481 continue;
1482 sysctl_ctx_free(&ss->sysctl_ctx);
1483 ss->sysctl_tree = NULL;
1484 }
1485 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1486 sc->slice_sysctl_tree = NULL;
1487}
1488
1489static void
1490mxge_add_sysctls(mxge_softc_t *sc)
1491{
1492 struct sysctl_ctx_list *ctx;
1493 struct sysctl_oid_list *children;
1494 mcp_irq_data_t *fw;
1495 struct mxge_slice_state *ss;
1496 int slice;
1497 char slice_num[8];
1498
1499 ctx = device_get_sysctl_ctx(sc->dev);
1500 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1501 fw = sc->ss[0].fw_stats;
1502
1503 /* random information */
1504 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1505 "firmware_version",
1506 CTLFLAG_RD, &sc->fw_version,
1507 0, "firmware version");
1508 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1509 "serial_number",
1510 CTLFLAG_RD, &sc->serial_number_string,
1511 0, "serial number");
1512 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1513 "product_code",
1514 CTLFLAG_RD, &sc->product_code_string,
1515 0, "product_code");
1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1517 "pcie_link_width",
1518 CTLFLAG_RD, &sc->link_width,
1519		       0, "PCIe link width");
1520 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1521 "tx_boundary",
1522 CTLFLAG_RD, &sc->tx_boundary,
1523 0, "tx_boundary");
1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1525 "write_combine",
1526 CTLFLAG_RD, &sc->wc,
1527 0, "write combining PIO?");
1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1529 "read_dma_MBs",
1530 CTLFLAG_RD, &sc->read_dma,
1531 0, "DMA Read speed in MB/s");
1532 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1533 "write_dma_MBs",
1534 CTLFLAG_RD, &sc->write_dma,
1535 0, "DMA Write speed in MB/s");
1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1537 "read_write_dma_MBs",
1538 CTLFLAG_RD, &sc->read_write_dma,
1539 0, "DMA concurrent Read/Write speed in MB/s");
1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1541 "watchdog_resets",
1542 CTLFLAG_RD, &sc->watchdog_resets,
1543 0, "Number of times NIC was reset");
1544
1545
1546 /* performance related tunables */
1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1548 "intr_coal_delay",
1549 CTLTYPE_INT|CTLFLAG_RW, sc,
1550 0, mxge_change_intr_coal,
1551 "I", "interrupt coalescing delay in usecs");
1552
1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1554 "throttle",
1555 CTLTYPE_INT|CTLFLAG_RW, sc,
1556 0, mxge_change_throttle,
1557 "I", "transmit throttling");
1558
1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1560 "flow_control_enabled",
1561 CTLTYPE_INT|CTLFLAG_RW, sc,
1562 0, mxge_change_flow_control,
1563			"I", "enable/disable pause (flow control) frames");
1564
1565 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1566 "deassert_wait",
1567 CTLFLAG_RW, &mxge_deassert_wait,
1568 0, "Wait for IRQ line to go low in ihandler");
1569
1570 /* stats block from firmware is in network byte order.
1571 Need to swap it */
1572 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1573 "link_up",
1574 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1575 0, mxge_handle_be32,
1576 "I", "link up");
1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1578 "rdma_tags_available",
1579 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1580 0, mxge_handle_be32,
1581 "I", "rdma_tags_available");
1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1583 "dropped_bad_crc32",
1584 CTLTYPE_INT|CTLFLAG_RD,
1585 &fw->dropped_bad_crc32,
1586 0, mxge_handle_be32,
1587 "I", "dropped_bad_crc32");
1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1589 "dropped_bad_phy",
1590 CTLTYPE_INT|CTLFLAG_RD,
1591 &fw->dropped_bad_phy,
1592 0, mxge_handle_be32,
1593 "I", "dropped_bad_phy");
1594 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1595 "dropped_link_error_or_filtered",
1596 CTLTYPE_INT|CTLFLAG_RD,
1597 &fw->dropped_link_error_or_filtered,
1598 0, mxge_handle_be32,
1599 "I", "dropped_link_error_or_filtered");
1600 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1601 "dropped_link_overflow",
1602 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1603 0, mxge_handle_be32,
1604 "I", "dropped_link_overflow");
1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1606 "dropped_multicast_filtered",
1607 CTLTYPE_INT|CTLFLAG_RD,
1608 &fw->dropped_multicast_filtered,
1609 0, mxge_handle_be32,
1610 "I", "dropped_multicast_filtered");
1611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1612 "dropped_no_big_buffer",
1613 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1614 0, mxge_handle_be32,
1615 "I", "dropped_no_big_buffer");
1616 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1617 "dropped_no_small_buffer",
1618 CTLTYPE_INT|CTLFLAG_RD,
1619 &fw->dropped_no_small_buffer,
1620 0, mxge_handle_be32,
1621 "I", "dropped_no_small_buffer");
1622 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1623 "dropped_overrun",
1624 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1625 0, mxge_handle_be32,
1626 "I", "dropped_overrun");
1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1628 "dropped_pause",
1629 CTLTYPE_INT|CTLFLAG_RD,
1630 &fw->dropped_pause,
1631 0, mxge_handle_be32,
1632 "I", "dropped_pause");
1633 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1634 "dropped_runt",
1635 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1636 0, mxge_handle_be32,
1637 "I", "dropped_runt");
1638
1639 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1640 "dropped_unicast_filtered",
1641 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1642 0, mxge_handle_be32,
1643 "I", "dropped_unicast_filtered");
1644
1645 /* verbose printing? */
1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1647 "verbose",
1648 CTLFLAG_RW, &mxge_verbose,
1649 0, "verbose printing");
1650
1651 /* lro */
1652 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1653 "lro_cnt",
1654 CTLTYPE_INT|CTLFLAG_RW, sc,
1655 0, mxge_change_lro,
1656 "I", "number of lro merge queues");
1657
1658
1659 /* add counters exported for debugging from all slices */
1660 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1661 sc->slice_sysctl_tree =
1662 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1663 "slice", CTLFLAG_RD, 0, "");
1664
1665 for (slice = 0; slice < sc->num_slices; slice++) {
1666 ss = &sc->ss[slice];
1667 sysctl_ctx_init(&ss->sysctl_ctx);
1668 ctx = &ss->sysctl_ctx;
1669 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1670 sprintf(slice_num, "%d", slice);
1671 ss->sysctl_tree =
1672 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1673 CTLFLAG_RD, 0, "");
1674 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1676 "rx_small_cnt",
1677 CTLFLAG_RD, &ss->rx_small.cnt,
1678 0, "rx_small_cnt");
1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1680 "rx_big_cnt",
1681 CTLFLAG_RD, &ss->rx_big.cnt,
1682			       0, "rx_big_cnt");
1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1684 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1685 0, "number of lro merge queues flushed");
1686
1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1688 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1689			       0, "number of frames appended to lro merge "
1690 "queues");
1691
1692#ifndef IFNET_BUF_RING
1693 /* only transmit from slice 0 for now */
1694 if (slice > 0)
1695 continue;
1696#endif
1697 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1698 "tx_req",
1699 CTLFLAG_RD, &ss->tx.req,
1700 0, "tx_req");
1701
1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1703 "tx_done",
1704 CTLFLAG_RD, &ss->tx.done,
1705 0, "tx_done");
1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1707 "tx_pkt_done",
1708 CTLFLAG_RD, &ss->tx.pkt_done,
1709			       0, "tx_pkt_done");
1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1711 "tx_stall",
1712 CTLFLAG_RD, &ss->tx.stall,
1713 0, "tx_stall");
1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1715 "tx_wake",
1716 CTLFLAG_RD, &ss->tx.wake,
1717 0, "tx_wake");
1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1719 "tx_defrag",
1720 CTLFLAG_RD, &ss->tx.defrag,
1721 0, "tx_defrag");
1722 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1723 "tx_queue_active",
1724 CTLFLAG_RD, &ss->tx.queue_active,
1725 0, "tx_queue_active");
1726 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1727 "tx_activate",
1728 CTLFLAG_RD, &ss->tx.activate,
1729 0, "tx_activate");
1730 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1731 "tx_deactivate",
1732 CTLFLAG_RD, &ss->tx.deactivate,
1733 0, "tx_deactivate");
1734 }
1735}
1736
1737/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1738 backwards one at a time and handle ring wraps */
1739
1740static inline void
1741mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1742 mcp_kreq_ether_send_t *src, int cnt)
1743{
1744 int idx, starting_slot;
1745 starting_slot = tx->req;
1746 while (cnt > 1) {
1747 cnt--;
1748 idx = (starting_slot + cnt) & tx->mask;
1749 mxge_pio_copy(&tx->lanai[idx],
1750 &src[cnt], sizeof(*src));
1751 wmb();
1752 }
1753}
1754
1755/*
1756 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1757 * at most 32 bytes at a time, so as to avoid involving the software
1758 * pio handler in the nic. We re-write the first segment's flags
1759 * to mark them valid only after writing the entire chain
1760 */
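/* Note: each send request is 16 bytes, so the loop below moves two
 * requests (2 * sizeof(*src) == 32 bytes) per PIO copy, with a write
 * barrier between chunks. */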
1761
1762static inline void
1763mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1764 int cnt)
1765{
1766 int idx, i;
1767 uint32_t *src_ints;
1768 volatile uint32_t *dst_ints;
1769 mcp_kreq_ether_send_t *srcp;
1770 volatile mcp_kreq_ether_send_t *dstp, *dst;
1771 uint8_t last_flags;
1772
1773 idx = tx->req & tx->mask;
1774
1775 last_flags = src->flags;
1776 src->flags = 0;
1777 wmb();
1778 dst = dstp = &tx->lanai[idx];
1779 srcp = src;
1780
1781 if ((idx + cnt) < tx->mask) {
1782 for (i = 0; i < (cnt - 1); i += 2) {
1783 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1784 wmb(); /* force write every 32 bytes */
1785 srcp += 2;
1786 dstp += 2;
1787 }
1788 } else {
1789 /* submit all but the first request, and ensure
1790 that it is submitted below */
1791 mxge_submit_req_backwards(tx, src, cnt);
1792 i = 0;
1793 }
1794 if (i < cnt) {
1795 /* submit the first request */
1796 mxge_pio_copy(dstp, srcp, sizeof(*src));
1797 wmb(); /* barrier before setting valid flag */
1798 }
1799
1800 /* re-write the last 32-bits with the valid flags */
1801 src->flags = last_flags;
1802 src_ints = (uint32_t *)src;
1803 src_ints+=3;
1804 dst_ints = (volatile uint32_t *)dst;
1805 dst_ints+=3;
1806 *dst_ints = *src_ints;
1807 tx->req += cnt;
1808 wmb();
1809}
1810
1811#if IFCAP_TSO4
1812
1813static void
1814mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1815 int busdma_seg_cnt, int ip_off)
1816{
1817 mxge_tx_ring_t *tx;
1818 mcp_kreq_ether_send_t *req;
1819 bus_dma_segment_t *seg;
1820 struct ip *ip;
1821 struct tcphdr *tcp;
1822 uint32_t low, high_swapped;
1823 int len, seglen, cum_len, cum_len_next;
1824 int next_is_first, chop, cnt, rdma_count, small;
1825 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1826 uint8_t flags, flags_next;
1827 static int once;
1828
1829 mss = m->m_pkthdr.tso_segsz;
1830
1831 /* negative cum_len signifies to the
1832 * send loop that we are still in the
1833 * header portion of the TSO packet.
1834 */
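	/* e.g. cum_len is initialized below to -(ip_off + IP + TCP
	 * header lengths), so it first reaches zero exactly where the
	 * TCP payload begins */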
1835
1836 /* ensure we have the ethernet, IP and TCP
1837 header together in the first mbuf, copy
1838 it to a scratch buffer if not */
1839 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1840 m_copydata(m, 0, ip_off + sizeof (*ip),
1841 ss->scratch);
1842 ip = (struct ip *)(ss->scratch + ip_off);
1843 } else {
1844 ip = (struct ip *)(mtod(m, char *) + ip_off);
1845 }
1846 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1847 + sizeof (*tcp))) {
1848 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1849 + sizeof (*tcp), ss->scratch);
1850 ip = (struct ip *)(mtod(m, char *) + ip_off);
1851 }
1852
1853 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1854 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1855
1856 /* TSO implies checksum offload on this hardware */
1857 cksum_offset = ip_off + (ip->ip_hl << 2);
1858 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1859
1860
1861 /* for TSO, pseudo_hdr_offset holds mss.
1862 * The firmware figures out where to put
1863 * the checksum by parsing the header. */
1864 pseudo_hdr_offset = htobe16(mss);
1865
1866 tx = &ss->tx;
1867 req = tx->req_list;
1868 seg = tx->seg_list;
1869 cnt = 0;
1870 rdma_count = 0;
1871 /* "rdma_count" is the number of RDMAs belonging to the
1872 * current packet BEFORE the current send request. For
1873 * non-TSO packets, this is equal to "count".
1874 * For TSO packets, rdma_count needs to be reset
1875 * to 0 after a segment cut.
1876 *
1877 * The rdma_count field of the send request is
1878 * the number of RDMAs of the packet starting at
1879	 * that request.  For TSO send requests with one or more cuts
1880 * in the middle, this is the number of RDMAs starting
1881 * after the last cut in the request. All previous
1882 * segments before the last cut implicitly have 1 RDMA.
1883 *
1884 * Since the number of RDMAs is not known beforehand,
1885 * it must be filled-in retroactively - after each
1886 * segmentation cut or at the end of the entire packet.
1887 */
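	/* The "(req - rdma_count)->rdma_count = rdma_count + 1" line
	 * in the loop below is that retroactive fill-in: it reaches
	 * back to the request that opened the current RDMA run and
	 * stores the running count there. */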
1888
1889 while (busdma_seg_cnt) {
1890 /* Break the busdma segment up into pieces*/
1891 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1892 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1893 len = seg->ds_len;
1894
1895 while (len) {
1896 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1897 seglen = len;
1898 cum_len_next = cum_len + seglen;
1899 (req-rdma_count)->rdma_count = rdma_count + 1;
1900 if (__predict_true(cum_len >= 0)) {
1901 /* payload */
1902 chop = (cum_len_next > mss);
1903 cum_len_next = cum_len_next % mss;
1904 next_is_first = (cum_len_next == 0);
1905 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1906 flags_next |= next_is_first *
1907 MXGEFW_FLAGS_FIRST;
1908 rdma_count |= -(chop | next_is_first);
1909 rdma_count += chop & !next_is_first;
1910 } else if (cum_len_next >= 0) {
1911 /* header ends */
1912 rdma_count = -1;
1913 cum_len_next = 0;
1914 seglen = -cum_len;
1915 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1916 flags_next = MXGEFW_FLAGS_TSO_PLD |
1917 MXGEFW_FLAGS_FIRST |
1918 (small * MXGEFW_FLAGS_SMALL);
1919 }
1920
1921 req->addr_high = high_swapped;
1922 req->addr_low = htobe32(low);
1923 req->pseudo_hdr_offset = pseudo_hdr_offset;
1924 req->pad = 0;
1925 req->rdma_count = 1;
1926 req->length = htobe16(seglen);
1927 req->cksum_offset = cksum_offset;
1928 req->flags = flags | ((cum_len & 1) *
1929 MXGEFW_FLAGS_ALIGN_ODD);
1930 low += seglen;
1931 len -= seglen;
1932 cum_len = cum_len_next;
1933 flags = flags_next;
1934 req++;
1935 cnt++;
1936 rdma_count++;
1937 if (__predict_false(cksum_offset > seglen))
1938 cksum_offset -= seglen;
1939 else
1940 cksum_offset = 0;
1941 if (__predict_false(cnt > tx->max_desc))
1942 goto drop;
1943 }
1944 busdma_seg_cnt--;
1945 seg++;
1946 }
1947 (req-rdma_count)->rdma_count = rdma_count;
1948
1949 do {
1950 req--;
1951 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1953
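	/* flag the ring slot holding this packet's last descriptor
	   so the completion path can tell packet boundaries apart */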
1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1955 mxge_submit_req(tx, tx->req_list, cnt);
1956#ifdef IFNET_BUF_RING
1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1958 /* tell the NIC to start polling this slice */
1959 *tx->send_go = 1;
1960 tx->queue_active = 1;
1961 tx->activate++;
1962 wmb();
1963 }
1964#endif
1965 return;
1966
1967drop:
1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1969 m_freem(m);
1970 ss->oerrors++;
1971 if (!once) {
1972 printf("tx->max_desc exceeded via TSO!\n");
1973 printf("mss = %d, %ld, %d!\n", mss,
1974 (long)seg - (long)tx->seg_list, tx->max_desc);
1975 once = 1;
1976 }
1977 return;
1978
1979}
1980
1981#endif /* IFCAP_TSO4 */
1982
1983#ifdef MXGE_NEW_VLAN_API
1984/*
1985 * We reproduce the software vlan tag insertion from
1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1987 * vlan tag insertion. We need to advertise this in order to have the
1988 * vlan interface respect our csum offload flags.
1989 */
1990static struct mbuf *
1991mxge_vlan_tag_insert(struct mbuf *m)
1992{
1993 struct ether_vlan_header *evl;
1994
1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1996 if (__predict_false(m == NULL))
1997 return NULL;
1998 if (m->m_len < sizeof(*evl)) {
1999 m = m_pullup(m, sizeof(*evl));
2000 if (__predict_false(m == NULL))
2001 return NULL;
2002 }
2003 /*
2004 * Transform the Ethernet header into an Ethernet header
2005 * with 802.1Q encapsulation.
2006 */
2007 evl = mtod(m, struct ether_vlan_header *);
2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
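	/*
	 * M_PREPEND opened ETHER_VLAN_ENCAP_LEN (4) bytes at the front;
	 * the bcopy slides the 12 bytes of dst/src addresses forward
	 * over them, leaving the gap after the addresses where the
	 * 802.1Q TPID and tag are written next:
	 *   before: [dst][src][type][payload]
	 *   after:  [dst][src][0x8100][tag][type][payload]
	 */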
2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2012 m->m_flags &= ~M_VLANTAG;
2013 return m;
2014}
2015#endif /* MXGE_NEW_VLAN_API */
2016
2017static void
2018mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2019{
2020 mxge_softc_t *sc;
2021 mcp_kreq_ether_send_t *req;
2022 bus_dma_segment_t *seg;
2023 struct mbuf *m_tmp;
2024 struct ifnet *ifp;
2025 mxge_tx_ring_t *tx;
2026 struct ip *ip;
2027 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2028 uint16_t pseudo_hdr_offset;
2029 uint8_t flags, cksum_offset;
2030
2031
2032 sc = ss->sc;
2033 ifp = sc->ifp;
2034 tx = &ss->tx;
2035
2036 ip_off = sizeof (struct ether_header);
2037#ifdef MXGE_NEW_VLAN_API
2038 if (m->m_flags & M_VLANTAG) {
2039 m = mxge_vlan_tag_insert(m);
2040 if (__predict_false(m == NULL))
2041 goto drop;
2042 ip_off += ETHER_VLAN_ENCAP_LEN;
2043 }
2044#endif
2045 /* (try to) map the frame for DMA */
2046 idx = tx->req & tx->mask;
2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2048 m, tx->seg_list, &cnt,
2049 BUS_DMA_NOWAIT);
2050 if (__predict_false(err == EFBIG)) {
2051 /* Too many segments in the chain. Try
2052 to defrag */
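		/*
		 * m_defrag() copies the chain into as few clusters as
		 * possible; if it fails, or the reloaded chain still
		 * has too many segments, the packet is dropped below.
		 */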
2053 m_tmp = m_defrag(m, M_NOWAIT);
2054 if (m_tmp == NULL) {
2055 goto drop;
2056 }
2057 ss->tx.defrag++;
2058 m = m_tmp;
2059 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2060 tx->info[idx].map,
2061 m, tx->seg_list, &cnt,
2062 BUS_DMA_NOWAIT);
2063 }
2064 if (__predict_false(err != 0)) {
2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2066 " packet len = %d\n", err, m->m_pkthdr.len);
2067 goto drop;
2068 }
2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2070 BUS_DMASYNC_PREWRITE);
2071 tx->info[idx].m = m;
2072
2073#if IFCAP_TSO4
2074	/* TSO is different enough that we handle it in another routine */
2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2076 mxge_encap_tso(ss, m, cnt, ip_off);
2077 return;
2078 }
2079#endif
2080
2081 req = tx->req_list;
2082 cksum_offset = 0;
2083 pseudo_hdr_offset = 0;
2084 flags = MXGEFW_FLAGS_NO_TSO;
2085
2086 /* checksum offloading? */
2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2088 /* ensure ip header is in first mbuf, copy
2089 it to a scratch buffer if not */
2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2091 m_copydata(m, 0, ip_off + sizeof (*ip),
2092 ss->scratch);
2093 ip = (struct ip *)(ss->scratch + ip_off);
2094 } else {
2095 ip = (struct ip *)(mtod(m, char *) + ip_off);
2096 }
2097 cksum_offset = ip_off + (ip->ip_hl << 2);
2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
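		/*
		 * For non-TSO sends pseudo_hdr_offset appears to name
		 * the absolute offset where the firmware stores the
		 * computed checksum: csum_data is the stack's offset of
		 * the checksum field within the transport header, and
		 * cksum_offset the start of that header.
		 */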
2100 req->cksum_offset = cksum_offset;
2101 flags |= MXGEFW_FLAGS_CKSUM;
2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2103 } else {
2104 odd_flag = 0;
2105 }
2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2107 flags |= MXGEFW_FLAGS_SMALL;
2108
2109 /* convert segments into a request list */
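	/*
	 * One send request per DMA segment. cksum_offset is relative
	 * to the segment being described, so walk it down as segments
	 * are consumed and pin it at 0 once the checksum start lies in
	 * an earlier segment.
	 */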
2110 cum_len = 0;
2111 seg = tx->seg_list;
2112 req->flags = MXGEFW_FLAGS_FIRST;
2113 for (i = 0; i < cnt; i++) {
2114 req->addr_low =
2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2116 req->addr_high =
2117 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2118 req->length = htobe16(seg->ds_len);
2119 req->cksum_offset = cksum_offset;
2120 if (cksum_offset > seg->ds_len)
2121 cksum_offset -= seg->ds_len;
2122 else
2123 cksum_offset = 0;
2124 req->pseudo_hdr_offset = pseudo_hdr_offset;
2125 req->pad = 0; /* complete solid 16-byte block */
2126 req->rdma_count = 1;
2127 req->flags |= flags | ((cum_len & 1) * odd_flag);
2128 cum_len += seg->ds_len;
2129 seg++;
2130 req++;
2131 req->flags = 0;
2132 }
2133 req--;
2134 /* pad runts to 60 bytes */
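	/*
	 * The minimum Ethernet frame is 64 bytes including the 4-byte
	 * FCS, i.e. 60 bytes from the host. Rather than touch the
	 * mbuf, append one extra descriptor pointing at the shared
	 * zero-filled DMA page to supply the padding.
	 */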
2135 if (cum_len < 60) {
2136 req++;
2137 req->addr_low =
2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2139 req->addr_high =
2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2141 req->length = htobe16(60 - cum_len);
2142 req->cksum_offset = 0;
2143 req->pseudo_hdr_offset = pseudo_hdr_offset;
2144 req->pad = 0; /* complete solid 16-byte block */
2145 req->rdma_count = 1;
2146 req->flags |= flags | ((cum_len & 1) * odd_flag);
2147 cnt++;
2148 }
2149
2150 tx->req_list[0].rdma_count = cnt;
2151#if 0
2152 /* print what the firmware will see */
2153 for (i = 0; i < cnt; i++) {
2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2155 "cso:%d, flags:0x%x, rdma:%d\n",
2156 i, (int)ntohl(tx->req_list[i].addr_high),
2157 (int)ntohl(tx->req_list[i].addr_low),
2158 (int)ntohs(tx->req_list[i].length),
2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2161 tx->req_list[i].rdma_count);
2162 }
2163 printf("--------------\n");
2164#endif
2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2166 mxge_submit_req(tx, tx->req_list, cnt);
2167#ifdef IFNET_BUF_RING
2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2169 /* tell the NIC to start polling this slice */
2170 *tx->send_go = 1;
2171 tx->queue_active = 1;
2172 tx->activate++;
2173 wmb();
2174 }
2175#endif
2176 return;
2177
2178drop:
2179 m_freem(m);
2180 ss->oerrors++;
2181 return;
2182}
2183
2184#ifdef IFNET_BUF_RING
2185static void
2186mxge_qflush(struct ifnet *ifp)
2187{
2188 mxge_softc_t *sc = ifp->if_softc;
2189 mxge_tx_ring_t *tx;
2190 struct mbuf *m;
2191 int slice;
2192
2193 for (slice = 0; slice < sc->num_slices; slice++) {
2194 tx = &sc->ss[slice].tx;
2195 mtx_lock(&tx->mtx);
2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2197 m_freem(m);
2198 mtx_unlock(&tx->mtx);
2199 }
2200 if_qflush(ifp);
2201}
2202
2203static inline void
2204mxge_start_locked(struct mxge_slice_state *ss)
2205{
2206 mxge_softc_t *sc;
2207 struct mbuf *m;
2208 struct ifnet *ifp;
2209 mxge_tx_ring_t *tx;
2210
2211 sc = ss->sc;
2212 ifp = sc->ifp;
2213 tx = &ss->tx;
2214
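	/*
	 * tx->req counts descriptors ever submitted, tx->done those
	 * completed, so (tx->req - tx->done) is the in-flight count;
	 * the ring holds tx->mask + 1 entries. Only dequeue while a
	 * worst-case packet (tx->max_desc descriptors) still fits.
	 */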
2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2216 m = drbr_dequeue(ifp, tx->br);
2217 if (m == NULL) {
2218 return;
2219 }
2220 /* let BPF see it */
2221 BPF_MTAP(ifp, m);
2222
2223 /* give it to the nic */
2224 mxge_encap(ss, m);
2225 }
2226 /* ran out of transmit slots */
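	/*
	 * Setting OACTIVE tells the stack to stop handing us packets;
	 * the transmit-completion path is expected to clear it and
	 * kick the ring once descriptors are reclaimed (tx->stall and
	 * tx->wake count these transitions).
	 */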
2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2228 && (!drbr_empty(ifp, tx->br))) {
2229 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2230 tx->stall++;
2231 }
2232}
2233
2234static int
2235mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2236{
2237 mxge_softc_t *sc;
2238 struct ifnet *ifp;
2239 mxge_tx_ring_t *tx;
2240 int err;
2241
2242 sc = ss->sc;
2243 ifp = sc->ifp;
2244 tx = &ss->tx;
2245
2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2247 IFF_DRV_RUNNING) {
2248 err = drbr_enqueue(ifp, tx->br, m);
2249 return (err);
2250 }
2251
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/linker.h>
36#include <sys/firmware.h>
37#include <sys/endian.h>
38#include <sys/sockio.h>
39#include <sys/mbuf.h>
40#include <sys/malloc.h>
41#include <sys/kdb.h>
42#include <sys/kernel.h>
43#include <sys/lock.h>
44#include <sys/module.h>
45#include <sys/socket.h>
46#include <sys/sysctl.h>
47#include <sys/sx.h>
48#include <sys/taskqueue.h>
49
50/* count xmits ourselves, rather than via drbr */
51#define NO_SLOW_STATS
52#include <net/if.h>
53#include <net/if_arp.h>
54#include <net/ethernet.h>
55#include <net/if_dl.h>
56#include <net/if_media.h>
57
58#include <net/bpf.h>
59
60#include <net/if_types.h>
61#include <net/if_vlan_var.h>
62#include <net/zlib.h>
63
64#include <netinet/in_systm.h>
65#include <netinet/in.h>
66#include <netinet/ip.h>
67#include <netinet/tcp.h>
68
69#include <machine/bus.h>
70#include <machine/in_cksum.h>
71#include <machine/resource.h>
72#include <sys/bus.h>
73#include <sys/rman.h>
74#include <sys/smp.h>
75
76#include <dev/pci/pcireg.h>
77#include <dev/pci/pcivar.h>
78#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
79
80#include <vm/vm.h> /* for pmap_mapdev() */
81#include <vm/pmap.h>
82
83#if defined(__i386) || defined(__amd64)
84#include <machine/specialreg.h>
85#endif
86
87#include <dev/mxge/mxge_mcp.h>
88#include <dev/mxge/mcp_gen_header.h>
89/*#define MXGE_FAKE_IFP*/
90#include <dev/mxge/if_mxge_var.h>
91#ifdef IFNET_BUF_RING
92#include <sys/buf_ring.h>
93#endif
94
95#include "opt_inet.h"
96
97/* tunable params */
98static int mxge_nvidia_ecrc_enable = 1;
99static int mxge_force_firmware = 0;
100static int mxge_intr_coal_delay = 30;
101static int mxge_deassert_wait = 1;
102static int mxge_flow_control = 1;
103static int mxge_verbose = 0;
104static int mxge_lro_cnt = 8;
105static int mxge_ticks;
106static int mxge_max_slices = 1;
107static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
108static int mxge_always_promisc = 0;
109static int mxge_initial_mtu = ETHERMTU_JUMBO;
110static int mxge_throttle = 0;
111static char *mxge_fw_unaligned = "mxge_ethp_z8e";
112static char *mxge_fw_aligned = "mxge_eth_z8e";
113static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
114static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
115
116static int mxge_probe(device_t dev);
117static int mxge_attach(device_t dev);
118static int mxge_detach(device_t dev);
119static int mxge_shutdown(device_t dev);
120static void mxge_intr(void *arg);
121
122static device_method_t mxge_methods[] =
123{
124 /* Device interface */
125 DEVMETHOD(device_probe, mxge_probe),
126 DEVMETHOD(device_attach, mxge_attach),
127 DEVMETHOD(device_detach, mxge_detach),
128 DEVMETHOD(device_shutdown, mxge_shutdown),
129 {0, 0}
130};
131
132static driver_t mxge_driver =
133{
134 "mxge",
135 mxge_methods,
136 sizeof(mxge_softc_t),
137};
138
139static devclass_t mxge_devclass;
140
141/* Declare ourselves to be a child of the PCI bus.*/
142DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
143MODULE_DEPEND(mxge, firmware, 1, 1, 1);
144MODULE_DEPEND(mxge, zlib, 1, 1, 1);
145
146static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
147static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
148static int mxge_close(mxge_softc_t *sc, int down);
149static int mxge_open(mxge_softc_t *sc);
150static void mxge_tick(void *arg);
151
152static int
153mxge_probe(device_t dev)
154{
155 int rev;
156
157
158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
161 rev = pci_get_revid(dev);
162 switch (rev) {
163 case MXGE_PCI_REV_Z8E:
164 device_set_desc(dev, "Myri10G-PCIE-8A");
165 break;
166 case MXGE_PCI_REV_Z8ES:
167 device_set_desc(dev, "Myri10G-PCIE-8B");
168 break;
169 default:
170 device_set_desc(dev, "Myri10G-PCIE-8??");
171 device_printf(dev, "Unrecognized rev %d NIC\n",
172 rev);
173 break;
174 }
175 return 0;
176 }
177 return ENXIO;
178}
179
180static void
181mxge_enable_wc(mxge_softc_t *sc)
182{
183#if defined(__i386) || defined(__amd64)
184 vm_offset_t len;
185 int err;
186
187 sc->wc = 1;
188 len = rman_get_size(sc->mem_res);
189 err = pmap_change_attr((vm_offset_t) sc->sram,
190 len, PAT_WRITE_COMBINING);
191 if (err != 0) {
192 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
193 err);
194 sc->wc = 0;
195 }
196#endif
197}
198
199
200/* callback to get our DMA address */
201static void
202mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
203 int error)
204{
205 if (error == 0) {
206 *(bus_addr_t *) arg = segs->ds_addr;
207 }
208}
209
210static int
211mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
212 bus_size_t alignment)
213{
214 int err;
215 device_t dev = sc->dev;
216 bus_size_t boundary, maxsegsize;
217
218 if (bytes > 4096 && alignment == 4096) {
219 boundary = 0;
220 maxsegsize = bytes;
221 } else {
222 boundary = 4096;
223 maxsegsize = 4096;
224 }
225
226 /* allocate DMAable memory tags */
227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
228 alignment, /* alignment */
229 boundary, /* boundary */
230 BUS_SPACE_MAXADDR, /* low */
231 BUS_SPACE_MAXADDR, /* high */
232 NULL, NULL, /* filter */
233 bytes, /* maxsize */
234 1, /* num segs */
235 maxsegsize, /* maxsegsize */
236 BUS_DMA_COHERENT, /* flags */
237 NULL, NULL, /* lock */
238 &dma->dmat); /* tag */
239 if (err != 0) {
240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
241 return err;
242 }
243
244 /* allocate DMAable memory & map */
245 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
247 | BUS_DMA_ZERO), &dma->map);
248 if (err != 0) {
249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
250 goto abort_with_dmat;
251 }
252
253 /* load the memory */
254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
255 mxge_dmamap_callback,
256 (void *)&dma->bus_addr, 0);
257 if (err != 0) {
258 device_printf(dev, "couldn't load map (err = %d)\n", err);
259 goto abort_with_mem;
260 }
261 return 0;
262
263abort_with_mem:
264 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
265abort_with_dmat:
266 (void)bus_dma_tag_destroy(dma->dmat);
267 return err;
268}
269
270
271static void
272mxge_dma_free(mxge_dma_t *dma)
273{
274 bus_dmamap_unload(dma->dmat, dma->map);
275 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
276 (void)bus_dma_tag_destroy(dma->dmat);
277}
278
279/*
280 * The eeprom strings on the lanaiX have the format
281 * SN=x\0
282 * MAC=x:x:x:x:x:x\0
283 * PC=text\0
284 */
285
286static int
287mxge_parse_strings(mxge_softc_t *sc)
288{
289#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
290
291 char *ptr, *limit;
292 int i, found_mac;
293
294 ptr = sc->eeprom_strings;
295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
296 found_mac = 0;
297 while (ptr < limit && *ptr != '\0') {
298 if (memcmp(ptr, "MAC=", 4) == 0) {
299 ptr += 1;
300 sc->mac_addr_string = ptr;
301 for (i = 0; i < 6; i++) {
302 ptr += 3;
303 if ((ptr + 2) > limit)
304 goto abort;
305 sc->mac_addr[i] = strtoul(ptr, NULL, 16);
306 found_mac = 1;
307 }
308 } else if (memcmp(ptr, "PC=", 3) == 0) {
309 ptr += 3;
310 strncpy(sc->product_code_string, ptr,
311 sizeof (sc->product_code_string) - 1);
312 } else if (memcmp(ptr, "SN=", 3) == 0) {
313 ptr += 3;
314 strncpy(sc->serial_number_string, ptr,
315 sizeof (sc->serial_number_string) - 1);
316 }
317 MXGE_NEXT_STRING(ptr);
318 }
319
320 if (found_mac)
321 return 0;
322
323 abort:
324 device_printf(sc->dev, "failed to parse eeprom_strings\n");
325
326 return ENXIO;
327}
328
329#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
330static void
331mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
332{
333 uint32_t val;
334 unsigned long base, off;
335 char *va, *cfgptr;
336 device_t pdev, mcp55;
337 uint16_t vendor_id, device_id, word;
338 uintptr_t bus, slot, func, ivend, idev;
339 uint32_t *ptr32;
340
341
342 if (!mxge_nvidia_ecrc_enable)
343 return;
344
345 pdev = device_get_parent(device_get_parent(sc->dev));
346 if (pdev == NULL) {
347 device_printf(sc->dev, "could not find parent?\n");
348 return;
349 }
350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
352
353 if (vendor_id != 0x10de)
354 return;
355
356 base = 0;
357
358 if (device_id == 0x005d) {
359 /* ck804, base address is magic */
360 base = 0xe0000000UL;
361 } else if (device_id >= 0x0374 && device_id <= 0x378) {
362 /* mcp55, base address stored in chipset */
363 mcp55 = pci_find_bsf(0, 0, 0);
364 if (mcp55 &&
365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
367 word = pci_read_config(mcp55, 0x90, 2);
368 base = ((unsigned long)word & 0x7ffeU) << 25;
369 }
370 }
371 if (!base)
372 return;
373
374 /* XXXX
375 Test below is commented because it is believed that doing
376 config read/write beyond 0xff will access the config space
377 for the next larger function. Uncomment this and remove
378 the hacky pmap_mapdev() way of accessing config space when
379 FreeBSD grows support for extended pcie config space access
380 */
381#if 0
382 /* See if we can, by some miracle, access the extended
383 config space */
384 val = pci_read_config(pdev, 0x178, 4);
385 if (val != 0xffffffff) {
386 val |= 0x40;
387 pci_write_config(pdev, 0x178, val, 4);
388 return;
389 }
390#endif
391 /* Rather than using normal pci config space writes, we must
392 * map the Nvidia config space ourselves. This is because on
393 * opteron/nvidia class machine the 0xe000000 mapping is
394 * handled by the nvidia chipset, that means the internal PCI
395 * device (the on-chip northbridge), or the amd-8131 bridge
396 * and things behind them are not visible by this method.
397 */
398
399 BUS_READ_IVAR(device_get_parent(pdev), pdev,
400 PCI_IVAR_BUS, &bus);
401 BUS_READ_IVAR(device_get_parent(pdev), pdev,
402 PCI_IVAR_SLOT, &slot);
403 BUS_READ_IVAR(device_get_parent(pdev), pdev,
404 PCI_IVAR_FUNCTION, &func);
405 BUS_READ_IVAR(device_get_parent(pdev), pdev,
406 PCI_IVAR_VENDOR, &ivend);
407 BUS_READ_IVAR(device_get_parent(pdev), pdev,
408 PCI_IVAR_DEVICE, &idev);
409
410 off = base
411 + 0x00100000UL * (unsigned long)bus
412 + 0x00001000UL * (unsigned long)(func
413 + 8 * slot);
414
415 /* map it into the kernel */
416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
417
418
419 if (va == NULL) {
420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
421 return;
422 }
423 /* get a pointer to the config space mapped into the kernel */
424 cfgptr = va + (off & PAGE_MASK);
425
426 /* make sure that we can really access it */
427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
429 if (! (vendor_id == ivend && device_id == idev)) {
430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
431 vendor_id, device_id);
432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
433 return;
434 }
435
436 ptr32 = (uint32_t*)(cfgptr + 0x178);
437 val = *ptr32;
438
439 if (val == 0xffffffff) {
440 device_printf(sc->dev, "extended mapping failed\n");
441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
442 return;
443 }
444 *ptr32 = val | 0x40;
445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
446 if (mxge_verbose)
447 device_printf(sc->dev,
448 "Enabled ECRC on upstream Nvidia bridge "
449 "at %d:%d:%d\n",
450 (int)bus, (int)slot, (int)func);
451 return;
452}
453#else
454static void
455mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
456{
457 device_printf(sc->dev,
458 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
459 return;
460}
461#endif
462
463
464static int
465mxge_dma_test(mxge_softc_t *sc, int test_type)
466{
467 mxge_cmd_t cmd;
468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
469 int status;
470 uint32_t len;
471 char *test = " ";
472
473
474 /* Run a small DMA test.
475 * The magic multipliers to the length tell the firmware
476 * to do DMA read, write, or read+write tests. The
477 * results are returned in cmd.data0. The upper 16
478 * bits of the return is the number of transfers completed.
479 * The lower 16 bits is the time in 0.5us ticks that the
480 * transfers took to complete.
481 */
482
483 len = sc->tx_boundary;
484
485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
487 cmd.data2 = len * 0x10000;
488 status = mxge_send_cmd(sc, test_type, &cmd);
489 if (status != 0) {
490 test = "read";
491 goto abort;
492 }
493 sc->read_dma = ((cmd.data0>>16) * len * 2) /
494 (cmd.data0 & 0xffff);
495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
497 cmd.data2 = len * 0x1;
498 status = mxge_send_cmd(sc, test_type, &cmd);
499 if (status != 0) {
500 test = "write";
501 goto abort;
502 }
503 sc->write_dma = ((cmd.data0>>16) * len * 2) /
504 (cmd.data0 & 0xffff);
505
506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
508 cmd.data2 = len * 0x10001;
509 status = mxge_send_cmd(sc, test_type, &cmd);
510 if (status != 0) {
511 test = "read/write";
512 goto abort;
513 }
514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
515 (cmd.data0 & 0xffff);
516
517abort:
518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
520 test, status);
521
522 return status;
523}
524
525/*
526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
527 * when the PCI-E Completion packets are aligned on an 8-byte
528 * boundary. Some PCI-E chip sets always align Completion packets; on
529 * the ones that do not, the alignment can be enforced by enabling
530 * ECRC generation (if supported).
531 *
532 * When PCI-E Completion packets are not aligned, it is actually more
533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
534 *
535 * If the driver can neither enable ECRC nor verify that it has
536 * already been enabled, then it must use a firmware image which works
537 * around unaligned completion packets (ethp_z8e.dat), and it should
538 * also ensure that it never gives the device a Read-DMA which is
539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
540 * enabled, then the driver should use the aligned (eth_z8e.dat)
541 * firmware image, and set tx_boundary to 4KB.
542 */
543
544static int
545mxge_firmware_probe(mxge_softc_t *sc)
546{
547 device_t dev = sc->dev;
548 int reg, status;
549 uint16_t pectl;
550
551 sc->tx_boundary = 4096;
552 /*
553 * Verify the max read request size was set to 4KB
554 * before trying the test with 4KB.
555 */
556 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
557 pectl = pci_read_config(dev, reg + 0x8, 2);
558 if ((pectl & (5 << 12)) != (5 << 12)) {
559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
560 pectl);
561 sc->tx_boundary = 2048;
562 }
563 }
564
565 /*
566 * load the optimized firmware (which assumes aligned PCIe
567 * completions) in order to see if it works on this host.
568 */
569 sc->fw_name = mxge_fw_aligned;
570 status = mxge_load_firmware(sc, 1);
571 if (status != 0) {
572 return status;
573 }
574
575 /*
576 * Enable ECRC if possible
577 */
578 mxge_enable_nvidia_ecrc(sc);
579
580 /*
581 * Run a DMA test which watches for unaligned completions and
582 * aborts on the first one seen.
583 */
584
585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
586 if (status == 0)
587 return 0; /* keep the aligned firmware */
588
589 if (status != E2BIG)
590 device_printf(dev, "DMA test failed: %d\n", status);
591 if (status == ENOSYS)
592 device_printf(dev, "Falling back to ethp! "
593 "Please install up to date fw\n");
594 return status;
595}
596
597static int
598mxge_select_firmware(mxge_softc_t *sc)
599{
600 int aligned = 0;
601 int force_firmware = mxge_force_firmware;
602
603 if (sc->throttle)
604 force_firmware = sc->throttle;
605
606 if (force_firmware != 0) {
607 if (force_firmware == 1)
608 aligned = 1;
609 else
610 aligned = 0;
611 if (mxge_verbose)
612 device_printf(sc->dev,
613 "Assuming %s completions (forced)\n",
614 aligned ? "aligned" : "unaligned");
615 goto abort;
616 }
617
618 /* if the PCIe link width is 4 or less, we can use the aligned
619 firmware and skip any checks */
620 if (sc->link_width != 0 && sc->link_width <= 4) {
621 device_printf(sc->dev,
622 "PCIe x%d Link, expect reduced performance\n",
623 sc->link_width);
624 aligned = 1;
625 goto abort;
626 }
627
628 if (0 == mxge_firmware_probe(sc))
629 return 0;
630
631abort:
632 if (aligned) {
633 sc->fw_name = mxge_fw_aligned;
634 sc->tx_boundary = 4096;
635 } else {
636 sc->fw_name = mxge_fw_unaligned;
637 sc->tx_boundary = 2048;
638 }
639 return (mxge_load_firmware(sc, 0));
640}
641
642union qualhack
643{
644 const char *ro_char;
645 char *rw_char;
646};
647
648static int
649mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
650{
651
652
653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
654 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
655 be32toh(hdr->mcp_type));
656 return EIO;
657 }
658
659 /* save firmware version for sysctl */
660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
661 if (mxge_verbose)
662 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
663
664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
665 &sc->fw_ver_minor, &sc->fw_ver_tiny);
666
667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
669 device_printf(sc->dev, "Found firmware version %s\n",
670 sc->fw_version);
671 device_printf(sc->dev, "Driver needs %d.%d\n",
672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
673 return EINVAL;
674 }
675 return 0;
676
677}
678
679static void *
680z_alloc(void *nil, u_int items, u_int size)
681{
682 void *ptr;
683
684 ptr = malloc(items * size, M_TEMP, M_NOWAIT);
685 return ptr;
686}
687
688static void
689z_free(void *nil, void *ptr)
690{
691 free(ptr, M_TEMP);
692}
693
694
695static int
696mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
697{
698 z_stream zs;
699 char *inflate_buffer;
700 const struct firmware *fw;
701 const mcp_gen_header_t *hdr;
702 unsigned hdr_offset;
703 int status;
704 unsigned int i;
705 char dummy;
706 size_t fw_len;
707
708 fw = firmware_get(sc->fw_name);
709 if (fw == NULL) {
710 device_printf(sc->dev, "Could not find firmware image %s\n",
711 sc->fw_name);
712 return ENOENT;
713 }
714
715
716
717 /* setup zlib and decompress f/w */
718 bzero(&zs, sizeof (zs));
719 zs.zalloc = z_alloc;
720 zs.zfree = z_free;
721 status = inflateInit(&zs);
722 if (status != Z_OK) {
723 status = EIO;
724 goto abort_with_fw;
725 }
726
727 /* the uncompressed size is stored as the firmware version,
728 which would otherwise go unused */
729 fw_len = (size_t) fw->version;
730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
731 if (inflate_buffer == NULL)
732 goto abort_with_zs;
733 zs.avail_in = fw->datasize;
734 zs.next_in = __DECONST(char *, fw->data);
735 zs.avail_out = fw_len;
736 zs.next_out = inflate_buffer;
737 status = inflate(&zs, Z_FINISH);
738 if (status != Z_STREAM_END) {
739 device_printf(sc->dev, "zlib %d\n", status);
740 status = EIO;
741 goto abort_with_buffer;
742 }
743
744 /* check id */
745 hdr_offset = htobe32(*(const uint32_t *)
746 (inflate_buffer + MCP_HEADER_PTR_OFFSET));
747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
748 device_printf(sc->dev, "Bad firmware file");
749 status = EIO;
750 goto abort_with_buffer;
751 }
752 hdr = (const void*)(inflate_buffer + hdr_offset);
753
754 status = mxge_validate_firmware(sc, hdr);
755 if (status != 0)
756 goto abort_with_buffer;
757
758 /* Copy the inflated firmware to NIC SRAM. */
759 for (i = 0; i < fw_len; i += 256) {
760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
761 inflate_buffer + i,
762 min(256U, (unsigned)(fw_len - i)));
763 wmb();
764 dummy = *sc->sram;
765 wmb();
766 }
767
768 *limit = fw_len;
769 status = 0;
770abort_with_buffer:
771 free(inflate_buffer, M_TEMP);
772abort_with_zs:
773 inflateEnd(&zs);
774abort_with_fw:
775 firmware_put(fw, FIRMWARE_UNLOAD);
776 return status;
777}
778
779/*
780 * Enable or disable periodic RDMAs from the host to make certain
781 * chipsets resend dropped PCIe messages
782 */
783
784static void
785mxge_dummy_rdma(mxge_softc_t *sc, int enable)
786{
787 char buf_bytes[72];
788 volatile uint32_t *confirm;
789 volatile char *submit;
790 uint32_t *buf, dma_low, dma_high;
791 int i;
792
793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
794
795 /* clear confirmation addr */
796 confirm = (volatile uint32_t *)sc->cmd;
797 *confirm = 0;
798 wmb();
799
800 /* send an rdma command to the PCIe engine, and wait for the
801 response in the confirmation address. The firmware should
802 write a -1 there to indicate it is alive and well
803 */
804
805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
807 buf[0] = htobe32(dma_high); /* confirm addr MSW */
808 buf[1] = htobe32(dma_low); /* confirm addr LSW */
809 buf[2] = htobe32(0xffffffff); /* confirm data */
810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
812 buf[3] = htobe32(dma_high); /* dummy addr MSW */
813 buf[4] = htobe32(dma_low); /* dummy addr LSW */
814 buf[5] = htobe32(enable); /* enable? */
815
816
817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
818
819 mxge_pio_copy(submit, buf, 64);
820 wmb();
821 DELAY(1000);
822 wmb();
823 i = 0;
824 while (*confirm != 0xffffffff && i < 20) {
825 DELAY(1000);
826 i++;
827 }
828 if (*confirm != 0xffffffff) {
829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
830 (enable ? "enable" : "disable"), confirm,
831 *confirm);
832 }
833 return;
834}
835
836static int
837mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
838{
839 mcp_cmd_t *buf;
840 char buf_bytes[sizeof(*buf) + 8];
841 volatile mcp_cmd_response_t *response = sc->cmd;
842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
843 uint32_t dma_low, dma_high;
844 int err, sleep_total = 0;
845
846 /* ensure buf is aligned to 8 bytes */
847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
848
849 buf->data0 = htobe32(data->data0);
850 buf->data1 = htobe32(data->data1);
851 buf->data2 = htobe32(data->data2);
852 buf->cmd = htobe32(cmd);
853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
855
856 buf->response_addr.low = htobe32(dma_low);
857 buf->response_addr.high = htobe32(dma_high);
858 mtx_lock(&sc->cmd_mtx);
859 response->result = 0xffffffff;
860 wmb();
861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
862
863 /* wait up to 20ms */
864 err = EAGAIN;
865 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
866 bus_dmamap_sync(sc->cmd_dma.dmat,
867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
868 wmb();
869 switch (be32toh(response->result)) {
870 case 0:
871 data->data0 = be32toh(response->data);
872 err = 0;
873 break;
874 case 0xffffffff:
875 DELAY(1000);
876 break;
877 case MXGEFW_CMD_UNKNOWN:
878 err = ENOSYS;
879 break;
880 case MXGEFW_CMD_ERROR_UNALIGNED:
881 err = E2BIG;
882 break;
883 case MXGEFW_CMD_ERROR_BUSY:
884 err = EBUSY;
885 break;
886 default:
887 device_printf(sc->dev,
888 "mxge: command %d "
889 "failed, result = %d\n",
890 cmd, be32toh(response->result));
891 err = ENXIO;
892 break;
893 }
894 if (err != EAGAIN)
895 break;
896 }
897 if (err == EAGAIN)
898 device_printf(sc->dev, "mxge: command %d timed out"
899 "result = %d\n",
900 cmd, be32toh(response->result));
901 mtx_unlock(&sc->cmd_mtx);
902 return err;
903}
904
905static int
906mxge_adopt_running_firmware(mxge_softc_t *sc)
907{
908 struct mcp_gen_header *hdr;
909 const size_t bytes = sizeof (struct mcp_gen_header);
910 size_t hdr_offset;
911 int status;
912
913 /* find running firmware header */
914 hdr_offset = htobe32(*(volatile uint32_t *)
915 (sc->sram + MCP_HEADER_PTR_OFFSET));
916
917 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
918 device_printf(sc->dev,
919 "Running firmware has bad header offset (%d)\n",
920 (int)hdr_offset);
921 return EIO;
922 }
923
924 /* copy header of running firmware from SRAM to host memory to
925 * validate firmware */
926 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
927 if (hdr == NULL) {
928 device_printf(sc->dev, "could not malloc firmware hdr\n");
929 return ENOMEM;
930 }
931 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
932 rman_get_bushandle(sc->mem_res),
933 hdr_offset, (char *)hdr, bytes);
934 status = mxge_validate_firmware(sc, hdr);
935 free(hdr, M_DEVBUF);
936
937 /*
938 * check to see if adopted firmware has bug where adopting
939 * it will cause broadcasts to be filtered unless the NIC
940 * is kept in ALLMULTI mode
941 */
942 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
943 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
944 sc->adopted_rx_filter_bug = 1;
945 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
946 "working around rx filter bug\n",
947 sc->fw_ver_major, sc->fw_ver_minor,
948 sc->fw_ver_tiny);
949 }
950
951 return status;
952}
953
954
955static int
956mxge_load_firmware(mxge_softc_t *sc, int adopt)
957{
958 volatile uint32_t *confirm;
959 volatile char *submit;
960 char buf_bytes[72];
961 uint32_t *buf, size, dma_low, dma_high;
962 int status, i;
963
964 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
965
966 size = sc->sram_size;
967 status = mxge_load_firmware_helper(sc, &size);
968 if (status) {
969 if (!adopt)
970 return status;
971 /* Try to use the currently running firmware, if
972 it is new enough */
973 status = mxge_adopt_running_firmware(sc);
974 if (status) {
975 device_printf(sc->dev,
976 "failed to adopt running firmware\n");
977 return status;
978 }
979 device_printf(sc->dev,
980 "Successfully adopted running firmware\n");
981 if (sc->tx_boundary == 4096) {
982 device_printf(sc->dev,
983 "Using firmware currently running on NIC"
984 ". For optimal\n");
985 device_printf(sc->dev,
986 "performance consider loading optimized "
987 "firmware\n");
988 }
989 sc->fw_name = mxge_fw_unaligned;
990 sc->tx_boundary = 2048;
991 return 0;
992 }
993 /* clear confirmation addr */
994 confirm = (volatile uint32_t *)sc->cmd;
995 *confirm = 0;
996 wmb();
997 /* send a reload command to the bootstrap MCP, and wait for the
998 response in the confirmation address. The firmware should
999 write a -1 there to indicate it is alive and well
1000 */
1001
1002 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1003 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1004
1005 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1006 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1007 buf[2] = htobe32(0xffffffff); /* confirm data */
1008
1009 /* FIX: All newest firmware should un-protect the bottom of
1010 the sram before handoff. However, the very first interfaces
1011 do not. Therefore the handoff copy must skip the first 8 bytes
1012 */
1013 /* where the code starts*/
1014 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1015 buf[4] = htobe32(size - 8); /* length of code */
1016 buf[5] = htobe32(8); /* where to copy to */
1017 buf[6] = htobe32(0); /* where to jump to */
1018
1019 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1020 mxge_pio_copy(submit, buf, 64);
1021 wmb();
1022 DELAY(1000);
1023 wmb();
1024 i = 0;
1025 while (*confirm != 0xffffffff && i < 20) {
1026 DELAY(1000*10);
1027 i++;
1028 bus_dmamap_sync(sc->cmd_dma.dmat,
1029 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1030 }
1031 if (*confirm != 0xffffffff) {
1032 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1033 confirm, *confirm);
1034
1035 return ENXIO;
1036 }
1037 return 0;
1038}
1039
1040static int
1041mxge_update_mac_address(mxge_softc_t *sc)
1042{
1043 mxge_cmd_t cmd;
1044 uint8_t *addr = sc->mac_addr;
1045 int status;
1046
1047
1048 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1049 | (addr[2] << 8) | addr[3]);
1050
1051 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1052
1053 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1054 return status;
1055}
1056
1057static int
1058mxge_change_pause(mxge_softc_t *sc, int pause)
1059{
1060 mxge_cmd_t cmd;
1061 int status;
1062
1063 if (pause)
1064 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1065 &cmd);
1066 else
1067 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1068 &cmd);
1069
1070 if (status) {
1071 device_printf(sc->dev, "Failed to set flow control mode\n");
1072 return ENXIO;
1073 }
1074 sc->pause = pause;
1075 return 0;
1076}
1077
1078static void
1079mxge_change_promisc(mxge_softc_t *sc, int promisc)
1080{
1081 mxge_cmd_t cmd;
1082 int status;
1083
1084 if (mxge_always_promisc)
1085 promisc = 1;
1086
1087 if (promisc)
1088 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1089 &cmd);
1090 else
1091 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1092 &cmd);
1093
1094 if (status) {
1095 device_printf(sc->dev, "Failed to set promisc mode\n");
1096 }
1097}
1098
1099static void
1100mxge_set_multicast_list(mxge_softc_t *sc)
1101{
1102 mxge_cmd_t cmd;
1103 struct ifmultiaddr *ifma;
1104 struct ifnet *ifp = sc->ifp;
1105 int err;
1106
1107 /* This firmware is known to not support multicast */
1108 if (!sc->fw_multicast_support)
1109 return;
1110
1111 /* Disable multicast filtering while we play with the lists*/
1112 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1113 if (err != 0) {
1114 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1115 " error status: %d\n", err);
1116 return;
1117 }
1118
1119 if (sc->adopted_rx_filter_bug)
1120 return;
1121
1122 if (ifp->if_flags & IFF_ALLMULTI)
1123 /* request to disable multicast filtering, so quit here */
1124 return;
1125
1126 /* Flush all the filters */
1127
1128 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1129 if (err != 0) {
1130 device_printf(sc->dev,
1131 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1132 ", error status: %d\n", err);
1133 return;
1134 }
1135
1136 /* Walk the multicast list, and add each address */
1137
1138 if_maddr_rlock(ifp);
1139 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1140 if (ifma->ifma_addr->sa_family != AF_LINK)
1141 continue;
1142 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1143 &cmd.data0, 4);
1144 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1145 &cmd.data1, 2);
1146 cmd.data0 = htonl(cmd.data0);
1147 cmd.data1 = htonl(cmd.data1);
1148 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1149 if (err != 0) {
1150 device_printf(sc->dev, "Failed "
1151 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1152 "%d\t", err);
1153 /* abort, leaving multicast filtering off */
1154 if_maddr_runlock(ifp);
1155 return;
1156 }
1157 }
1158 if_maddr_runlock(ifp);
1159 /* Enable multicast filtering */
1160 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1161 if (err != 0) {
1162 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1163 ", error status: %d\n", err);
1164 }
1165}
1166
1167static int
1168mxge_max_mtu(mxge_softc_t *sc)
1169{
1170 mxge_cmd_t cmd;
1171 int status;
1172
1173 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1174 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1175
1176 /* try to set nbufs to see if it we can
1177 use virtually contiguous jumbos */
1178 cmd.data0 = 0;
1179 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1180 &cmd);
1181 if (status == 0)
1182 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1183
1184 /* otherwise, we're limited to MJUMPAGESIZE */
1185 return MJUMPAGESIZE - MXGEFW_PAD;
1186}
1187
1188static int
1189mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1190{
1191 struct mxge_slice_state *ss;
1192 mxge_rx_done_t *rx_done;
1193 volatile uint32_t *irq_claim;
1194 mxge_cmd_t cmd;
1195 int slice, status;
1196
1197 /* try to send a reset command to the card to see if it
1198 is alive */
1199 memset(&cmd, 0, sizeof (cmd));
1200 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1201 if (status != 0) {
1202 device_printf(sc->dev, "failed reset\n");
1203 return ENXIO;
1204 }
1205
1206 mxge_dummy_rdma(sc, 1);
1207
1208
1209 /* set the intrq size */
1210 cmd.data0 = sc->rx_ring_size;
1211 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1212
1213 /*
1214 * Even though we already know how many slices are supported
1215 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1216 * has magic side effects, and must be called after a reset.
1217 * It must be called prior to calling any RSS related cmds,
1218 * including assigning an interrupt queue for anything but
1219 * slice 0. It must also be called *after*
1220 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1221 * the firmware to compute offsets.
1222 */
1223
1224 if (sc->num_slices > 1) {
1225 /* ask the maximum number of slices it supports */
1226 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1227 &cmd);
1228 if (status != 0) {
1229 device_printf(sc->dev,
1230 "failed to get number of slices\n");
1231 return status;
1232 }
1233 /*
1234 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1235 * to setting up the interrupt queue DMA
1236 */
1237 cmd.data0 = sc->num_slices;
1238 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1239#ifdef IFNET_BUF_RING
1240 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1241#endif
1242 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1243 &cmd);
1244 if (status != 0) {
1245 device_printf(sc->dev,
1246 "failed to set number of slices\n");
1247 return status;
1248 }
1249 }
1250
1251
1252 if (interrupts_setup) {
1253 /* Now exchange information about interrupts */
1254 for (slice = 0; slice < sc->num_slices; slice++) {
1255 rx_done = &sc->ss[slice].rx_done;
1256 memset(rx_done->entry, 0, sc->rx_ring_size);
1257 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1258 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1259 cmd.data2 = slice;
1260 status |= mxge_send_cmd(sc,
1261 MXGEFW_CMD_SET_INTRQ_DMA,
1262 &cmd);
1263 }
1264 }
1265
1266 status |= mxge_send_cmd(sc,
1267 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1268
1269
1270 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1271
1272 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1273 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1274
1275
1276 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1277 &cmd);
1278 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1279 if (status != 0) {
1280 device_printf(sc->dev, "failed set interrupt parameters\n");
1281 return status;
1282 }
1283
1284
1285 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1286
1287
1288 /* run a DMA benchmark */
1289 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1290
1291 for (slice = 0; slice < sc->num_slices; slice++) {
1292 ss = &sc->ss[slice];
1293
1294 ss->irq_claim = irq_claim + (2 * slice);
1295 /* reset mcp/driver shared state back to 0 */
1296 ss->rx_done.idx = 0;
1297 ss->rx_done.cnt = 0;
1298 ss->tx.req = 0;
1299 ss->tx.done = 0;
1300 ss->tx.pkt_done = 0;
1301 ss->tx.queue_active = 0;
1302 ss->tx.activate = 0;
1303 ss->tx.deactivate = 0;
1304 ss->tx.wake = 0;
1305 ss->tx.defrag = 0;
1306 ss->tx.stall = 0;
1307 ss->rx_big.cnt = 0;
1308 ss->rx_small.cnt = 0;
1309 ss->lro_bad_csum = 0;
1310 ss->lro_queued = 0;
1311 ss->lro_flushed = 0;
1312 if (ss->fw_stats != NULL) {
1313 bzero(ss->fw_stats, sizeof *ss->fw_stats);
1314 }
1315 }
1316 sc->rdma_tags_available = 15;
1317 status = mxge_update_mac_address(sc);
1318 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1319 mxge_change_pause(sc, sc->pause);
1320 mxge_set_multicast_list(sc);
1321 if (sc->throttle) {
1322 cmd.data0 = sc->throttle;
1323 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
1324 &cmd)) {
1325 device_printf(sc->dev,
1326 "can't enable throttle\n");
1327 }
1328 }
1329 return status;
1330}
1331
1332static int
1333mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1334{
1335 mxge_cmd_t cmd;
1336 mxge_softc_t *sc;
1337 int err;
1338 unsigned int throttle;
1339
1340 sc = arg1;
1341 throttle = sc->throttle;
1342 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1343 if (err != 0) {
1344 return err;
1345 }
1346
1347 if (throttle == sc->throttle)
1348 return 0;
1349
1350 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1351 return EINVAL;
1352
1353 mtx_lock(&sc->driver_mtx);
1354 cmd.data0 = throttle;
1355 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1356 if (err == 0)
1357 sc->throttle = throttle;
1358 mtx_unlock(&sc->driver_mtx);
1359 return err;
1360}
1361
1362static int
1363mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1364{
1365 mxge_softc_t *sc;
1366 unsigned int intr_coal_delay;
1367 int err;
1368
1369 sc = arg1;
1370 intr_coal_delay = sc->intr_coal_delay;
1371 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1372 if (err != 0) {
1373 return err;
1374 }
1375 if (intr_coal_delay == sc->intr_coal_delay)
1376 return 0;
1377
1378 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1379 return EINVAL;
1380
1381 mtx_lock(&sc->driver_mtx);
1382 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1383 sc->intr_coal_delay = intr_coal_delay;
1384
1385 mtx_unlock(&sc->driver_mtx);
1386 return err;
1387}
1388
1389static int
1390mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1391{
1392 mxge_softc_t *sc;
1393 unsigned int enabled;
1394 int err;
1395
1396 sc = arg1;
1397 enabled = sc->pause;
1398 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1399 if (err != 0) {
1400 return err;
1401 }
1402 if (enabled == sc->pause)
1403 return 0;
1404
1405 mtx_lock(&sc->driver_mtx);
1406 err = mxge_change_pause(sc, enabled);
1407 mtx_unlock(&sc->driver_mtx);
1408 return err;
1409}
1410
1411static int
1412mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1413{
1414 struct ifnet *ifp;
1415 int err = 0;
1416
1417 ifp = sc->ifp;
1418 if (lro_cnt == 0)
1419 ifp->if_capenable &= ~IFCAP_LRO;
1420 else
1421 ifp->if_capenable |= IFCAP_LRO;
1422 sc->lro_cnt = lro_cnt;
1423 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1424 mxge_close(sc, 0);
1425 err = mxge_open(sc);
1426 }
1427 return err;
1428}
1429
1430static int
1431mxge_change_lro(SYSCTL_HANDLER_ARGS)
1432{
1433 mxge_softc_t *sc;
1434 unsigned int lro_cnt;
1435 int err;
1436
1437 sc = arg1;
1438 lro_cnt = sc->lro_cnt;
1439 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1440 if (err != 0)
1441 return err;
1442
1443 if (lro_cnt == sc->lro_cnt)
1444 return 0;
1445
1446 if (lro_cnt > 128)
1447 return EINVAL;
1448
1449 mtx_lock(&sc->driver_mtx);
1450 err = mxge_change_lro_locked(sc, lro_cnt);
1451 mtx_unlock(&sc->driver_mtx);
1452 return err;
1453}
1454
1455static int
1456mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1457{
1458 int err;
1459
1460 if (arg1 == NULL)
1461 return EFAULT;
1462 arg2 = be32toh(*(int *)arg1);
1463 arg1 = NULL;
1464 err = sysctl_handle_int(oidp, arg1, arg2, req);
1465
1466 return err;
1467}
1468
1469static void
1470mxge_rem_sysctls(mxge_softc_t *sc)
1471{
1472 struct mxge_slice_state *ss;
1473 int slice;
1474
1475 if (sc->slice_sysctl_tree == NULL)
1476 return;
1477
1478 for (slice = 0; slice < sc->num_slices; slice++) {
1479 ss = &sc->ss[slice];
1480 if (ss == NULL || ss->sysctl_tree == NULL)
1481 continue;
1482 sysctl_ctx_free(&ss->sysctl_ctx);
1483 ss->sysctl_tree = NULL;
1484 }
1485 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1486 sc->slice_sysctl_tree = NULL;
1487}
1488
1489static void
1490mxge_add_sysctls(mxge_softc_t *sc)
1491{
1492 struct sysctl_ctx_list *ctx;
1493 struct sysctl_oid_list *children;
1494 mcp_irq_data_t *fw;
1495 struct mxge_slice_state *ss;
1496 int slice;
1497 char slice_num[8];
1498
1499 ctx = device_get_sysctl_ctx(sc->dev);
1500 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1501 fw = sc->ss[0].fw_stats;
1502
1503 /* random information */
1504 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1505 "firmware_version",
1506 CTLFLAG_RD, &sc->fw_version,
1507 0, "firmware version");
1508 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1509 "serial_number",
1510 CTLFLAG_RD, &sc->serial_number_string,
1511 0, "serial number");
1512 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1513 "product_code",
1514 CTLFLAG_RD, &sc->product_code_string,
1515 0, "product_code");
1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1517 "pcie_link_width",
1518 CTLFLAG_RD, &sc->link_width,
1519 0, "tx_boundary");
1520 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1521 "tx_boundary",
1522 CTLFLAG_RD, &sc->tx_boundary,
1523 0, "tx_boundary");
1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1525 "write_combine",
1526 CTLFLAG_RD, &sc->wc,
1527 0, "write combining PIO?");
1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1529 "read_dma_MBs",
1530 CTLFLAG_RD, &sc->read_dma,
1531 0, "DMA Read speed in MB/s");
1532 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1533 "write_dma_MBs",
1534 CTLFLAG_RD, &sc->write_dma,
1535 0, "DMA Write speed in MB/s");
1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1537 "read_write_dma_MBs",
1538 CTLFLAG_RD, &sc->read_write_dma,
1539 0, "DMA concurrent Read/Write speed in MB/s");
1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1541 "watchdog_resets",
1542 CTLFLAG_RD, &sc->watchdog_resets,
1543 0, "Number of times NIC was reset");
1544
1545
1546 /* performance related tunables */
1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1548 "intr_coal_delay",
1549 CTLTYPE_INT|CTLFLAG_RW, sc,
1550 0, mxge_change_intr_coal,
1551 "I", "interrupt coalescing delay in usecs");
1552
1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1554 "throttle",
1555 CTLTYPE_INT|CTLFLAG_RW, sc,
1556 0, mxge_change_throttle,
1557 "I", "transmit throttling");
1558
1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1560 "flow_control_enabled",
1561 CTLTYPE_INT|CTLFLAG_RW, sc,
1562 0, mxge_change_flow_control,
1563 "I", "interrupt coalescing delay in usecs");
1564
1565 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1566 "deassert_wait",
1567 CTLFLAG_RW, &mxge_deassert_wait,
1568 0, "Wait for IRQ line to go low in ihandler");
1569
1570 /* stats block from firmware is in network byte order.
1571 Need to swap it */
1572 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1573 "link_up",
1574 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1575 0, mxge_handle_be32,
1576 "I", "link up");
1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1578 "rdma_tags_available",
1579 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1580 0, mxge_handle_be32,
1581 "I", "rdma_tags_available");
1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1583 "dropped_bad_crc32",
1584 CTLTYPE_INT|CTLFLAG_RD,
1585 &fw->dropped_bad_crc32,
1586 0, mxge_handle_be32,
1587 "I", "dropped_bad_crc32");
1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1589 "dropped_bad_phy",
1590 CTLTYPE_INT|CTLFLAG_RD,
1591 &fw->dropped_bad_phy,
1592 0, mxge_handle_be32,
1593 "I", "dropped_bad_phy");
1594 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1595 "dropped_link_error_or_filtered",
1596 CTLTYPE_INT|CTLFLAG_RD,
1597 &fw->dropped_link_error_or_filtered,
1598 0, mxge_handle_be32,
1599 "I", "dropped_link_error_or_filtered");
1600 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1601 "dropped_link_overflow",
1602 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1603 0, mxge_handle_be32,
1604 "I", "dropped_link_overflow");
1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1606 "dropped_multicast_filtered",
1607 CTLTYPE_INT|CTLFLAG_RD,
1608 &fw->dropped_multicast_filtered,
1609 0, mxge_handle_be32,
1610 "I", "dropped_multicast_filtered");
1611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1612 "dropped_no_big_buffer",
1613 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1614 0, mxge_handle_be32,
1615 "I", "dropped_no_big_buffer");
1616 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1617 "dropped_no_small_buffer",
1618 CTLTYPE_INT|CTLFLAG_RD,
1619 &fw->dropped_no_small_buffer,
1620 0, mxge_handle_be32,
1621 "I", "dropped_no_small_buffer");
1622 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1623 "dropped_overrun",
1624 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1625 0, mxge_handle_be32,
1626 "I", "dropped_overrun");
1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1628 "dropped_pause",
1629 CTLTYPE_INT|CTLFLAG_RD,
1630 &fw->dropped_pause,
1631 0, mxge_handle_be32,
1632 "I", "dropped_pause");
1633 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1634 "dropped_runt",
1635 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1636 0, mxge_handle_be32,
1637 "I", "dropped_runt");
1638
1639 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1640 "dropped_unicast_filtered",
1641 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1642 0, mxge_handle_be32,
1643 "I", "dropped_unicast_filtered");
1644
1645 /* verbose printing? */
1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1647 "verbose",
1648 CTLFLAG_RW, &mxge_verbose,
1649 0, "verbose printing");
1650
1651 /* lro */
1652 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1653 "lro_cnt",
1654 CTLTYPE_INT|CTLFLAG_RW, sc,
1655 0, mxge_change_lro,
1656 "I", "number of lro merge queues");
1657
1658
1659 /* add counters exported for debugging from all slices */
1660 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1661 sc->slice_sysctl_tree =
1662 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1663 "slice", CTLFLAG_RD, 0, "");
1664
1665 for (slice = 0; slice < sc->num_slices; slice++) {
1666 ss = &sc->ss[slice];
1667 sysctl_ctx_init(&ss->sysctl_ctx);
1668 ctx = &ss->sysctl_ctx;
1669 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1670 sprintf(slice_num, "%d", slice);
1671 ss->sysctl_tree =
1672 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1673 CTLFLAG_RD, 0, "");
1674 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1676 "rx_small_cnt",
1677 CTLFLAG_RD, &ss->rx_small.cnt,
1678 0, "rx_small_cnt");
1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1680 "rx_big_cnt",
1681 CTLFLAG_RD, &ss->rx_big.cnt,
1682 0, "rx_small_cnt");
1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1684 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1685 0, "number of lro merge queues flushed");
1686
1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1688 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1689 0, "number of frames appended to lro merge"
1690 "queues");
1691
1692#ifndef IFNET_BUF_RING
1693 /* only transmit from slice 0 for now */
1694 if (slice > 0)
1695 continue;
1696#endif
1697 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1698 "tx_req",
1699 CTLFLAG_RD, &ss->tx.req,
1700 0, "tx_req");
1701
1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1703 "tx_done",
1704 CTLFLAG_RD, &ss->tx.done,
1705 0, "tx_done");
1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1707 "tx_pkt_done",
1708 CTLFLAG_RD, &ss->tx.pkt_done,
1709 0, "tx_done");
1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1711 "tx_stall",
1712 CTLFLAG_RD, &ss->tx.stall,
1713 0, "tx_stall");
1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1715 "tx_wake",
1716 CTLFLAG_RD, &ss->tx.wake,
1717 0, "tx_wake");
1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1719 "tx_defrag",
1720 CTLFLAG_RD, &ss->tx.defrag,
1721 0, "tx_defrag");
1722 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1723 "tx_queue_active",
1724 CTLFLAG_RD, &ss->tx.queue_active,
1725 0, "tx_queue_active");
1726 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1727 "tx_activate",
1728 CTLFLAG_RD, &ss->tx.activate,
1729 0, "tx_activate");
1730 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1731 "tx_deactivate",
1732 CTLFLAG_RD, &ss->tx.deactivate,
1733 0, "tx_deactivate");
1734 }
1735}
1736
1737/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1738 backwards one at a time and handle ring wraps */
1739
1740static inline void
1741mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1742 mcp_kreq_ether_send_t *src, int cnt)
1743{
1744 int idx, starting_slot;
1745 starting_slot = tx->req;
1746 while (cnt > 1) {
1747 cnt--;
1748 idx = (starting_slot + cnt) & tx->mask;
1749 mxge_pio_copy(&tx->lanai[idx],
1750 &src[cnt], sizeof(*src));
1751 wmb();
1752 }
1753}
1754
1755/*
1756 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1757 * at most 32 bytes at a time, so as to avoid involving the software
1758 * pio handler in the nic. We re-write the first segment's flags
1759 * to mark them valid only after writing the entire chain
1760 */
1761
1762static inline void
1763mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1764 int cnt)
1765{
1766 int idx, i;
1767 uint32_t *src_ints;
1768 volatile uint32_t *dst_ints;
1769 mcp_kreq_ether_send_t *srcp;
1770 volatile mcp_kreq_ether_send_t *dstp, *dst;
1771 uint8_t last_flags;
1772
1773 idx = tx->req & tx->mask;
1774
1775 last_flags = src->flags;
1776 src->flags = 0;
1777 wmb();
1778 dst = dstp = &tx->lanai[idx];
1779 srcp = src;
1780
1781 if ((idx + cnt) < tx->mask) {
1782 for (i = 0; i < (cnt - 1); i += 2) {
1783 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1784 wmb(); /* force write every 32 bytes */
1785 srcp += 2;
1786 dstp += 2;
1787 }
1788 } else {
1789 /* submit all but the first request, and ensure
1790 that it is submitted below */
1791 mxge_submit_req_backwards(tx, src, cnt);
1792 i = 0;
1793 }
1794 if (i < cnt) {
1795 /* submit the first request */
1796 mxge_pio_copy(dstp, srcp, sizeof(*src));
1797 wmb(); /* barrier before setting valid flag */
1798 }
1799
1800 /* re-write the last 32-bits with the valid flags */
1801 src->flags = last_flags;
1802 src_ints = (uint32_t *)src;
1803 	src_ints += 3;
1804 	dst_ints = (volatile uint32_t *)dst;
1805 	dst_ints += 3;
1806 *dst_ints = *src_ints;
1807 tx->req += cnt;
1808 wmb();
1809}
1810
1811#if IFCAP_TSO4
1812
1813static void
1814mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1815 int busdma_seg_cnt, int ip_off)
1816{
1817 mxge_tx_ring_t *tx;
1818 mcp_kreq_ether_send_t *req;
1819 bus_dma_segment_t *seg;
1820 struct ip *ip;
1821 struct tcphdr *tcp;
1822 uint32_t low, high_swapped;
1823 int len, seglen, cum_len, cum_len_next;
1824 int next_is_first, chop, cnt, rdma_count, small;
1825 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1826 uint8_t flags, flags_next;
1827 static int once;
1828
1829 mss = m->m_pkthdr.tso_segsz;
1830
1831 /* negative cum_len signifies to the
1832 * send loop that we are still in the
1833 * header portion of the TSO packet.
1834 */
1835
1836 /* ensure we have the ethernet, IP and TCP
1837 header together in the first mbuf, copy
1838 it to a scratch buffer if not */
1839 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1840 m_copydata(m, 0, ip_off + sizeof (*ip),
1841 ss->scratch);
1842 ip = (struct ip *)(ss->scratch + ip_off);
1843 } else {
1844 ip = (struct ip *)(mtod(m, char *) + ip_off);
1845 }
1846 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1847 + sizeof (*tcp))) {
1848 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1849 + sizeof (*tcp), ss->scratch);
1850 		ip = (struct ip *)(ss->scratch + ip_off);
1851 }
1852
1853 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1854 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1855
1856 /* TSO implies checksum offload on this hardware */
1857 cksum_offset = ip_off + (ip->ip_hl << 2);
1858 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1859
1860
1861 /* for TSO, pseudo_hdr_offset holds mss.
1862 * The firmware figures out where to put
1863 * the checksum by parsing the header. */
1864 pseudo_hdr_offset = htobe16(mss);
1865
1866 tx = &ss->tx;
1867 req = tx->req_list;
1868 seg = tx->seg_list;
1869 cnt = 0;
1870 rdma_count = 0;
1871 /* "rdma_count" is the number of RDMAs belonging to the
1872 * current packet BEFORE the current send request. For
1873 * non-TSO packets, this is equal to "count".
1874 * For TSO packets, rdma_count needs to be reset
1875 * to 0 after a segment cut.
1876 *
1877 * The rdma_count field of the send request is
1878 * the number of RDMAs of the packet starting at
1879 	 * that request. For TSO send requests with one or more cuts
1880 * in the middle, this is the number of RDMAs starting
1881 * after the last cut in the request. All previous
1882 * segments before the last cut implicitly have 1 RDMA.
1883 *
1884 * Since the number of RDMAs is not known beforehand,
1885 * it must be filled-in retroactively - after each
1886 * segmentation cut or at the end of the entire packet.
1887 */
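/*
 * Worked sketch: imagine 5 descriptors with one cut after the 3rd.
 * While building descriptor i the loop executes
 *	(req - rdma_count)->rdma_count = rdma_count + 1;
 * so the count stored in the first descriptor of the current run is
 * continuously re-written (1, then 2, then 3).  At the cut,
 * rdma_count restarts, and the same statement then maintains the
 * count in the first descriptor after the cut; the assignment just
 * past the loop closes out the final run.
 */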
1888
1889 while (busdma_seg_cnt) {
1890 		/* Break the busdma segment up into pieces */
1891 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1892 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1893 len = seg->ds_len;
1894
1895 while (len) {
1896 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1897 seglen = len;
1898 cum_len_next = cum_len + seglen;
1899 (req-rdma_count)->rdma_count = rdma_count + 1;
1900 if (__predict_true(cum_len >= 0)) {
1901 /* payload */
1902 chop = (cum_len_next > mss);
1903 cum_len_next = cum_len_next % mss;
1904 next_is_first = (cum_len_next == 0);
1905 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1906 flags_next |= next_is_first *
1907 MXGEFW_FLAGS_FIRST;
1908 rdma_count |= -(chop | next_is_first);
1909 rdma_count += chop & !next_is_first;
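				/* the branchless ops above are
				 * equivalent to (sketch):
				 *   if (chop) flags |= MXGEFW_FLAGS_TSO_CHOP;
				 *   if (next_is_first) flags_next |= MXGEFW_FLAGS_FIRST;
				 *   if (chop || next_is_first) rdma_count = -1;
				 *   if (chop && !next_is_first) rdma_count = 0;
				 * so rdma_count++ at the bottom of the
				 * loop restarts the RDMA run after
				 * every segmentation cut */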
1910 } else if (cum_len_next >= 0) {
1911 /* header ends */
1912 rdma_count = -1;
1913 cum_len_next = 0;
1914 seglen = -cum_len;
1915 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1916 flags_next = MXGEFW_FLAGS_TSO_PLD |
1917 MXGEFW_FLAGS_FIRST |
1918 (small * MXGEFW_FLAGS_SMALL);
1919 }
1920
1921 req->addr_high = high_swapped;
1922 req->addr_low = htobe32(low);
1923 req->pseudo_hdr_offset = pseudo_hdr_offset;
1924 req->pad = 0;
1925 req->rdma_count = 1;
1926 req->length = htobe16(seglen);
1927 req->cksum_offset = cksum_offset;
1928 req->flags = flags | ((cum_len & 1) *
1929 MXGEFW_FLAGS_ALIGN_ODD);
1930 low += seglen;
1931 len -= seglen;
1932 cum_len = cum_len_next;
1933 flags = flags_next;
1934 req++;
1935 cnt++;
1936 rdma_count++;
1937 if (__predict_false(cksum_offset > seglen))
1938 cksum_offset -= seglen;
1939 else
1940 cksum_offset = 0;
1941 if (__predict_false(cnt > tx->max_desc))
1942 goto drop;
1943 }
1944 busdma_seg_cnt--;
1945 seg++;
1946 }
1947 (req-rdma_count)->rdma_count = rdma_count;
1948
1949 do {
1950 req--;
1951 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1953
1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1955 mxge_submit_req(tx, tx->req_list, cnt);
1956#ifdef IFNET_BUF_RING
1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1958 /* tell the NIC to start polling this slice */
1959 *tx->send_go = 1;
1960 tx->queue_active = 1;
1961 tx->activate++;
1962 wmb();
1963 }
1964#endif
1965 return;
1966
1967drop:
1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1969 m_freem(m);
1970 ss->oerrors++;
1971 if (!once) {
1972 printf("tx->max_desc exceeded via TSO!\n");
1973 printf("mss = %d, %ld, %d!\n", mss,
1974 (long)seg - (long)tx->seg_list, tx->max_desc);
1975 once = 1;
1976 }
1977 return;
1978
1979}
1980
1981#endif /* IFCAP_TSO4 */
1982
1983#ifdef MXGE_NEW_VLAN_API
1984/*
1985 * We reproduce the software vlan tag insertion from
1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1987 * vlan tag insertion. We need to advertise this in order to have the
1988 * vlan interface respect our csum offload flags.
1989 */
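/*
 * Header layout sketch: M_PREPEND() grows the mbuf by 4 bytes at the
 * front, then the two MAC addresses are copied up to the new start,
 * leaving room for the tag between the source MAC and the original
 * ethertype:
 *
 *   before: [dst 6][src 6][type 2][payload]
 *   after:  [dst 6][src 6][0x8100 2][tag 2][type 2][payload]
 */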
1990static struct mbuf *
1991mxge_vlan_tag_insert(struct mbuf *m)
1992{
1993 struct ether_vlan_header *evl;
1994
1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1996 if (__predict_false(m == NULL))
1997 return NULL;
1998 if (m->m_len < sizeof(*evl)) {
1999 m = m_pullup(m, sizeof(*evl));
2000 if (__predict_false(m == NULL))
2001 return NULL;
2002 }
2003 /*
2004 * Transform the Ethernet header into an Ethernet header
2005 * with 802.1Q encapsulation.
2006 */
2007 evl = mtod(m, struct ether_vlan_header *);
2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
2012 m->m_flags &= ~M_VLANTAG;
2013 return m;
2014}
2015#endif /* MXGE_NEW_VLAN_API */
2016
2017static void
2018mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2019{
2020 mxge_softc_t *sc;
2021 mcp_kreq_ether_send_t *req;
2022 bus_dma_segment_t *seg;
2023 struct mbuf *m_tmp;
2024 struct ifnet *ifp;
2025 mxge_tx_ring_t *tx;
2026 struct ip *ip;
2027 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2028 uint16_t pseudo_hdr_offset;
2029 uint8_t flags, cksum_offset;
2030
2031
2032 sc = ss->sc;
2033 ifp = sc->ifp;
2034 tx = &ss->tx;
2035
2036 ip_off = sizeof (struct ether_header);
2037#ifdef MXGE_NEW_VLAN_API
2038 if (m->m_flags & M_VLANTAG) {
2039 m = mxge_vlan_tag_insert(m);
2040 if (__predict_false(m == NULL))
2041 goto drop;
2042 ip_off += ETHER_VLAN_ENCAP_LEN;
2043 }
2044#endif
2045 /* (try to) map the frame for DMA */
2046 idx = tx->req & tx->mask;
2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2048 m, tx->seg_list, &cnt,
2049 BUS_DMA_NOWAIT);
2050 if (__predict_false(err == EFBIG)) {
2051 /* Too many segments in the chain. Try
2052 to defrag */
2053 m_tmp = m_defrag(m, M_NOWAIT);
2054 if (m_tmp == NULL) {
2055 goto drop;
2056 }
2057 ss->tx.defrag++;
2058 m = m_tmp;
2059 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2060 tx->info[idx].map,
2061 m, tx->seg_list, &cnt,
2062 BUS_DMA_NOWAIT);
2063 }
2064 if (__predict_false(err != 0)) {
2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2066 " packet len = %d\n", err, m->m_pkthdr.len);
2067 goto drop;
2068 }
2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2070 BUS_DMASYNC_PREWRITE);
2071 tx->info[idx].m = m;
2072
2073#if IFCAP_TSO4
2074 /* TSO is different enough, we handle it in another routine */
2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2076 mxge_encap_tso(ss, m, cnt, ip_off);
2077 return;
2078 }
2079#endif
2080
2081 req = tx->req_list;
2082 cksum_offset = 0;
2083 pseudo_hdr_offset = 0;
2084 flags = MXGEFW_FLAGS_NO_TSO;
2085
2086 /* checksum offloading? */
2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2088 /* ensure ip header is in first mbuf, copy
2089 it to a scratch buffer if not */
2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2091 m_copydata(m, 0, ip_off + sizeof (*ip),
2092 ss->scratch);
2093 ip = (struct ip *)(ss->scratch + ip_off);
2094 } else {
2095 ip = (struct ip *)(mtod(m, char *) + ip_off);
2096 }
2097 cksum_offset = ip_off + (ip->ip_hl << 2);
2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2100 req->cksum_offset = cksum_offset;
2101 flags |= MXGEFW_FLAGS_CKSUM;
2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2103 } else {
2104 odd_flag = 0;
2105 }
2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2107 flags |= MXGEFW_FLAGS_SMALL;
2108
2109 /* convert segments into a request list */
2110 cum_len = 0;
2111 seg = tx->seg_list;
2112 req->flags = MXGEFW_FLAGS_FIRST;
2113 for (i = 0; i < cnt; i++) {
2114 req->addr_low =
2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2116 req->addr_high =
2117 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2118 req->length = htobe16(seg->ds_len);
2119 req->cksum_offset = cksum_offset;
2120 if (cksum_offset > seg->ds_len)
2121 cksum_offset -= seg->ds_len;
2122 else
2123 cksum_offset = 0;
2124 req->pseudo_hdr_offset = pseudo_hdr_offset;
2125 req->pad = 0; /* complete solid 16-byte block */
2126 req->rdma_count = 1;
2127 req->flags |= flags | ((cum_len & 1) * odd_flag);
2128 cum_len += seg->ds_len;
2129 seg++;
2130 req++;
2131 req->flags = 0;
2132 }
2133 req--;
2134 /* pad runts to 60 bytes */
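	/* (illustration: a 42-byte ARP frame gets one extra descriptor
	 * pointing at the shared pre-zeroed DMA block with length
	 * 60 - 42 = 18, padding the frame (less FCS) up to the 802.3
	 * minimum without touching the mbuf) */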
2135 if (cum_len < 60) {
2136 req++;
2137 req->addr_low =
2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2139 req->addr_high =
2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2141 req->length = htobe16(60 - cum_len);
2142 req->cksum_offset = 0;
2143 req->pseudo_hdr_offset = pseudo_hdr_offset;
2144 req->pad = 0; /* complete solid 16-byte block */
2145 req->rdma_count = 1;
2146 req->flags |= flags | ((cum_len & 1) * odd_flag);
2147 cnt++;
2148 }
2149
2150 tx->req_list[0].rdma_count = cnt;
2151#if 0
2152 /* print what the firmware will see */
2153 for (i = 0; i < cnt; i++) {
2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2155 "cso:%d, flags:0x%x, rdma:%d\n",
2156 i, (int)ntohl(tx->req_list[i].addr_high),
2157 (int)ntohl(tx->req_list[i].addr_low),
2158 (int)ntohs(tx->req_list[i].length),
2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2161 tx->req_list[i].rdma_count);
2162 }
2163 printf("--------------\n");
2164#endif
2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2166 mxge_submit_req(tx, tx->req_list, cnt);
2167#ifdef IFNET_BUF_RING
2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2169 /* tell the NIC to start polling this slice */
2170 *tx->send_go = 1;
2171 tx->queue_active = 1;
2172 tx->activate++;
2173 wmb();
2174 }
2175#endif
2176 return;
2177
2178drop:
2179 m_freem(m);
2180 ss->oerrors++;
2181 return;
2182}
2183
2184#ifdef IFNET_BUF_RING
2185static void
2186mxge_qflush(struct ifnet *ifp)
2187{
2188 mxge_softc_t *sc = ifp->if_softc;
2189 mxge_tx_ring_t *tx;
2190 struct mbuf *m;
2191 int slice;
2192
2193 for (slice = 0; slice < sc->num_slices; slice++) {
2194 tx = &sc->ss[slice].tx;
2195 mtx_lock(&tx->mtx);
2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2197 m_freem(m);
2198 mtx_unlock(&tx->mtx);
2199 }
2200 if_qflush(ifp);
2201}
2202
2203static inline void
2204mxge_start_locked(struct mxge_slice_state *ss)
2205{
2206 mxge_softc_t *sc;
2207 struct mbuf *m;
2208 struct ifnet *ifp;
2209 mxge_tx_ring_t *tx;
2210
2211 sc = ss->sc;
2212 ifp = sc->ifp;
2213 tx = &ss->tx;
2214
2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2216 m = drbr_dequeue(ifp, tx->br);
2217 if (m == NULL) {
2218 return;
2219 }
2220 /* let BPF see it */
2221 BPF_MTAP(ifp, m);
2222
2223 /* give it to the nic */
2224 mxge_encap(ss, m);
2225 }
2226 /* ran out of transmit slots */
2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2228 && (!drbr_empty(ifp, tx->br))) {
2229 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2230 tx->stall++;
2231 }
2232}
2233
2234static int
2235mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2236{
2237 mxge_softc_t *sc;
2238 struct ifnet *ifp;
2239 mxge_tx_ring_t *tx;
2240 int err;
2241
2242 sc = ss->sc;
2243 ifp = sc->ifp;
2244 tx = &ss->tx;
2245
2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2247 IFF_DRV_RUNNING) {
2248 err = drbr_enqueue(ifp, tx->br, m);
2249 return (err);
2250 }
2251
2252 if (drbr_empty(ifp, tx->br) &&
2252 if (!drbr_needs_enqueue(ifp, tx->br) &&
2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2254 /* let BPF see it */
2255 BPF_MTAP(ifp, m);
2256 /* give it to the nic */
2257 mxge_encap(ss, m);
2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2259 return (err);
2260 }
2261 if (!drbr_empty(ifp, tx->br))
2262 mxge_start_locked(ss);
2263 return (0);
2264}
2265
2266static int
2267mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2268{
2269 mxge_softc_t *sc = ifp->if_softc;
2270 struct mxge_slice_state *ss;
2271 mxge_tx_ring_t *tx;
2272 int err = 0;
2273 int slice;
2274
2275 slice = m->m_pkthdr.flowid;
2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2277
2278 ss = &sc->ss[slice];
2279 tx = &ss->tx;
2280
2281 if (mtx_trylock(&tx->mtx)) {
2282 err = mxge_transmit_locked(ss, m);
2283 mtx_unlock(&tx->mtx);
2284 } else {
2285 err = drbr_enqueue(ifp, tx->br, m);
2286 }
2287
2288 return (err);
2289}
2290
2291#else
2292
2293static inline void
2294mxge_start_locked(struct mxge_slice_state *ss)
2295{
2296 mxge_softc_t *sc;
2297 struct mbuf *m;
2298 struct ifnet *ifp;
2299 mxge_tx_ring_t *tx;
2300
2301 sc = ss->sc;
2302 ifp = sc->ifp;
2303 tx = &ss->tx;
2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2306 if (m == NULL) {
2307 return;
2308 }
2309 /* let BPF see it */
2310 BPF_MTAP(ifp, m);
2311
2312 /* give it to the nic */
2313 mxge_encap(ss, m);
2314 }
2315 /* ran out of transmit slots */
2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2318 tx->stall++;
2319 }
2320}
2321#endif
2322static void
2323mxge_start(struct ifnet *ifp)
2324{
2325 mxge_softc_t *sc = ifp->if_softc;
2326 struct mxge_slice_state *ss;
2327
2328 /* only use the first slice for now */
2329 ss = &sc->ss[0];
2330 mtx_lock(&ss->tx.mtx);
2331 mxge_start_locked(ss);
2332 mtx_unlock(&ss->tx.mtx);
2333}
2334
2335/*
2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2337 * at most 32 bytes at a time, so as to avoid involving the software
2338 * pio handler in the nic. We re-write the first segment's low
2339 * DMA address to mark it valid only after we write the entire chunk
2340 * in a burst
2341 */
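/*
 * Sketch: receive descriptors are 8 bytes (just a 64-bit bus
 * address), so the 8-descriptor group goes out as two 32-byte
 * bursts.  The first descriptor's addr_low is staged as 0xffffffff
 * and only rewritten with the real address after both bursts have
 * landed, so the firmware cannot start on a half-written group.
 */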
2342static inline void
2343mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2344 mcp_kreq_ether_recv_t *src)
2345{
2346 uint32_t low;
2347
2348 low = src->addr_low;
2349 src->addr_low = 0xffffffff;
2350 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2351 wmb();
2352 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2353 wmb();
2354 src->addr_low = low;
2355 dst->addr_low = low;
2356 wmb();
2357}
2358
2359static int
2360mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2361{
2362 bus_dma_segment_t seg;
2363 struct mbuf *m;
2364 mxge_rx_ring_t *rx = &ss->rx_small;
2365 int cnt, err;
2366
2367 m = m_gethdr(M_DONTWAIT, MT_DATA);
2368 if (m == NULL) {
2369 rx->alloc_fail++;
2370 err = ENOBUFS;
2371 goto done;
2372 }
2373 m->m_len = MHLEN;
2374 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2375 &seg, &cnt, BUS_DMA_NOWAIT);
2376 if (err != 0) {
2377 m_free(m);
2378 goto done;
2379 }
2380 rx->info[idx].m = m;
2381 rx->shadow[idx].addr_low =
2382 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2383 rx->shadow[idx].addr_high =
2384 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2385
2386done:
2387 if ((idx & 7) == 7)
2388 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2389 return err;
2390}
2391
2392static int
2393mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2394{
2395 bus_dma_segment_t seg[3];
2396 struct mbuf *m;
2397 mxge_rx_ring_t *rx = &ss->rx_big;
2398 int cnt, err, i;
2399
2400 if (rx->cl_size == MCLBYTES)
2401 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2402 else
2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2404 if (m == NULL) {
2405 rx->alloc_fail++;
2406 err = ENOBUFS;
2407 goto done;
2408 }
2409 m->m_len = rx->mlen;
2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2411 seg, &cnt, BUS_DMA_NOWAIT);
2412 if (err != 0) {
2413 m_free(m);
2414 goto done;
2415 }
2416 rx->info[idx].m = m;
2417 rx->shadow[idx].addr_low =
2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2419 rx->shadow[idx].addr_high =
2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2421
2422#if MXGE_VIRT_JUMBOS
2423 for (i = 1; i < cnt; i++) {
2424 rx->shadow[idx + i].addr_low =
2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2426 rx->shadow[idx + i].addr_high =
2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2428 }
2429#endif
2430
2431done:
2432 for (i = 0; i < rx->nbufs; i++) {
2433 if ((idx & 7) == 7) {
2434 mxge_submit_8rx(&rx->lanai[idx - 7],
2435 &rx->shadow[idx - 7]);
2436 }
2437 idx++;
2438 }
2439 return err;
2440}
2441
2442/*
2443 * Myri10GE hardware checksums are not valid if the sender
2444 * padded the frame with non-zero padding. This is because
2445 * the firmware just does a simple 16-bit 1s complement
2446 * checksum across the entire frame, excluding the first 14
2447  * bytes. It is best to simply check the checksum and
2448 * tell the stack about it only if the checksum is good
2449 */
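/*
 * Arithmetic sketch: the firmware's csum is the 1s complement sum of
 * everything past byte 14.  in_pseudo() below adds in the TCP/UDP
 * pseudo-header (addresses, protocol, and the L4 length, i.e. ip_len
 * minus the IP header length).  For a packet whose own transport
 * checksum is correct, the total folds to 0xffff, so after the
 * c ^= 0xffff below the callers can simply test for zero.
 */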
2450
2451static inline uint16_t
2452mxge_rx_csum(struct mbuf *m, int csum)
2453{
2454 struct ether_header *eh;
2455 struct ip *ip;
2456 uint16_t c;
2457
2458 eh = mtod(m, struct ether_header *);
2459
2460 /* only deal with IPv4 TCP & UDP for now */
2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2462 return 1;
2463 ip = (struct ip *)(eh + 1);
2464 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2465 ip->ip_p != IPPROTO_UDP))
2466 return 1;
2467#ifdef INET
2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2469 		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
2470 			    (ip->ip_hl << 2) + ip->ip_p));
2471#else
2472 c = 1;
2473#endif
2474 c ^= 0xffff;
2475 return (c);
2476}
2477
2478static void
2479mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2480{
2481 struct ether_vlan_header *evl;
2482 struct ether_header *eh;
2483 uint32_t partial;
2484
2485 evl = mtod(m, struct ether_vlan_header *);
2486 eh = mtod(m, struct ether_header *);
2487
2488 /*
2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2490 * after what the firmware thought was the end of the ethernet
2491 * header.
2492 */
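	/*
	 * 1s complement subtraction sketch: "partial" holds the 4 tag
	 * bytes being removed from the checksum's coverage.  Adding
	 * ~partial plus the end-around carry subtracts partial in 1s
	 * complement arithmetic; the two shift-and-add lines then fold
	 * the 32-bit accumulator back down to 16 bits.
	 */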
2493
2494 /* put checksum into host byte order */
2495 *csum = ntohs(*csum);
2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2497 (*csum) += ~partial;
2498 (*csum) += ((*csum) < ~partial);
2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2501
2502 /* restore checksum to network byte order;
2503 later consumers expect this */
2504 *csum = htons(*csum);
2505
2506 /* save the tag */
2507#ifdef MXGE_NEW_VLAN_API
2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2509#else
2510 {
2511 struct m_tag *mtag;
2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2513 M_NOWAIT);
2514 if (mtag == NULL)
2515 return;
2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2517 m_tag_prepend(m, mtag);
2518 }
2519
2520#endif
2521 m->m_flags |= M_VLANTAG;
2522
2523 /*
2524 * Remove the 802.1q header by copying the Ethernet
2525 * addresses over it and adjusting the beginning of
2526 * the data in the mbuf. The encapsulated Ethernet
2527 * type field is already in place.
2528 */
2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2530 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2531 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2532}
2533
2534
2535static inline void
2536mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2537{
2538 mxge_softc_t *sc;
2539 struct ifnet *ifp;
2540 struct mbuf *m;
2541 struct ether_header *eh;
2542 mxge_rx_ring_t *rx;
2543 bus_dmamap_t old_map;
2544 int idx;
2545 uint16_t tcpudp_csum;
2546
2547 sc = ss->sc;
2548 ifp = sc->ifp;
2549 rx = &ss->rx_big;
2550 idx = rx->cnt & rx->mask;
2551 rx->cnt += rx->nbufs;
2552 /* save a pointer to the received mbuf */
2553 m = rx->info[idx].m;
2554 /* try to replace the received mbuf */
2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2556 /* drop the frame -- the old mbuf is re-cycled */
2557 ifp->if_ierrors++;
2558 return;
2559 }
2560
2561 /* unmap the received buffer */
2562 old_map = rx->info[idx].map;
2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2564 bus_dmamap_unload(rx->dmat, old_map);
2565
2566 /* swap the bus_dmamap_t's */
2567 rx->info[idx].map = rx->extra_map;
2568 rx->extra_map = old_map;
2569
2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2571 * aligned */
2572 m->m_data += MXGEFW_PAD;
2573
2574 m->m_pkthdr.rcvif = ifp;
2575 m->m_len = m->m_pkthdr.len = len;
2576 ss->ipackets++;
2577 eh = mtod(m, struct ether_header *);
2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2579 mxge_vlan_tag_remove(m, &csum);
2580 }
2581 /* if the checksum is valid, mark it in the mbuf header */
2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2584 return;
2585 /* otherwise, it was a UDP frame, or a TCP frame which
2586 we could not do LRO on. Tell the stack that the
2587 checksum is good */
2588 m->m_pkthdr.csum_data = 0xffff;
2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2590 }
2591 /* flowid only valid if RSS hashing is enabled */
2592 if (sc->num_slices > 1) {
2593 m->m_pkthdr.flowid = (ss - sc->ss);
2594 m->m_flags |= M_FLOWID;
2595 }
2596 /* pass the frame up the stack */
2597 (*ifp->if_input)(ifp, m);
2598}
2599
2600static inline void
2601mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2602{
2603 mxge_softc_t *sc;
2604 struct ifnet *ifp;
2605 struct ether_header *eh;
2606 struct mbuf *m;
2607 mxge_rx_ring_t *rx;
2608 bus_dmamap_t old_map;
2609 int idx;
2610 uint16_t tcpudp_csum;
2611
2612 sc = ss->sc;
2613 ifp = sc->ifp;
2614 rx = &ss->rx_small;
2615 idx = rx->cnt & rx->mask;
2616 rx->cnt++;
2617 /* save a pointer to the received mbuf */
2618 m = rx->info[idx].m;
2619 /* try to replace the received mbuf */
2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2621 /* drop the frame -- the old mbuf is re-cycled */
2622 ifp->if_ierrors++;
2623 return;
2624 }
2625
2626 /* unmap the received buffer */
2627 old_map = rx->info[idx].map;
2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2629 bus_dmamap_unload(rx->dmat, old_map);
2630
2631 /* swap the bus_dmamap_t's */
2632 rx->info[idx].map = rx->extra_map;
2633 rx->extra_map = old_map;
2634
2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2636 * aligned */
2637 m->m_data += MXGEFW_PAD;
2638
2639 m->m_pkthdr.rcvif = ifp;
2640 m->m_len = m->m_pkthdr.len = len;
2641 ss->ipackets++;
2642 eh = mtod(m, struct ether_header *);
2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2644 mxge_vlan_tag_remove(m, &csum);
2645 }
2646 /* if the checksum is valid, mark it in the mbuf header */
2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2649 return;
2650 /* otherwise, it was a UDP frame, or a TCP frame which
2651 we could not do LRO on. Tell the stack that the
2652 checksum is good */
2653 m->m_pkthdr.csum_data = 0xffff;
2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2655 }
2656 /* flowid only valid if RSS hashing is enabled */
2657 if (sc->num_slices > 1) {
2658 m->m_pkthdr.flowid = (ss - sc->ss);
2659 m->m_flags |= M_FLOWID;
2660 }
2661 /* pass the frame up the stack */
2662 (*ifp->if_input)(ifp, m);
2663}
2664
2665static inline void
2666mxge_clean_rx_done(struct mxge_slice_state *ss)
2667{
2668 mxge_rx_done_t *rx_done = &ss->rx_done;
2669 int limit = 0;
2670 uint16_t length;
2671 uint16_t checksum;
2672
2673
2674 while (rx_done->entry[rx_done->idx].length != 0) {
2675 length = ntohs(rx_done->entry[rx_done->idx].length);
2676 rx_done->entry[rx_done->idx].length = 0;
2677 checksum = rx_done->entry[rx_done->idx].checksum;
2678 if (length <= (MHLEN - MXGEFW_PAD))
2679 mxge_rx_done_small(ss, length, checksum);
2680 else
2681 mxge_rx_done_big(ss, length, checksum);
2682 rx_done->cnt++;
2683 rx_done->idx = rx_done->cnt & rx_done->mask;
2684
2685 /* limit potential for livelock */
2686 if (__predict_false(++limit > rx_done->mask / 2))
2687 break;
2688 }
2689#ifdef INET
2690 while (!SLIST_EMPTY(&ss->lro_active)) {
2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2692 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2693 mxge_lro_flush(ss, lro);
2694 }
2695#endif
2696}
2697
2698
2699static inline void
2700mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2701{
2702 struct ifnet *ifp;
2703 mxge_tx_ring_t *tx;
2704 struct mbuf *m;
2705 bus_dmamap_t map;
2706 int idx;
2707 int *flags;
2708
2709 tx = &ss->tx;
2710 ifp = ss->sc->ifp;
2711 while (tx->pkt_done != mcp_idx) {
2712 idx = tx->done & tx->mask;
2713 tx->done++;
2714 m = tx->info[idx].m;
2715 /* mbuf and DMA map only attached to the first
2716 segment per-mbuf */
2717 if (m != NULL) {
2718 ss->obytes += m->m_pkthdr.len;
2719 if (m->m_flags & M_MCAST)
2720 ss->omcasts++;
2721 ss->opackets++;
2722 tx->info[idx].m = NULL;
2723 map = tx->info[idx].map;
2724 bus_dmamap_unload(tx->dmat, map);
2725 m_freem(m);
2726 }
2727 if (tx->info[idx].flag) {
2728 tx->info[idx].flag = 0;
2729 tx->pkt_done++;
2730 }
2731 }
2732
2733 	/* If we have space, clear IFF_OACTIVE to tell the stack that
2734 	   it's OK to send packets */
2735#ifdef IFNET_BUF_RING
2736 flags = &ss->if_drv_flags;
2737#else
2738 flags = &ifp->if_drv_flags;
2739#endif
2740 mtx_lock(&ss->tx.mtx);
2741 if ((*flags) & IFF_DRV_OACTIVE &&
2742 tx->req - tx->done < (tx->mask + 1)/4) {
2743 *(flags) &= ~IFF_DRV_OACTIVE;
2744 ss->tx.wake++;
2745 mxge_start_locked(ss);
2746 }
2747#ifdef IFNET_BUF_RING
2748 	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2749 		/* let the NIC stop polling this queue, since there
2750 		 * are no more transmits pending */
2751 		*tx->send_stop = 1;
2752 		tx->queue_active = 0;
2753 		tx->deactivate++;
2754 		wmb();
2755 	}
2758#endif
2759 mtx_unlock(&ss->tx.mtx);
2760
2761}
2762
2763static struct mxge_media_type mxge_xfp_media_types[] =
2764{
2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2768 {0, (1 << 5), "10GBASE-ER"},
2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2770 {0, (1 << 3), "10GBASE-SW"},
2771 {0, (1 << 2), "10GBASE-LW"},
2772 {0, (1 << 1), "10GBASE-EW"},
2773 {0, (1 << 0), "Reserved"}
2774};
2775static struct mxge_media_type mxge_sfp_media_types[] =
2776{
2777 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2778 {0, (1 << 7), "Reserved"},
2779 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2780 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2781 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2782};
2783
2784static void
2785mxge_set_media(mxge_softc_t *sc, int type)
2786{
2787 sc->media_flags |= type;
2788 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2789 ifmedia_set(&sc->media, sc->media_flags);
2790}
2791
2792
2793/*
2794 * Determine the media type for a NIC. Some XFPs will identify
2795 * themselves only when their link is up, so this is initiated via a
2796 * link up interrupt. However, this can potentially take up to
2797 * several milliseconds, so it is run via the watchdog routine, rather
2798 * than in the interrupt handler itself. This need only be done
2799 * once, not each time the link is up.
2800 */
2801static void
2802mxge_media_probe(mxge_softc_t *sc)
2803{
2804 mxge_cmd_t cmd;
2805 char *cage_type;
2806 char *ptr;
2807 struct mxge_media_type *mxge_media_types = NULL;
2808 int i, err, ms, mxge_media_type_entries;
2809 uint32_t byte;
2810
2811 sc->need_media_probe = 0;
2812
2813 /* if we've already set a media type, we're done */
2814 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2815 return;
2816
2817 /*
2818 	 * parse the product code to determine the interface type
2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2820 * after the 3rd dash in the driver's cached copy of the
2821 * EEPROM's product code string.
2822 */
2823 ptr = sc->product_code_string;
2824 if (ptr == NULL) {
2825 		device_printf(sc->dev, "Missing product code\n");
		return;
2826 	}
2827
2828 for (i = 0; i < 3; i++, ptr++) {
2829 ptr = index(ptr, '-');
2830 if (ptr == NULL) {
2831 device_printf(sc->dev,
2832 "only %d dashes in PC?!?\n", i);
2833 return;
2834 }
2835 }
2836 if (*ptr == 'C') {
2837 /* -C is CX4 */
2838 mxge_set_media(sc, IFM_10G_CX4);
2839 return;
2840 }
2841 else if (*ptr == 'Q') {
2842 /* -Q is Quad Ribbon Fiber */
2843 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2844 /* FreeBSD has no media type for Quad ribbon fiber */
2845 return;
2846 }
2847
2848 if (*ptr == 'R') {
2849 /* -R is XFP */
2850 mxge_media_types = mxge_xfp_media_types;
2851 mxge_media_type_entries =
2852 sizeof (mxge_xfp_media_types) /
2853 sizeof (mxge_xfp_media_types[0]);
2854 byte = MXGE_XFP_COMPLIANCE_BYTE;
2855 cage_type = "XFP";
2856 }
2857
2858 	if (*ptr == 'S' || *(ptr + 1) == 'S') {
2859 /* -S or -2S is SFP+ */
2860 mxge_media_types = mxge_sfp_media_types;
2861 mxge_media_type_entries =
2862 sizeof (mxge_sfp_media_types) /
2863 sizeof (mxge_sfp_media_types[0]);
2864 cage_type = "SFP+";
2865 byte = 3;
2866 }
2867
2868 if (mxge_media_types == NULL) {
2869 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2870 return;
2871 }
2872
2873 /*
2874 	 * At this point we know the NIC has an XFP or SFP+ cage, so
2875 	 * now we try to determine what is in the cage by using the
2876 	 * firmware's I2C commands to read the module's 10GbE compliance
2877 	 * register.  We read just one byte, which may take over
2878 	 * a millisecond.
2879 */
2880
2881 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2882 cmd.data1 = byte;
2883 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2884 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2885 device_printf(sc->dev, "failed to read XFP\n");
2886 }
2887 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2888 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2889 }
2890 if (err != MXGEFW_CMD_OK) {
2891 return;
2892 }
2893
2894 /* now we wait for the data to be cached */
2895 cmd.data0 = byte;
2896 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2897 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2898 DELAY(1000);
2899 cmd.data0 = byte;
2900 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2901 }
2902 if (err != MXGEFW_CMD_OK) {
2903 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2904 cage_type, err, ms);
2905 return;
2906 }
2907
2908 if (cmd.data0 == mxge_media_types[0].bitmask) {
2909 if (mxge_verbose)
2910 device_printf(sc->dev, "%s:%s\n", cage_type,
2911 mxge_media_types[0].name);
2912 mxge_set_media(sc, mxge_media_types[0].flag);
2913 return;
2914 }
2915 for (i = 1; i < mxge_media_type_entries; i++) {
2916 if (cmd.data0 & mxge_media_types[i].bitmask) {
2917 if (mxge_verbose)
2918 device_printf(sc->dev, "%s:%s\n",
2919 cage_type,
2920 mxge_media_types[i].name);
2921
2922 mxge_set_media(sc, mxge_media_types[i].flag);
2923 return;
2924 }
2925 }
2926 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2927 cmd.data0);
2928
2929 return;
2930}
2931
2932static void
2933mxge_intr(void *arg)
2934{
2935 struct mxge_slice_state *ss = arg;
2936 mxge_softc_t *sc = ss->sc;
2937 mcp_irq_data_t *stats = ss->fw_stats;
2938 mxge_tx_ring_t *tx = &ss->tx;
2939 mxge_rx_done_t *rx_done = &ss->rx_done;
2940 uint32_t send_done_count;
2941 uint8_t valid;
2942
2943
2944#ifndef IFNET_BUF_RING
2945 /* an interrupt on a non-zero slice is implicitly valid
2946 since MSI-X irqs are not shared */
2947 if (ss != sc->ss) {
2948 mxge_clean_rx_done(ss);
2949 *ss->irq_claim = be32toh(3);
2950 return;
2951 }
2952#endif
2953
2954 /* make sure the DMA has finished */
2955 if (!stats->valid) {
2956 return;
2957 }
2958 valid = stats->valid;
2959
2960 if (sc->legacy_irq) {
2961 /* lower legacy IRQ */
2962 *sc->irq_deassert = 0;
2963 if (!mxge_deassert_wait)
2964 /* don't wait for conf. that irq is low */
2965 stats->valid = 0;
2966 } else {
2967 stats->valid = 0;
2968 }
2969
2970 /* loop while waiting for legacy irq deassertion */
2971 do {
2972 /* check for transmit completes and receives */
2973 send_done_count = be32toh(stats->send_done_count);
2974 while ((send_done_count != tx->pkt_done) ||
2975 (rx_done->entry[rx_done->idx].length != 0)) {
2976 if (send_done_count != tx->pkt_done)
2977 mxge_tx_done(ss, (int)send_done_count);
2978 mxge_clean_rx_done(ss);
2979 send_done_count = be32toh(stats->send_done_count);
2980 }
2981 if (sc->legacy_irq && mxge_deassert_wait)
2982 wmb();
2983 } while (*((volatile uint8_t *) &stats->valid));
2984
2985 /* fw link & error stats meaningful only on the first slice */
2986 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2987 if (sc->link_state != stats->link_up) {
2988 sc->link_state = stats->link_up;
2989 if (sc->link_state) {
2990 if_link_state_change(sc->ifp, LINK_STATE_UP);
2991 if (mxge_verbose)
2992 device_printf(sc->dev, "link up\n");
2993 } else {
2994 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2995 if (mxge_verbose)
2996 device_printf(sc->dev, "link down\n");
2997 }
2998 sc->need_media_probe = 1;
2999 }
3000 if (sc->rdma_tags_available !=
3001 be32toh(stats->rdma_tags_available)) {
3002 sc->rdma_tags_available =
3003 be32toh(stats->rdma_tags_available);
3004 device_printf(sc->dev, "RDMA timed out! %d tags "
3005 "left\n", sc->rdma_tags_available);
3006 }
3007
3008 if (stats->link_down) {
3009 sc->down_cnt += stats->link_down;
3010 sc->link_state = 0;
3011 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3012 }
3013 }
3014
3015 /* check to see if we have rx token to pass back */
3016 if (valid & 0x1)
3017 *ss->irq_claim = be32toh(3);
3018 *(ss->irq_claim + 1) = be32toh(3);
3019}
3020
3021static void
3022mxge_init(void *arg)
3023{
3024}
3025
3026
3027
3028static void
3029mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3030{
3031 struct lro_entry *lro_entry;
3032 int i;
3033
3034 while (!SLIST_EMPTY(&ss->lro_free)) {
3035 lro_entry = SLIST_FIRST(&ss->lro_free);
3036 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3037 free(lro_entry, M_DEVBUF);
3038 }
3039
3040 for (i = 0; i <= ss->rx_big.mask; i++) {
3041 if (ss->rx_big.info[i].m == NULL)
3042 continue;
3043 bus_dmamap_unload(ss->rx_big.dmat,
3044 ss->rx_big.info[i].map);
3045 m_freem(ss->rx_big.info[i].m);
3046 ss->rx_big.info[i].m = NULL;
3047 }
3048
3049 for (i = 0; i <= ss->rx_small.mask; i++) {
3050 if (ss->rx_small.info[i].m == NULL)
3051 continue;
3052 bus_dmamap_unload(ss->rx_small.dmat,
3053 ss->rx_small.info[i].map);
3054 m_freem(ss->rx_small.info[i].m);
3055 ss->rx_small.info[i].m = NULL;
3056 }
3057
3058 /* transmit ring used only on the first slice */
3059 if (ss->tx.info == NULL)
3060 return;
3061
3062 for (i = 0; i <= ss->tx.mask; i++) {
3063 ss->tx.info[i].flag = 0;
3064 if (ss->tx.info[i].m == NULL)
3065 continue;
3066 bus_dmamap_unload(ss->tx.dmat,
3067 ss->tx.info[i].map);
3068 m_freem(ss->tx.info[i].m);
3069 ss->tx.info[i].m = NULL;
3070 }
3071}
3072
3073static void
3074mxge_free_mbufs(mxge_softc_t *sc)
3075{
3076 int slice;
3077
3078 for (slice = 0; slice < sc->num_slices; slice++)
3079 mxge_free_slice_mbufs(&sc->ss[slice]);
3080}
3081
3082static void
3083mxge_free_slice_rings(struct mxge_slice_state *ss)
3084{
3085 int i;
3086
3087
3088 if (ss->rx_done.entry != NULL)
3089 mxge_dma_free(&ss->rx_done.dma);
3090 ss->rx_done.entry = NULL;
3091
3092 if (ss->tx.req_bytes != NULL)
3093 free(ss->tx.req_bytes, M_DEVBUF);
3094 ss->tx.req_bytes = NULL;
3095
3096 if (ss->tx.seg_list != NULL)
3097 free(ss->tx.seg_list, M_DEVBUF);
3098 ss->tx.seg_list = NULL;
3099
3100 if (ss->rx_small.shadow != NULL)
3101 free(ss->rx_small.shadow, M_DEVBUF);
3102 ss->rx_small.shadow = NULL;
3103
3104 if (ss->rx_big.shadow != NULL)
3105 free(ss->rx_big.shadow, M_DEVBUF);
3106 ss->rx_big.shadow = NULL;
3107
3108 if (ss->tx.info != NULL) {
3109 if (ss->tx.dmat != NULL) {
3110 for (i = 0; i <= ss->tx.mask; i++) {
3111 bus_dmamap_destroy(ss->tx.dmat,
3112 ss->tx.info[i].map);
3113 }
3114 bus_dma_tag_destroy(ss->tx.dmat);
3115 }
3116 free(ss->tx.info, M_DEVBUF);
3117 }
3118 ss->tx.info = NULL;
3119
3120 if (ss->rx_small.info != NULL) {
3121 if (ss->rx_small.dmat != NULL) {
3122 for (i = 0; i <= ss->rx_small.mask; i++) {
3123 bus_dmamap_destroy(ss->rx_small.dmat,
3124 ss->rx_small.info[i].map);
3125 }
3126 bus_dmamap_destroy(ss->rx_small.dmat,
3127 ss->rx_small.extra_map);
3128 bus_dma_tag_destroy(ss->rx_small.dmat);
3129 }
3130 free(ss->rx_small.info, M_DEVBUF);
3131 }
3132 ss->rx_small.info = NULL;
3133
3134 if (ss->rx_big.info != NULL) {
3135 if (ss->rx_big.dmat != NULL) {
3136 for (i = 0; i <= ss->rx_big.mask; i++) {
3137 bus_dmamap_destroy(ss->rx_big.dmat,
3138 ss->rx_big.info[i].map);
3139 }
3140 bus_dmamap_destroy(ss->rx_big.dmat,
3141 ss->rx_big.extra_map);
3142 bus_dma_tag_destroy(ss->rx_big.dmat);
3143 }
3144 free(ss->rx_big.info, M_DEVBUF);
3145 }
3146 ss->rx_big.info = NULL;
3147}
3148
3149static void
3150mxge_free_rings(mxge_softc_t *sc)
3151{
3152 int slice;
3153
3154 for (slice = 0; slice < sc->num_slices; slice++)
3155 mxge_free_slice_rings(&sc->ss[slice]);
3156}
3157
3158static int
3159mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3160 int tx_ring_entries)
3161{
3162 mxge_softc_t *sc = ss->sc;
3163 size_t bytes;
3164 int err, i;
3165
3166 err = ENOMEM;
3167
3168 /* allocate per-slice receive resources */
3169
3170 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3171 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3172
3173 /* allocate the rx shadow rings */
3174 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3175 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3176 if (ss->rx_small.shadow == NULL)
3177 return err;
3178
3179 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3180 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3181 if (ss->rx_big.shadow == NULL)
3182 return err;
3183
3184 /* allocate the rx host info rings */
3185 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3186 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3187 if (ss->rx_small.info == NULL)
3188 return err;
3189
3190 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3191 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3192 if (ss->rx_big.info == NULL)
3193 return err;
3194
3195 /* allocate the rx busdma resources */
3196 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3197 1, /* alignment */
3198 4096, /* boundary */
3199 BUS_SPACE_MAXADDR, /* low */
3200 BUS_SPACE_MAXADDR, /* high */
3201 NULL, NULL, /* filter */
3202 MHLEN, /* maxsize */
3203 1, /* num segs */
3204 MHLEN, /* maxsegsize */
3205 BUS_DMA_ALLOCNOW, /* flags */
3206 NULL, NULL, /* lock */
3207 &ss->rx_small.dmat); /* tag */
3208 if (err != 0) {
3209 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3210 err);
3211 return err;
3212 }
3213
3214 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3215 1, /* alignment */
3216#if MXGE_VIRT_JUMBOS
3217 4096, /* boundary */
3218#else
3219 0, /* boundary */
3220#endif
3221 BUS_SPACE_MAXADDR, /* low */
3222 BUS_SPACE_MAXADDR, /* high */
3223 NULL, NULL, /* filter */
3224 3*4096, /* maxsize */
3225#if MXGE_VIRT_JUMBOS
3226 3, /* num segs */
3227 4096, /* maxsegsize*/
3228#else
3229 1, /* num segs */
3230 MJUM9BYTES, /* maxsegsize*/
3231#endif
3232 BUS_DMA_ALLOCNOW, /* flags */
3233 NULL, NULL, /* lock */
3234 &ss->rx_big.dmat); /* tag */
3235 if (err != 0) {
3236 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3237 err);
3238 return err;
3239 }
3240 for (i = 0; i <= ss->rx_small.mask; i++) {
3241 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3242 &ss->rx_small.info[i].map);
3243 if (err != 0) {
3244 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3245 err);
3246 return err;
3247 }
3248 }
3249 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3250 &ss->rx_small.extra_map);
3251 if (err != 0) {
3252 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3253 err);
3254 return err;
3255 }
3256
3257 for (i = 0; i <= ss->rx_big.mask; i++) {
3258 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3259 &ss->rx_big.info[i].map);
3260 if (err != 0) {
3261 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3262 err);
3263 return err;
3264 }
3265 }
3266 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3267 &ss->rx_big.extra_map);
3268 if (err != 0) {
3269 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3270 err);
3271 return err;
3272 }
3273
3274 	/* now allocate TX resources */
3275
3276#ifndef IFNET_BUF_RING
3277 /* only use a single TX ring for now */
3278 if (ss != ss->sc->ss)
3279 return 0;
3280#endif
3281
3282 ss->tx.mask = tx_ring_entries - 1;
3283 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3284
3285
3286 /* allocate the tx request copy block */
3287 bytes = 8 +
3288 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3289 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3290 if (ss->tx.req_bytes == NULL)
3291 return err;
3292 /* ensure req_list entries are aligned to 8 bytes */
3293 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3294 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
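	/* (e.g. if req_bytes ends up at 0x...1005, (0x1005 + 7) & ~7
	 * rounds to 0x1008, the next 8-byte boundary; the 8 slack
	 * bytes in the allocation above keep the rounded pointer
	 * inside the buffer) */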
3295
3296 /* allocate the tx busdma segment list */
3297 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3298 ss->tx.seg_list = (bus_dma_segment_t *)
3299 malloc(bytes, M_DEVBUF, M_WAITOK);
3300 if (ss->tx.seg_list == NULL)
3301 return err;
3302
3303 /* allocate the tx host info ring */
3304 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3305 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3306 if (ss->tx.info == NULL)
3307 return err;
3308
3309 /* allocate the tx busdma resources */
3310 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3311 1, /* alignment */
3312 sc->tx_boundary, /* boundary */
3313 BUS_SPACE_MAXADDR, /* low */
3314 BUS_SPACE_MAXADDR, /* high */
3315 NULL, NULL, /* filter */
3316 65536 + 256, /* maxsize */
3317 ss->tx.max_desc - 2, /* num segs */
3318 sc->tx_boundary, /* maxsegsz */
3319 BUS_DMA_ALLOCNOW, /* flags */
3320 NULL, NULL, /* lock */
3321 &ss->tx.dmat); /* tag */
3322
3323 if (err != 0) {
3324 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3325 err);
3326 return err;
3327 }
3328
3329 /* now use these tags to setup dmamaps for each slot
3330 in the ring */
3331 for (i = 0; i <= ss->tx.mask; i++) {
3332 err = bus_dmamap_create(ss->tx.dmat, 0,
3333 &ss->tx.info[i].map);
3334 if (err != 0) {
3335 device_printf(sc->dev, "Err %d tx dmamap\n",
3336 err);
3337 return err;
3338 }
3339 }
3340 return 0;
3341
3342}
3343
3344static int
3345mxge_alloc_rings(mxge_softc_t *sc)
3346{
3347 mxge_cmd_t cmd;
3348 int tx_ring_size;
3349 int tx_ring_entries, rx_ring_entries;
3350 int err, slice;
3351
3352 /* get ring sizes */
3353 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3354 tx_ring_size = cmd.data0;
3355 if (err != 0) {
3356 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3357 goto abort;
3358 }
3359
3360 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3361 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3362 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3363 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3364 IFQ_SET_READY(&sc->ifp->if_snd);
3365
3366 for (slice = 0; slice < sc->num_slices; slice++) {
3367 err = mxge_alloc_slice_rings(&sc->ss[slice],
3368 rx_ring_entries,
3369 tx_ring_entries);
3370 if (err != 0)
3371 goto abort;
3372 }
3373 return 0;
3374
3375abort:
3376 mxge_free_rings(sc);
3377 return err;
3378
3379}
3380
3381
3382static void
3383mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3384{
3385 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3386
3387 if (bufsize < MCLBYTES) {
3388 /* easy, everything fits in a single buffer */
3389 *big_buf_size = MCLBYTES;
3390 *cl_size = MCLBYTES;
3391 *nbufs = 1;
3392 return;
3393 }
3394
3395 if (bufsize < MJUMPAGESIZE) {
3396 /* still easy, everything still fits in a single buffer */
3397 *big_buf_size = MJUMPAGESIZE;
3398 *cl_size = MJUMPAGESIZE;
3399 *nbufs = 1;
3400 return;
3401 }
3402#if MXGE_VIRT_JUMBOS
3403 /* now we need to use virtually contiguous buffers */
3404 *cl_size = MJUM9BYTES;
3405 *big_buf_size = 4096;
3406 *nbufs = mtu / 4096 + 1;
3407 /* needs to be a power of two, so round up */
3408 if (*nbufs == 3)
3409 *nbufs = 4;
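	/* (e.g. a 9000-byte MTU gives 9000/4096 + 1 == 3 buffers,
	 * rounded to 4 so that whole groups pack evenly into the
	 * power-of-two receive ring, which is stocked in steps of
	 * nbufs) */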
3410#else
3411 *cl_size = MJUM9BYTES;
3412 *big_buf_size = MJUM9BYTES;
3413 *nbufs = 1;
3414#endif
3415}
3416
3417static int
3418mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3419{
3420 mxge_softc_t *sc;
3421 mxge_cmd_t cmd;
3422 bus_dmamap_t map;
3423 struct lro_entry *lro_entry;
3424 int err, i, slice;
3425
3426
3427 sc = ss->sc;
3428 slice = ss - sc->ss;
3429
3430 SLIST_INIT(&ss->lro_free);
3431 SLIST_INIT(&ss->lro_active);
3432
3433 for (i = 0; i < sc->lro_cnt; i++) {
3434 lro_entry = (struct lro_entry *)
3435 malloc(sizeof (*lro_entry), M_DEVBUF,
3436 M_NOWAIT | M_ZERO);
3437 if (lro_entry == NULL) {
3438 sc->lro_cnt = i;
3439 break;
3440 }
3441 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3442 }
3443 /* get the lanai pointers to the send and receive rings */
3444
3445 err = 0;
3446#ifndef IFNET_BUF_RING
3447 /* We currently only send from the first slice */
3448 if (slice == 0) {
3449#endif
3450 cmd.data0 = slice;
3451 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3452 ss->tx.lanai =
3453 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3454 ss->tx.send_go = (volatile uint32_t *)
3455 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3456 ss->tx.send_stop = (volatile uint32_t *)
3457 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3458#ifndef IFNET_BUF_RING
3459 }
3460#endif
3461 cmd.data0 = slice;
3462 err |= mxge_send_cmd(sc,
3463 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3464 ss->rx_small.lanai =
3465 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3466 cmd.data0 = slice;
3467 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3468 ss->rx_big.lanai =
3469 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3470
3471 if (err != 0) {
3472 device_printf(sc->dev,
3473 "failed to get ring sizes or locations\n");
3474 return EIO;
3475 }
3476
3477 /* stock receive rings */
3478 for (i = 0; i <= ss->rx_small.mask; i++) {
3479 map = ss->rx_small.info[i].map;
3480 err = mxge_get_buf_small(ss, map, i);
3481 if (err) {
3482 device_printf(sc->dev, "alloced %d/%d smalls\n",
3483 i, ss->rx_small.mask + 1);
3484 return ENOMEM;
3485 }
3486 }
3487 for (i = 0; i <= ss->rx_big.mask; i++) {
3488 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3489 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3490 }
3491 ss->rx_big.nbufs = nbufs;
3492 ss->rx_big.cl_size = cl_size;
3493 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3494 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3495 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3496 map = ss->rx_big.info[i].map;
3497 err = mxge_get_buf_big(ss, map, i);
3498 if (err) {
3499 device_printf(sc->dev, "alloced %d/%d bigs\n",
3500 i, ss->rx_big.mask + 1);
3501 return ENOMEM;
3502 }
3503 }
3504 return 0;
3505}
3506
3507static int
3508mxge_open(mxge_softc_t *sc)
3509{
3510 mxge_cmd_t cmd;
3511 int err, big_bytes, nbufs, slice, cl_size, i;
3512 bus_addr_t bus;
3513 volatile uint8_t *itable;
3514 struct mxge_slice_state *ss;
3515
3516 /* Copy the MAC address in case it was overridden */
3517 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3518
3519 err = mxge_reset(sc, 1);
3520 if (err != 0) {
3521 device_printf(sc->dev, "failed to reset\n");
3522 return EIO;
3523 }
3524
3525 if (sc->num_slices > 1) {
3526 /* setup the indirection table */
3527 cmd.data0 = sc->num_slices;
3528 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3529 &cmd);
3530
3531 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3532 &cmd);
3533 if (err != 0) {
3534 device_printf(sc->dev,
3535 "failed to setup rss tables\n");
3536 return err;
3537 }
3538
3539 /* just enable an identity mapping */
3540 itable = sc->sram + cmd.data0;
3541 for (i = 0; i < sc->num_slices; i++)
3542 itable[i] = (uint8_t)i;
3543
3544 cmd.data0 = 1;
3545 cmd.data1 = mxge_rss_hash_type;
3546 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3547 if (err != 0) {
3548 device_printf(sc->dev, "failed to enable slices\n");
3549 return err;
3550 }
3551 }
3552
3553
3554 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3555
3556 cmd.data0 = nbufs;
3557 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3558 &cmd);
3559 /* error is only meaningful if we're trying to set
3560 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3561 if (err && nbufs > 1) {
3562 device_printf(sc->dev,
3563 			      "Failed to set always-use-n to %d\n",
3564 nbufs);
3565 return EIO;
3566 }
3567 /* Give the firmware the mtu and the big and small buffer
3568 sizes. The firmware wants the big buf size to be a power
3569 of two. Luckily, FreeBSD's clusters are powers of two */
3570 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3571 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3572 cmd.data0 = MHLEN - MXGEFW_PAD;
3573 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3574 &cmd);
3575 cmd.data0 = big_bytes;
3576 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3577
3578 if (err != 0) {
3579 device_printf(sc->dev, "failed to setup params\n");
3580 goto abort;
3581 }
3582
3583 	/* Now give the firmware the pointer to the stats block */
3584 for (slice = 0;
3585#ifdef IFNET_BUF_RING
3586 slice < sc->num_slices;
3587#else
3588 slice < 1;
3589#endif
3590 slice++) {
3591 ss = &sc->ss[slice];
3592 cmd.data0 =
3593 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3594 cmd.data1 =
3595 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3596 cmd.data2 = sizeof(struct mcp_irq_data);
3597 cmd.data2 |= (slice << 16);
3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3599 }
3600
3601 if (err != 0) {
3602 bus = sc->ss->fw_stats_dma.bus_addr;
3603 bus += offsetof(struct mcp_irq_data, send_done_count);
3604 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3605 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3606 err = mxge_send_cmd(sc,
3607 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3608 &cmd);
3609 /* Firmware cannot support multicast without STATS_DMA_V2 */
3610 sc->fw_multicast_support = 0;
3611 } else {
3612 sc->fw_multicast_support = 1;
3613 }
3614
3615 if (err != 0) {
3616 device_printf(sc->dev, "failed to setup params\n");
3617 goto abort;
3618 }
3619
3620 for (slice = 0; slice < sc->num_slices; slice++) {
3621 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3622 if (err != 0) {
3623 device_printf(sc->dev, "couldn't open slice %d\n",
3624 slice);
3625 goto abort;
3626 }
3627 }
3628
3629 /* Finally, start the firmware running */
3630 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3631 if (err) {
3632 device_printf(sc->dev, "Couldn't bring up link\n");
3633 goto abort;
3634 }
3635#ifdef IFNET_BUF_RING
3636 for (slice = 0; slice < sc->num_slices; slice++) {
3637 ss = &sc->ss[slice];
3638 ss->if_drv_flags |= IFF_DRV_RUNNING;
3639 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3640 }
3641#endif
3642 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3643 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3644
3645 return 0;
3646
3647
3648abort:
3649 mxge_free_mbufs(sc);
3650
3651 return err;
3652}
3653
3654static int
3655mxge_close(mxge_softc_t *sc, int down)
3656{
3657 mxge_cmd_t cmd;
3658 int err, old_down_cnt;
3659#ifdef IFNET_BUF_RING
3660 struct mxge_slice_state *ss;
3661 int slice;
3662#endif
3663
3664#ifdef IFNET_BUF_RING
3665 for (slice = 0; slice < sc->num_slices; slice++) {
3666 ss = &sc->ss[slice];
3667 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3668 }
3669#endif
3670 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3671 if (!down) {
3672 old_down_cnt = sc->down_cnt;
3673 wmb();
3674 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3675 if (err) {
3676 device_printf(sc->dev,
3677 "Couldn't bring down link\n");
3678 }
3679 if (old_down_cnt == sc->down_cnt) {
3680 /* wait for down irq */
3681 DELAY(10 * sc->intr_coal_delay);
3682 }
3683 wmb();
3684 if (old_down_cnt == sc->down_cnt) {
3685 device_printf(sc->dev, "never got down irq\n");
3686 }
3687 }
3688 mxge_free_mbufs(sc);
3689
3690 return 0;
3691}
3692
3693static void
3694mxge_setup_cfg_space(mxge_softc_t *sc)
3695{
3696 device_t dev = sc->dev;
3697 int reg;
3698 uint16_t cmd, lnk, pectl;
3699
3700 	/* find the PCIe link width and set max read request to 4KB */
3701 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3702 lnk = pci_read_config(dev, reg + 0x12, 2);
3703 sc->link_width = (lnk >> 4) & 0x3f;
3704
3705 if (sc->pectl == 0) {
3706 pectl = pci_read_config(dev, reg + 0x8, 2);
3707 pectl = (pectl & ~0x7000) | (5 << 12);
3708 pci_write_config(dev, reg + 0x8, pectl, 2);
3709 sc->pectl = pectl;
3710 } else {
3711 /* restore saved pectl after watchdog reset */
3712 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3713 }
3714 }
3715
3716 /* Enable DMA and Memory space access */
3717 pci_enable_busmaster(dev);
3718 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3719 cmd |= PCIM_CMD_MEMEN;
3720 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3721}
3722
3723static uint32_t
3724mxge_read_reboot(mxge_softc_t *sc)
3725{
3726 device_t dev = sc->dev;
3727 uint32_t vs;
3728
3729 /* find the vendor specific offset */
3730 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3731 device_printf(sc->dev,
3732 "could not find vendor specific offset\n");
3733 return (uint32_t)-1;
3734 }
3735 /* enable read32 mode */
3736 pci_write_config(dev, vs + 0x10, 0x3, 1);
3737 /* tell NIC which register to read */
3738 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3739 return (pci_read_config(dev, vs + 0x14, 4));
3740}
3741
3742static void
3743mxge_watchdog_reset(mxge_softc_t *sc)
3744{
3745 struct pci_devinfo *dinfo;
3746 struct mxge_slice_state *ss;
3747 int err, running, s, num_tx_slices = 1;
3748 uint32_t reboot;
3749 uint16_t cmd;
3750
3751 err = ENXIO;
3752
3753 device_printf(sc->dev, "Watchdog reset!\n");
3754
3755 /*
3756 * check to see if the NIC rebooted. If it did, then all of
3757 * PCI config space has been reset, and things like the
3758 * busmaster bit will be zero. If this is the case, then we
3759 * must restore PCI config space before the NIC can be used
3760 * again
3761 */
3762 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3763 if (cmd == 0xffff) {
3764 /*
3765 * maybe the watchdog caught the NIC rebooting; wait
3766 * up to 100ms for it to finish. If it does not come
3767 * back, then give up
3768 */
3769 DELAY(1000*100);
3770 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3771 if (cmd == 0xffff) {
3772 device_printf(sc->dev, "NIC disappeared!\n");
3773 }
3774 }
3775 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3776 /* print the reboot status */
3777 reboot = mxge_read_reboot(sc);
3778 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3779 reboot);
3780 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3781 if (running) {
3782
3783 /*
3784 * quiesce NIC so that TX routines will not try to
3785 * xmit after restoration of BAR
3786 */
3787
3788 /* Mark the link as down */
3789 if (sc->link_state) {
3790 sc->link_state = 0;
3791 if_link_state_change(sc->ifp,
3792 LINK_STATE_DOWN);
3793 }
3794#ifdef IFNET_BUF_RING
3795 num_tx_slices = sc->num_slices;
3796#endif
3797 /* grab all TX locks to ensure no tx */
3798 for (s = 0; s < num_tx_slices; s++) {
3799 ss = &sc->ss[s];
3800 mtx_lock(&ss->tx.mtx);
3801 }
3802 mxge_close(sc, 1);
3803 }
3804 /* restore PCI configuration space */
3805 dinfo = device_get_ivars(sc->dev);
3806 pci_cfg_restore(sc->dev, dinfo);
3807
3808 /* and redo any changes we made to our config space */
3809 mxge_setup_cfg_space(sc);
3810
3811 /* reload f/w */
3812 err = mxge_load_firmware(sc, 0);
3813 if (err) {
3814 device_printf(sc->dev,
3815 "Unable to re-load f/w\n");
3816 }
3817 if (running) {
3818 if (!err)
3819 err = mxge_open(sc);
3820 /* release all TX locks */
3821 for (s = 0; s < num_tx_slices; s++) {
3822 ss = &sc->ss[s];
3823#ifdef IFNET_BUF_RING
3824 mxge_start_locked(ss);
3825#endif
3826 mtx_unlock(&ss->tx.mtx);
3827 }
3828 }
3829 sc->watchdog_resets++;
3830 } else {
3831 device_printf(sc->dev,
3832 "NIC did not reboot, not resetting\n");
3833 err = 0;
3834 }
3835 if (err) {
3836 device_printf(sc->dev, "watchdog reset failed\n");
3837 } else {
3838 if (sc->dying == 2)
3839 sc->dying = 0;
3840 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3841 }
3842}
3843
3844static void
3845mxge_watchdog_task(void *arg, int pending)
3846{
3847 mxge_softc_t *sc = arg;
3848
3849
3850 mtx_lock(&sc->driver_mtx);
3851 mxge_watchdog_reset(sc);
3852 mtx_unlock(&sc->driver_mtx);
3853}
3854
3855static void
3856mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3857{
3858 tx = &sc->ss[slice].tx;
3859 	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3860 device_printf(sc->dev,
3861 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3862 tx->req, tx->done, tx->queue_active);
3863 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3864 tx->activate, tx->deactivate);
3865 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3866 tx->pkt_done,
3867 be32toh(sc->ss->fw_stats->send_done_count));
3868}
3869
3870static int
3871mxge_watchdog(mxge_softc_t *sc)
3872{
3873 mxge_tx_ring_t *tx;
3874 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3875 int i, err = 0;
3876
3877 /* see if we have outstanding transmits, which
3878 have been pending for more than mxge_ticks */
3879 for (i = 0;
3880#ifdef IFNET_BUF_RING
3881 (i < sc->num_slices) && (err == 0);
3882#else
3883 (i < 1) && (err == 0);
3884#endif
3885 i++) {
3886 tx = &sc->ss[i].tx;
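		/*
		 * a ring is considered wedged when transmits are
		 * outstanding now (req != done), were already outstanding
		 * at the previous tick, and no completions have arrived
		 * since then (done == watchdog_done)
		 */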
3887 if (tx->req != tx->done &&
3888 tx->watchdog_req != tx->watchdog_done &&
3889 tx->done == tx->watchdog_done) {
3890 /* check for pause blocking before resetting */
3891 if (tx->watchdog_rx_pause == rx_pause) {
3892 mxge_warn_stuck(sc, tx, i);
3893 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3894 return (ENXIO);
3895 }
3896 else
3897 device_printf(sc->dev, "Flow control blocking "
3898 "xmits, check link partner\n");
3899 }
3900
3901 tx->watchdog_req = tx->req;
3902 tx->watchdog_done = tx->done;
3903 tx->watchdog_rx_pause = rx_pause;
3904 }
3905
3906 if (sc->need_media_probe)
3907 mxge_media_probe(sc);
3908 return (err);
3909}
3910
3911static u_long
3912mxge_update_stats(mxge_softc_t *sc)
3913{
3914 struct mxge_slice_state *ss;
3915 u_long pkts = 0;
3916 u_long ipackets = 0;
3917 u_long opackets = 0;
3918#ifdef IFNET_BUF_RING
3919 u_long obytes = 0;
3920 u_long omcasts = 0;
3921 u_long odrops = 0;
3922#endif
3923 u_long oerrors = 0;
3924 int slice;
3925
3926 for (slice = 0; slice < sc->num_slices; slice++) {
3927 ss = &sc->ss[slice];
3928 ipackets += ss->ipackets;
3929 opackets += ss->opackets;
3930#ifdef IFNET_BUF_RING
3931 obytes += ss->obytes;
3932 omcasts += ss->omcasts;
3933 odrops += ss->tx.br->br_drops;
3934#endif
3935 oerrors += ss->oerrors;
3936 }
3937 pkts = (ipackets - sc->ifp->if_ipackets);
3938 pkts += (opackets - sc->ifp->if_opackets);
3939 sc->ifp->if_ipackets = ipackets;
3940 sc->ifp->if_opackets = opackets;
3941#ifdef IFNET_BUF_RING
3942 sc->ifp->if_obytes = obytes;
3943 sc->ifp->if_omcasts = omcasts;
3944 sc->ifp->if_snd.ifq_drops = odrops;
3945#endif
3946 sc->ifp->if_oerrors = oerrors;
3947 return pkts;
3948}
3949
3950static void
3951mxge_tick(void *arg)
3952{
3953 mxge_softc_t *sc = arg;
3954 u_long pkts = 0;
3955 int err = 0;
3956 int running, ticks;
3957 uint16_t cmd;
3958
3959 ticks = mxge_ticks;
3960 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3961 if (running) {
3962 /* aggregate stats from different slices */
3963 pkts = mxge_update_stats(sc);
3964 if (!sc->watchdog_countdown) {
3965 err = mxge_watchdog(sc);
3966 sc->watchdog_countdown = 4;
3967 }
3968 sc->watchdog_countdown--;
3969 }
3970 if (pkts == 0) {
3971 /* ensure NIC did not suffer h/w fault while idle */
3972 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3973 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3974 sc->dying = 2;
3975 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3976 err = ENXIO;
3977 }
3978 /* look less often if NIC is idle */
3979 ticks *= 4;
3980 }
3981
3982 if (err == 0)
3983 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3984
3985}
3986
3987static int
3988mxge_media_change(struct ifnet *ifp)
3989{
3990 return EINVAL;
3991}
3992
3993static int
3994mxge_change_mtu(mxge_softc_t *sc, int mtu)
3995{
3996 struct ifnet *ifp = sc->ifp;
3997 int real_mtu, old_mtu;
3998 int err = 0;
3999
4000
4001 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4002 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4003 return EINVAL;
4004 mtx_lock(&sc->driver_mtx);
4005 old_mtu = ifp->if_mtu;
4006 ifp->if_mtu = mtu;
4007 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
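		/* if a re-open at the new MTU fails, fall back to the
		   old MTU and re-open at that */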
4008 mxge_close(sc, 0);
4009 err = mxge_open(sc);
4010 if (err != 0) {
4011 ifp->if_mtu = old_mtu;
4012 mxge_close(sc, 0);
4013 (void) mxge_open(sc);
4014 }
4015 }
4016 mtx_unlock(&sc->driver_mtx);
4017 return err;
4018}
4019
4020static void
4021mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4022{
4023 mxge_softc_t *sc = ifp->if_softc;
4024
4025
4026 if (sc == NULL)
4027 return;
4028 ifmr->ifm_status = IFM_AVALID;
4029 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4030 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
4031 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
4032}
4033
4034static int
4035mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4036{
4037 mxge_softc_t *sc = ifp->if_softc;
4038 struct ifreq *ifr = (struct ifreq *)data;
4039 int err, mask;
4040
4041 err = 0;
4042 switch (command) {
4043 case SIOCSIFADDR:
4044 case SIOCGIFADDR:
4045 err = ether_ioctl(ifp, command, data);
4046 break;
4047
4048 case SIOCSIFMTU:
4049 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4050 break;
4051
4052 case SIOCSIFFLAGS:
4053 mtx_lock(&sc->driver_mtx);
4054 if (sc->dying) {
4055 mtx_unlock(&sc->driver_mtx);
4056 return EINVAL;
4057 }
4058 if (ifp->if_flags & IFF_UP) {
4059 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4060 err = mxge_open(sc);
4061 } else {
4062 				/* take care of promisc and allmulti
4063 				   flag changes */
4064 mxge_change_promisc(sc,
4065 ifp->if_flags & IFF_PROMISC);
4066 mxge_set_multicast_list(sc);
4067 }
4068 } else {
4069 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4070 mxge_close(sc, 0);
4071 }
4072 }
4073 mtx_unlock(&sc->driver_mtx);
4074 break;
4075
4076 case SIOCADDMULTI:
4077 case SIOCDELMULTI:
4078 mtx_lock(&sc->driver_mtx);
4079 mxge_set_multicast_list(sc);
4080 mtx_unlock(&sc->driver_mtx);
4081 break;
4082
4083 case SIOCSIFCAP:
4084 mtx_lock(&sc->driver_mtx);
4085 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4086 if (mask & IFCAP_TXCSUM) {
4087 if (IFCAP_TXCSUM & ifp->if_capenable) {
4088 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4089 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4090 | CSUM_TSO);
4091 } else {
4092 ifp->if_capenable |= IFCAP_TXCSUM;
4093 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4094 }
4095 } else if (mask & IFCAP_RXCSUM) {
4096 if (IFCAP_RXCSUM & ifp->if_capenable) {
4097 ifp->if_capenable &= ~IFCAP_RXCSUM;
4098 sc->csum_flag = 0;
4099 } else {
4100 ifp->if_capenable |= IFCAP_RXCSUM;
4101 sc->csum_flag = 1;
4102 }
4103 }
4104 if (mask & IFCAP_TSO4) {
4105 if (IFCAP_TSO4 & ifp->if_capenable) {
4106 ifp->if_capenable &= ~IFCAP_TSO4;
4107 ifp->if_hwassist &= ~CSUM_TSO;
4108 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4109 ifp->if_capenable |= IFCAP_TSO4;
4110 ifp->if_hwassist |= CSUM_TSO;
4111 } else {
4112 printf("mxge requires tx checksum offload"
4113 " be enabled to use TSO\n");
4114 err = EINVAL;
4115 }
4116 }
4117 if (mask & IFCAP_LRO) {
4118 if (IFCAP_LRO & ifp->if_capenable)
4119 err = mxge_change_lro_locked(sc, 0);
4120 else
4121 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4122 }
4123 if (mask & IFCAP_VLAN_HWTAGGING)
4124 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4125 mtx_unlock(&sc->driver_mtx);
4126 VLAN_CAPABILITIES(ifp);
4127
4128 break;
4129
4130 case SIOCGIFMEDIA:
4131 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4132 &sc->media, command);
4133 break;
4134
4135 default:
4136 err = ENOTTY;
4137 }
4138 return err;
4139}
4140
4141static void
4142mxge_fetch_tunables(mxge_softc_t *sc)
4143{
4144
4145 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4146 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4147 &mxge_flow_control);
4148 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4149 &mxge_intr_coal_delay);
4150 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4151 &mxge_nvidia_ecrc_enable);
4152 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4153 &mxge_force_firmware);
4154 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4155 &mxge_deassert_wait);
4156 TUNABLE_INT_FETCH("hw.mxge.verbose",
4157 &mxge_verbose);
4158 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4159 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4160 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4161 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4162 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4163 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4164 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4165 if (sc->lro_cnt != 0)
4166 mxge_lro_cnt = sc->lro_cnt;
4167
4168 if (bootverbose)
4169 mxge_verbose = 1;
4170 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4171 mxge_intr_coal_delay = 30;
4172 if (mxge_ticks == 0)
4173 mxge_ticks = hz / 2;
4174 sc->pause = mxge_flow_control;
4175 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4176 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4177 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4178 }
4179 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4180 mxge_initial_mtu < ETHER_MIN_LEN)
4181 mxge_initial_mtu = ETHERMTU_JUMBO;
4182
4183 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4184 mxge_throttle = MXGE_MAX_THROTTLE;
4185 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4186 mxge_throttle = MXGE_MIN_THROTTLE;
4187 sc->throttle = mxge_throttle;
4188}
4189
4190
4191static void
4192mxge_free_slices(mxge_softc_t *sc)
4193{
4194 struct mxge_slice_state *ss;
4195 int i;
4196
4197
4198 if (sc->ss == NULL)
4199 return;
4200
4201 for (i = 0; i < sc->num_slices; i++) {
4202 ss = &sc->ss[i];
4203 if (ss->fw_stats != NULL) {
4204 mxge_dma_free(&ss->fw_stats_dma);
4205 ss->fw_stats = NULL;
4206#ifdef IFNET_BUF_RING
4207 if (ss->tx.br != NULL) {
4208 drbr_free(ss->tx.br, M_DEVBUF);
4209 ss->tx.br = NULL;
4210 }
4211#endif
4212 mtx_destroy(&ss->tx.mtx);
4213 }
4214 if (ss->rx_done.entry != NULL) {
4215 mxge_dma_free(&ss->rx_done.dma);
4216 ss->rx_done.entry = NULL;
4217 }
4218 }
4219 free(sc->ss, M_DEVBUF);
4220 sc->ss = NULL;
4221}
4222
4223static int
4224mxge_alloc_slices(mxge_softc_t *sc)
4225{
4226 mxge_cmd_t cmd;
4227 struct mxge_slice_state *ss;
4228 size_t bytes;
4229 int err, i, max_intr_slots;
4230
4231 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4232 if (err != 0) {
4233 device_printf(sc->dev, "Cannot determine rx ring size\n");
4234 return err;
4235 }
4236 sc->rx_ring_size = cmd.data0;
4237 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
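	/* sized to hold a completion for every slot in both the
	   small and big receive rings */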
4238
4239 bytes = sizeof (*sc->ss) * sc->num_slices;
4240 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4241 if (sc->ss == NULL)
4242 return (ENOMEM);
4243 for (i = 0; i < sc->num_slices; i++) {
4244 ss = &sc->ss[i];
4245
4246 ss->sc = sc;
4247
4248 /* allocate per-slice rx interrupt queues */
4249
4250 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4251 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4252 if (err != 0)
4253 goto abort;
4254 ss->rx_done.entry = ss->rx_done.dma.addr;
4255 bzero(ss->rx_done.entry, bytes);
4256
4257 /*
4258 * allocate the per-slice firmware stats; stats
4259 		 * (including tx) are used only on the first
4260 * slice for now
4261 */
4262#ifndef IFNET_BUF_RING
4263 if (i > 0)
4264 continue;
4265#endif
4266
4267 bytes = sizeof (*ss->fw_stats);
4268 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4269 sizeof (*ss->fw_stats), 64);
4270 if (err != 0)
4271 goto abort;
4272 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4273 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4274 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4275 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4276#ifdef IFNET_BUF_RING
4277 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4278 &ss->tx.mtx);
4279#endif
4280 }
4281
4282 return (0);
4283
4284abort:
4285 mxge_free_slices(sc);
4286 return (ENOMEM);
4287}
4288
4289static void
4290mxge_slice_probe(mxge_softc_t *sc)
4291{
4292 mxge_cmd_t cmd;
4293 char *old_fw;
4294 int msix_cnt, status, max_intr_slots;
4295
4296 sc->num_slices = 1;
4297 /*
4298 	 * don't enable multiple slices if they have been disabled
4299 	 * via the tunable, or if this is not an SMP system
4300 */
4301
4302 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4303 return;
4304
4305 /* see how many MSI-X interrupts are available */
4306 msix_cnt = pci_msix_count(sc->dev);
4307 if (msix_cnt < 2)
4308 return;
4309
4310 	/* now load the slice-aware firmware to see what it supports */
4311 old_fw = sc->fw_name;
4312 if (old_fw == mxge_fw_aligned)
4313 sc->fw_name = mxge_fw_rss_aligned;
4314 else
4315 sc->fw_name = mxge_fw_rss_unaligned;
4316 status = mxge_load_firmware(sc, 0);
4317 if (status != 0) {
4318 device_printf(sc->dev, "Falling back to a single slice\n");
4319 return;
4320 }
4321
4322 /* try to send a reset command to the card to see if it
4323 is alive */
4324 memset(&cmd, 0, sizeof (cmd));
4325 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4326 if (status != 0) {
4327 device_printf(sc->dev, "failed reset\n");
4328 goto abort_with_fw;
4329 }
4330
4331 /* get rx ring size */
4332 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4333 if (status != 0) {
4334 device_printf(sc->dev, "Cannot determine rx ring size\n");
4335 goto abort_with_fw;
4336 }
4337 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4338
4339 /* tell it the size of the interrupt queues */
4340 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4341 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4342 if (status != 0) {
4343 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4344 goto abort_with_fw;
4345 }
4346
4347 /* ask the maximum number of slices it supports */
4348 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4349 if (status != 0) {
4350 device_printf(sc->dev,
4351 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4352 goto abort_with_fw;
4353 }
4354 sc->num_slices = cmd.data0;
4355 if (sc->num_slices > msix_cnt)
4356 sc->num_slices = msix_cnt;
4357
4358 if (mxge_max_slices == -1) {
4359 /* cap to number of CPUs in system */
4360 if (sc->num_slices > mp_ncpus)
4361 sc->num_slices = mp_ncpus;
4362 } else {
4363 if (sc->num_slices > mxge_max_slices)
4364 sc->num_slices = mxge_max_slices;
4365 }
4366 /* make sure it is a power of two */
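	/* (n & (n - 1)) == 0 holds only for powers of two */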
4367 while (sc->num_slices & (sc->num_slices - 1))
4368 sc->num_slices--;
4369
4370 if (mxge_verbose)
4371 device_printf(sc->dev, "using %d slices\n",
4372 sc->num_slices);
4373
4374 return;
4375
4376abort_with_fw:
4377 sc->fw_name = old_fw;
4378 (void) mxge_load_firmware(sc, 0);
4379}
4380
4381static int
4382mxge_add_msix_irqs(mxge_softc_t *sc)
4383{
4384 size_t bytes;
4385 int count, err, i, rid;
4386
4387 rid = PCIR_BAR(2);
4388 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4389 &rid, RF_ACTIVE);
4390
4391 if (sc->msix_table_res == NULL) {
4392 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4393 return ENXIO;
4394 }
4395
4396 count = sc->num_slices;
4397 err = pci_alloc_msix(sc->dev, &count);
4398 if (err != 0) {
4399 		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4400 			      "err = %d\n", sc->num_slices, err);
4401 goto abort_with_msix_table;
4402 }
4403 if (count < sc->num_slices) {
4404 		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4405 			      sc->num_slices, count);
4406 device_printf(sc->dev,
4407 "Try setting hw.mxge.max_slices to %d\n",
4408 count);
4409 err = ENOSPC;
4410 goto abort_with_msix;
4411 }
4412 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4413 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4414 if (sc->msix_irq_res == NULL) {
4415 err = ENOMEM;
4416 goto abort_with_msix;
4417 }
4418
4419 for (i = 0; i < sc->num_slices; i++) {
4420 rid = i + 1;
4421 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4422 SYS_RES_IRQ,
4423 &rid, RF_ACTIVE);
4424 if (sc->msix_irq_res[i] == NULL) {
4425 device_printf(sc->dev, "couldn't allocate IRQ res"
4426 " for message %d\n", i);
4427 err = ENXIO;
4428 goto abort_with_res;
4429 }
4430 }
4431
4432 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4433 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4434
4435 for (i = 0; i < sc->num_slices; i++) {
4436 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4437 INTR_TYPE_NET | INTR_MPSAFE,
4438#if __FreeBSD_version > 700030
4439 NULL,
4440#endif
4441 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4442 if (err != 0) {
4443 device_printf(sc->dev, "couldn't setup intr for "
4444 "message %d\n", i);
4445 goto abort_with_intr;
4446 }
4447 }
4448
4449 if (mxge_verbose) {
4450 device_printf(sc->dev, "using %d msix IRQs:",
4451 sc->num_slices);
4452 for (i = 0; i < sc->num_slices; i++)
4453 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4454 printf("\n");
4455 }
4456 return (0);
4457
4458abort_with_intr:
4459 for (i = 0; i < sc->num_slices; i++) {
4460 if (sc->msix_ih[i] != NULL) {
4461 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4462 sc->msix_ih[i]);
4463 sc->msix_ih[i] = NULL;
4464 }
4465 }
4466 free(sc->msix_ih, M_DEVBUF);
4467
4468
4469abort_with_res:
4470 for (i = 0; i < sc->num_slices; i++) {
4471 rid = i + 1;
4472 if (sc->msix_irq_res[i] != NULL)
4473 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4474 sc->msix_irq_res[i]);
4475 sc->msix_irq_res[i] = NULL;
4476 }
4477 free(sc->msix_irq_res, M_DEVBUF);
4478
4479
4480abort_with_msix:
4481 pci_release_msi(sc->dev);
4482
4483abort_with_msix_table:
4484 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4485 sc->msix_table_res);
4486
4487 return err;
4488}
4489
4490static int
4491mxge_add_single_irq(mxge_softc_t *sc)
4492{
4493 int count, err, rid;
4494
4495 count = pci_msi_count(sc->dev);
4496 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4497 rid = 1;
4498 } else {
4499 rid = 0;
4500 sc->legacy_irq = 1;
4501 }
4502 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4503 1, RF_SHAREABLE | RF_ACTIVE);
4504 if (sc->irq_res == NULL) {
4505 device_printf(sc->dev, "could not alloc interrupt\n");
4506 return ENXIO;
4507 }
4508 if (mxge_verbose)
4509 device_printf(sc->dev, "using %s irq %ld\n",
4510 sc->legacy_irq ? "INTx" : "MSI",
4511 rman_get_start(sc->irq_res));
4512 err = bus_setup_intr(sc->dev, sc->irq_res,
4513 INTR_TYPE_NET | INTR_MPSAFE,
4514#if __FreeBSD_version > 700030
4515 NULL,
4516#endif
4517 mxge_intr, &sc->ss[0], &sc->ih);
4518 if (err != 0) {
4519 bus_release_resource(sc->dev, SYS_RES_IRQ,
4520 sc->legacy_irq ? 0 : 1, sc->irq_res);
4521 if (!sc->legacy_irq)
4522 pci_release_msi(sc->dev);
4523 }
4524 return err;
4525}
4526
4527static void
4528mxge_rem_msix_irqs(mxge_softc_t *sc)
4529{
4530 int i, rid;
4531
4532 for (i = 0; i < sc->num_slices; i++) {
4533 if (sc->msix_ih[i] != NULL) {
4534 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4535 sc->msix_ih[i]);
4536 sc->msix_ih[i] = NULL;
4537 }
4538 }
4539 free(sc->msix_ih, M_DEVBUF);
4540
4541 for (i = 0; i < sc->num_slices; i++) {
4542 rid = i + 1;
4543 if (sc->msix_irq_res[i] != NULL)
4544 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4545 sc->msix_irq_res[i]);
4546 sc->msix_irq_res[i] = NULL;
4547 }
4548 free(sc->msix_irq_res, M_DEVBUF);
4549
4550 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4551 sc->msix_table_res);
4552
4553 pci_release_msi(sc->dev);
4554 return;
4555}
4556
4557static void
4558mxge_rem_single_irq(mxge_softc_t *sc)
4559{
4560 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4561 bus_release_resource(sc->dev, SYS_RES_IRQ,
4562 sc->legacy_irq ? 0 : 1, sc->irq_res);
4563 if (!sc->legacy_irq)
4564 pci_release_msi(sc->dev);
4565}
4566
4567static void
4568mxge_rem_irq(mxge_softc_t *sc)
4569{
4570 if (sc->num_slices > 1)
4571 mxge_rem_msix_irqs(sc);
4572 else
4573 mxge_rem_single_irq(sc);
4574}
4575
4576static int
4577mxge_add_irq(mxge_softc_t *sc)
4578{
4579 int err;
4580
4581 if (sc->num_slices > 1)
4582 err = mxge_add_msix_irqs(sc);
4583 else
4584 err = mxge_add_single_irq(sc);
4585
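	/* note: this retry path is compiled out by the leading constant 0 */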
4586 if (0 && err == 0 && sc->num_slices > 1) {
4587 mxge_rem_msix_irqs(sc);
4588 err = mxge_add_msix_irqs(sc);
4589 }
4590 return err;
4591}
4592
4593
4594static int
4595mxge_attach(device_t dev)
4596{
4597 mxge_softc_t *sc = device_get_softc(dev);
4598 struct ifnet *ifp;
4599 int err, rid;
4600
4601 sc->dev = dev;
4602 mxge_fetch_tunables(sc);
4603
4604 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4605 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
4606 taskqueue_thread_enqueue,
4607 &sc->tq);
4608 if (sc->tq == NULL) {
4609 err = ENOMEM;
4610 goto abort_with_nothing;
4611 }
4612 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4613 device_get_nameunit(sc->dev));
4614
4615 err = bus_dma_tag_create(NULL, /* parent */
4616 1, /* alignment */
4617 0, /* boundary */
4618 BUS_SPACE_MAXADDR, /* low */
4619 BUS_SPACE_MAXADDR, /* high */
4620 NULL, NULL, /* filter */
4621 65536 + 256, /* maxsize */
4622 MXGE_MAX_SEND_DESC, /* num segs */
4623 65536, /* maxsegsize */
4624 0, /* flags */
4625 NULL, NULL, /* lock */
4626 &sc->parent_dmat); /* tag */
4627
4628 if (err != 0) {
4629 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4630 err);
4631 goto abort_with_tq;
4632 }
4633
4634 ifp = sc->ifp = if_alloc(IFT_ETHER);
4635 if (ifp == NULL) {
4636 device_printf(dev, "can not if_alloc()\n");
4637 err = ENOSPC;
4638 goto abort_with_parent_dmat;
4639 }
4640 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4641
4642 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4643 device_get_nameunit(dev));
4644 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4645 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4646 "%s:drv", device_get_nameunit(dev));
4647 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4648 MTX_NETWORK_LOCK, MTX_DEF);
4649
4650 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4651
4652 mxge_setup_cfg_space(sc);
4653
4654 /* Map the board into the kernel */
4655 rid = PCIR_BARS;
4656 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4657 ~0, 1, RF_ACTIVE);
4658 if (sc->mem_res == NULL) {
4659 device_printf(dev, "could not map memory\n");
4660 err = ENXIO;
4661 goto abort_with_lock;
4662 }
4663 sc->sram = rman_get_virtual(sc->mem_res);
4664 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4665 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4666 device_printf(dev, "impossible memory region size %ld\n",
4667 rman_get_size(sc->mem_res));
4668 err = ENXIO;
4669 goto abort_with_mem_res;
4670 }
4671
4672 /* make NULL terminated copy of the EEPROM strings section of
4673 lanai SRAM */
4674 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4675 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4676 rman_get_bushandle(sc->mem_res),
4677 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4678 sc->eeprom_strings,
4679 MXGE_EEPROM_STRINGS_SIZE - 2);
4680 err = mxge_parse_strings(sc);
4681 if (err != 0)
4682 goto abort_with_mem_res;
4683
4684 /* Enable write combining for efficient use of PCIe bus */
4685 mxge_enable_wc(sc);
4686
4687 /* Allocate the out of band dma memory */
4688 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4689 sizeof (mxge_cmd_t), 64);
4690 if (err != 0)
4691 goto abort_with_mem_res;
4692 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4693 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4694 if (err != 0)
4695 goto abort_with_cmd_dma;
4696
4697 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4698 if (err != 0)
4699 goto abort_with_zeropad_dma;
4700
4701 /* select & load the firmware */
4702 err = mxge_select_firmware(sc);
4703 if (err != 0)
4704 goto abort_with_dmabench;
4705 sc->intr_coal_delay = mxge_intr_coal_delay;
4706
4707 mxge_slice_probe(sc);
4708 err = mxge_alloc_slices(sc);
4709 if (err != 0)
4710 goto abort_with_dmabench;
4711
4712 err = mxge_reset(sc, 0);
4713 if (err != 0)
4714 goto abort_with_slices;
4715
4716 err = mxge_alloc_rings(sc);
4717 if (err != 0) {
4718 device_printf(sc->dev, "failed to allocate rings\n");
4719 		goto abort_with_slices;
4720 }
4721
4722 err = mxge_add_irq(sc);
4723 if (err != 0) {
4724 device_printf(sc->dev, "failed to add irq\n");
4725 goto abort_with_rings;
4726 }
4727
4728 ifp->if_baudrate = IF_Gbps(10UL);
4729 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4730 IFCAP_VLAN_MTU;
4731#ifdef INET
4732 ifp->if_capabilities |= IFCAP_LRO;
4733#endif
4734
4735#ifdef MXGE_NEW_VLAN_API
4736 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4737#endif
4738
4739 sc->max_mtu = mxge_max_mtu(sc);
4740 if (sc->max_mtu >= 9000)
4741 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4742 else
4743 device_printf(dev, "MTU limited to %d. Install "
4744 "latest firmware for 9000 byte jumbo support\n",
4745 sc->max_mtu - ETHER_HDR_LEN);
4746 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4747 ifp->if_capenable = ifp->if_capabilities;
4748 if (sc->lro_cnt == 0)
4749 ifp->if_capenable &= ~IFCAP_LRO;
4750 sc->csum_flag = 1;
4751 ifp->if_init = mxge_init;
4752 ifp->if_softc = sc;
4753 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4754 ifp->if_ioctl = mxge_ioctl;
4755 ifp->if_start = mxge_start;
4756 /* Initialise the ifmedia structure */
4757 ifmedia_init(&sc->media, 0, mxge_media_change,
4758 mxge_media_status);
4759 mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
4760 mxge_media_probe(sc);
4761 sc->dying = 0;
4762 ether_ifattach(ifp, sc->mac_addr);
4763 /* ether_ifattach sets mtu to ETHERMTU */
4764 if (mxge_initial_mtu != ETHERMTU)
4765 mxge_change_mtu(sc, mxge_initial_mtu);
4766
4767 mxge_add_sysctls(sc);
4768#ifdef IFNET_BUF_RING
4769 ifp->if_transmit = mxge_transmit;
4770 ifp->if_qflush = mxge_qflush;
4771#endif
4772 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4773 return 0;
4774
4775abort_with_rings:
4776 mxge_free_rings(sc);
4777abort_with_slices:
4778 mxge_free_slices(sc);
4779abort_with_dmabench:
4780 mxge_dma_free(&sc->dmabench_dma);
4781abort_with_zeropad_dma:
4782 mxge_dma_free(&sc->zeropad_dma);
4783abort_with_cmd_dma:
4784 mxge_dma_free(&sc->cmd_dma);
4785abort_with_mem_res:
4786 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4787abort_with_lock:
4788 pci_disable_busmaster(dev);
4789 mtx_destroy(&sc->cmd_mtx);
4790 mtx_destroy(&sc->driver_mtx);
4791 if_free(ifp);
4792abort_with_parent_dmat:
4793 bus_dma_tag_destroy(sc->parent_dmat);
4794abort_with_tq:
4795 if (sc->tq != NULL) {
4796 taskqueue_drain(sc->tq, &sc->watchdog_task);
4797 taskqueue_free(sc->tq);
4798 sc->tq = NULL;
4799 }
4800abort_with_nothing:
4801 return err;
4802}
4803
4804static int
4805mxge_detach(device_t dev)
4806{
4807 mxge_softc_t *sc = device_get_softc(dev);
4808
4809 if (mxge_vlans_active(sc)) {
4810 device_printf(sc->dev,
4811 "Detach vlans before removing module\n");
4812 return EBUSY;
4813 }
4814 mtx_lock(&sc->driver_mtx);
4815 sc->dying = 1;
4816 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4817 mxge_close(sc, 0);
4818 mtx_unlock(&sc->driver_mtx);
4819 ether_ifdetach(sc->ifp);
4820 if (sc->tq != NULL) {
4821 taskqueue_drain(sc->tq, &sc->watchdog_task);
4822 taskqueue_free(sc->tq);
4823 sc->tq = NULL;
4824 }
4825 callout_drain(&sc->co_hdl);
4826 ifmedia_removeall(&sc->media);
4827 mxge_dummy_rdma(sc, 0);
4828 mxge_rem_sysctls(sc);
4829 mxge_rem_irq(sc);
4830 mxge_free_rings(sc);
4831 mxge_free_slices(sc);
4832 mxge_dma_free(&sc->dmabench_dma);
4833 mxge_dma_free(&sc->zeropad_dma);
4834 mxge_dma_free(&sc->cmd_dma);
4835 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4836 pci_disable_busmaster(dev);
4837 mtx_destroy(&sc->cmd_mtx);
4838 mtx_destroy(&sc->driver_mtx);
4839 if_free(sc->ifp);
4840 bus_dma_tag_destroy(sc->parent_dmat);
4841 return 0;
4842}
4843
4844static int
4845mxge_shutdown(device_t dev)
4846{
4847 return 0;
4848}
4849
4850/*
4851 This file uses Myri10GE driver indentation.
4852
4853 Local Variables:
4854 c-file-style:"linux"
4855 tab-width:8
4856 End:
4857*/
2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2254 /* let BPF see it */
2255 BPF_MTAP(ifp, m);
2256 /* give it to the nic */
2257 mxge_encap(ss, m);
2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2259 return (err);
2260 }
2261 if (!drbr_empty(ifp, tx->br))
2262 mxge_start_locked(ss);
2263 return (0);
2264}
2265
2266static int
2267mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2268{
2269 mxge_softc_t *sc = ifp->if_softc;
2270 struct mxge_slice_state *ss;
2271 mxge_tx_ring_t *tx;
2272 int err = 0;
2273 int slice;
2274
2275 slice = m->m_pkthdr.flowid;
2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2277
2278 ss = &sc->ss[slice];
2279 tx = &ss->tx;
2280
2281 if (mtx_trylock(&tx->mtx)) {
2282 err = mxge_transmit_locked(ss, m);
2283 mtx_unlock(&tx->mtx);
2284 } else {
2285 err = drbr_enqueue(ifp, tx->br, m);
2286 }
2287
2288 return (err);
2289}
2290
2291#else
2292
2293static inline void
2294mxge_start_locked(struct mxge_slice_state *ss)
2295{
2296 mxge_softc_t *sc;
2297 struct mbuf *m;
2298 struct ifnet *ifp;
2299 mxge_tx_ring_t *tx;
2300
2301 sc = ss->sc;
2302 ifp = sc->ifp;
2303 tx = &ss->tx;
2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2306 if (m == NULL) {
2307 return;
2308 }
2309 /* let BPF see it */
2310 BPF_MTAP(ifp, m);
2311
2312 /* give it to the nic */
2313 mxge_encap(ss, m);
2314 }
2315 /* ran out of transmit slots */
2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2318 tx->stall++;
2319 }
2320}
2321#endif
2322static void
2323mxge_start(struct ifnet *ifp)
2324{
2325 mxge_softc_t *sc = ifp->if_softc;
2326 struct mxge_slice_state *ss;
2327
2328 /* only use the first slice for now */
2329 ss = &sc->ss[0];
2330 mtx_lock(&ss->tx.mtx);
2331 mxge_start_locked(ss);
2332 mtx_unlock(&ss->tx.mtx);
2333}
2334
2335/*
2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2337 * at most 32 bytes at a time, so as to avoid involving the software
2338 * pio handler in the nic. We re-write the first segment's low
2339 * DMA address to mark it valid only after we write the entire chunk
2340 * in a burst
2341 */
2342static inline void
2343mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2344 mcp_kreq_ether_recv_t *src)
2345{
2346 uint32_t low;
2347
2348 low = src->addr_low;
2349 src->addr_low = 0xffffffff;
2350 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2351 wmb();
2352 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2353 wmb();
2354 src->addr_low = low;
2355 dst->addr_low = low;
2356 wmb();
2357}
2358
2359static int
2360mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2361{
2362 bus_dma_segment_t seg;
2363 struct mbuf *m;
2364 mxge_rx_ring_t *rx = &ss->rx_small;
2365 int cnt, err;
2366
2367 m = m_gethdr(M_DONTWAIT, MT_DATA);
2368 if (m == NULL) {
2369 rx->alloc_fail++;
2370 err = ENOBUFS;
2371 goto done;
2372 }
2373 m->m_len = MHLEN;
2374 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2375 &seg, &cnt, BUS_DMA_NOWAIT);
2376 if (err != 0) {
2377 m_free(m);
2378 goto done;
2379 }
2380 rx->info[idx].m = m;
2381 rx->shadow[idx].addr_low =
2382 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2383 rx->shadow[idx].addr_high =
2384 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2385
2386done:
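	/* receive descriptors are handed to the NIC in aligned bursts
	   of 8; see mxge_submit_8rx() */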
2387 if ((idx & 7) == 7)
2388 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2389 return err;
2390}
2391
2392static int
2393mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2394{
2395 bus_dma_segment_t seg[3];
2396 struct mbuf *m;
2397 mxge_rx_ring_t *rx = &ss->rx_big;
2398 int cnt, err, i;
2399
2400 if (rx->cl_size == MCLBYTES)
2401 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2402 else
2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2404 if (m == NULL) {
2405 rx->alloc_fail++;
2406 err = ENOBUFS;
2407 goto done;
2408 }
2409 m->m_len = rx->mlen;
2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2411 seg, &cnt, BUS_DMA_NOWAIT);
2412 if (err != 0) {
2413 m_free(m);
2414 goto done;
2415 }
2416 rx->info[idx].m = m;
2417 rx->shadow[idx].addr_low =
2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2419 rx->shadow[idx].addr_high =
2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2421
2422#if MXGE_VIRT_JUMBOS
2423 for (i = 1; i < cnt; i++) {
2424 rx->shadow[idx + i].addr_low =
2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2426 rx->shadow[idx + i].addr_high =
2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2428 }
2429#endif
2430
2431done:
2432 for (i = 0; i < rx->nbufs; i++) {
2433 if ((idx & 7) == 7) {
2434 mxge_submit_8rx(&rx->lanai[idx - 7],
2435 &rx->shadow[idx - 7]);
2436 }
2437 idx++;
2438 }
2439 return err;
2440}
2441
2442/*
2443 * Myri10GE hardware checksums are not valid if the sender
2444 * padded the frame with non-zero padding. This is because
2445 * the firmware just does a simple 16-bit 1s complement
2446 * checksum across the entire frame, excluding the first 14
2447 * bytes. It is best to simply to check the checksum and
2448 * tell the stack about it only if the checksum is good
2449 */
2450
2451static inline uint16_t
2452mxge_rx_csum(struct mbuf *m, int csum)
2453{
2454 struct ether_header *eh;
2455 struct ip *ip;
2456 uint16_t c;
2457
2458 eh = mtod(m, struct ether_header *);
2459
2460 /* only deal with IPv4 TCP & UDP for now */
2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2462 return 1;
2463 ip = (struct ip *)(eh + 1);
2464 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2465 ip->ip_p != IPPROTO_UDP))
2466 return 1;
2467#ifdef INET
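	/*
	 * fold the IPv4 pseudo-header into the firmware's raw 16-bit
	 * 1s-complement sum; a valid TCP/UDP checksum makes the total
	 * 0xffff, so the xor below yields 0 for a good packet
	 */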
2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2469 		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
2470 		      (ip->ip_hl << 2) + ip->ip_p));
2471#else
2472 c = 1;
2473#endif
2474 c ^= 0xffff;
2475 return (c);
2476}
2477
2478static void
2479mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2480{
2481 struct ether_vlan_header *evl;
2482 struct ether_header *eh;
2483 uint32_t partial;
2484
2485 evl = mtod(m, struct ether_vlan_header *);
2486 eh = mtod(m, struct ether_header *);
2487
2488 /*
2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2490 * after what the firmware thought was the end of the ethernet
2491 * header.
2492 */
2493
2494 /* put checksum into host byte order */
2495 *csum = ntohs(*csum);
2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
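	/* subtract the 4 tag bytes in 1s-complement arithmetic: add the
	   complement with end-around carry, then fold the carries back
	   into the low 16 bits */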
2497 (*csum) += ~partial;
2498 (*csum) += ((*csum) < ~partial);
2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2501
2502 /* restore checksum to network byte order;
2503 later consumers expect this */
2504 *csum = htons(*csum);
2505
2506 /* save the tag */
2507#ifdef MXGE_NEW_VLAN_API
2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2509#else
2510 {
2511 struct m_tag *mtag;
2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2513 M_NOWAIT);
2514 if (mtag == NULL)
2515 return;
2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2517 m_tag_prepend(m, mtag);
2518 }
2519
2520#endif
2521 m->m_flags |= M_VLANTAG;
2522
2523 /*
2524 * Remove the 802.1q header by copying the Ethernet
2525 * addresses over it and adjusting the beginning of
2526 * the data in the mbuf. The encapsulated Ethernet
2527 * type field is already in place.
2528 */
2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2530 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2531 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2532}
2533
2534
2535static inline void
2536mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2537{
2538 mxge_softc_t *sc;
2539 struct ifnet *ifp;
2540 struct mbuf *m;
2541 struct ether_header *eh;
2542 mxge_rx_ring_t *rx;
2543 bus_dmamap_t old_map;
2544 int idx;
2545 uint16_t tcpudp_csum;
2546
2547 sc = ss->sc;
2548 ifp = sc->ifp;
2549 rx = &ss->rx_big;
2550 idx = rx->cnt & rx->mask;
2551 rx->cnt += rx->nbufs;
2552 /* save a pointer to the received mbuf */
2553 m = rx->info[idx].m;
2554 /* try to replace the received mbuf */
2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2556 /* drop the frame -- the old mbuf is re-cycled */
2557 ifp->if_ierrors++;
2558 return;
2559 }
2560
2561 /* unmap the received buffer */
2562 old_map = rx->info[idx].map;
2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2564 bus_dmamap_unload(rx->dmat, old_map);
2565
2566 /* swap the bus_dmamap_t's */
2567 rx->info[idx].map = rx->extra_map;
2568 rx->extra_map = old_map;
2569
2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2571 * aligned */
2572 m->m_data += MXGEFW_PAD;
2573
2574 m->m_pkthdr.rcvif = ifp;
2575 m->m_len = m->m_pkthdr.len = len;
2576 ss->ipackets++;
2577 eh = mtod(m, struct ether_header *);
2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2579 mxge_vlan_tag_remove(m, &csum);
2580 }
2581 /* if the checksum is valid, mark it in the mbuf header */
2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2584 return;
2585 /* otherwise, it was a UDP frame, or a TCP frame which
2586 we could not do LRO on. Tell the stack that the
2587 checksum is good */
2588 m->m_pkthdr.csum_data = 0xffff;
2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2590 }
2591 /* flowid only valid if RSS hashing is enabled */
2592 if (sc->num_slices > 1) {
2593 m->m_pkthdr.flowid = (ss - sc->ss);
2594 m->m_flags |= M_FLOWID;
2595 }
2596 /* pass the frame up the stack */
2597 (*ifp->if_input)(ifp, m);
2598}
2599
2600static inline void
2601mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2602{
2603 mxge_softc_t *sc;
2604 struct ifnet *ifp;
2605 struct ether_header *eh;
2606 struct mbuf *m;
2607 mxge_rx_ring_t *rx;
2608 bus_dmamap_t old_map;
2609 int idx;
2610 uint16_t tcpudp_csum;
2611
2612 sc = ss->sc;
2613 ifp = sc->ifp;
2614 rx = &ss->rx_small;
2615 idx = rx->cnt & rx->mask;
2616 rx->cnt++;
2617 /* save a pointer to the received mbuf */
2618 m = rx->info[idx].m;
2619 /* try to replace the received mbuf */
2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2621 /* drop the frame -- the old mbuf is re-cycled */
2622 ifp->if_ierrors++;
2623 return;
2624 }
2625
2626 /* unmap the received buffer */
2627 old_map = rx->info[idx].map;
2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2629 bus_dmamap_unload(rx->dmat, old_map);
2630
2631 /* swap the bus_dmamap_t's */
2632 rx->info[idx].map = rx->extra_map;
2633 rx->extra_map = old_map;
2634
2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2636 * aligned */
2637 m->m_data += MXGEFW_PAD;
2638
2639 m->m_pkthdr.rcvif = ifp;
2640 m->m_len = m->m_pkthdr.len = len;
2641 ss->ipackets++;
2642 eh = mtod(m, struct ether_header *);
2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2644 mxge_vlan_tag_remove(m, &csum);
2645 }
2646 /* if the checksum is valid, mark it in the mbuf header */
2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2649 return;
2650 /* otherwise, it was a UDP frame, or a TCP frame which
2651 we could not do LRO on. Tell the stack that the
2652 checksum is good */
2653 m->m_pkthdr.csum_data = 0xffff;
2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2655 }
2656 /* flowid only valid if RSS hashing is enabled */
2657 if (sc->num_slices > 1) {
2658 m->m_pkthdr.flowid = (ss - sc->ss);
2659 m->m_flags |= M_FLOWID;
2660 }
2661 /* pass the frame up the stack */
2662 (*ifp->if_input)(ifp, m);
2663}
2664
2665static inline void
2666mxge_clean_rx_done(struct mxge_slice_state *ss)
2667{
2668 mxge_rx_done_t *rx_done = &ss->rx_done;
2669 int limit = 0;
2670 uint16_t length;
2671 uint16_t checksum;
2672
2673
2674 while (rx_done->entry[rx_done->idx].length != 0) {
2675 length = ntohs(rx_done->entry[rx_done->idx].length);
2676 rx_done->entry[rx_done->idx].length = 0;
2677 checksum = rx_done->entry[rx_done->idx].checksum;
2678 if (length <= (MHLEN - MXGEFW_PAD))
2679 mxge_rx_done_small(ss, length, checksum);
2680 else
2681 mxge_rx_done_big(ss, length, checksum);
2682 rx_done->cnt++;
2683 rx_done->idx = rx_done->cnt & rx_done->mask;
2684
2685 /* limit potential for livelock */
2686 if (__predict_false(++limit > rx_done->mask / 2))
2687 break;
2688 }
2689#ifdef INET
2690 while (!SLIST_EMPTY(&ss->lro_active)) {
2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2692 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2693 mxge_lro_flush(ss, lro);
2694 }
2695#endif
2696}
2697
2698
2699static inline void
2700mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2701{
2702 struct ifnet *ifp;
2703 mxge_tx_ring_t *tx;
2704 struct mbuf *m;
2705 bus_dmamap_t map;
2706 int idx;
2707 int *flags;
2708
2709 tx = &ss->tx;
2710 ifp = ss->sc->ifp;
2711 while (tx->pkt_done != mcp_idx) {
2712 idx = tx->done & tx->mask;
2713 tx->done++;
2714 m = tx->info[idx].m;
2715 /* mbuf and DMA map only attached to the first
2716 segment per-mbuf */
2717 if (m != NULL) {
2718 ss->obytes += m->m_pkthdr.len;
2719 if (m->m_flags & M_MCAST)
2720 ss->omcasts++;
2721 ss->opackets++;
2722 tx->info[idx].m = NULL;
2723 map = tx->info[idx].map;
2724 bus_dmamap_unload(tx->dmat, map);
2725 m_freem(m);
2726 }
2727 if (tx->info[idx].flag) {
2728 tx->info[idx].flag = 0;
2729 tx->pkt_done++;
2730 }
2731 }
2732
2733 /* If we have space, clear IFF_OACTIVE to tell the stack that
2734 	   it's OK to send packets */
2735#ifdef IFNET_BUF_RING
2736 flags = &ss->if_drv_flags;
2737#else
2738 flags = &ifp->if_drv_flags;
2739#endif
2740 mtx_lock(&ss->tx.mtx);
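	/* restart transmission only once under 1/4 of the descriptors
	   are in use, to avoid waking the stack for every completion */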
2741 if ((*flags) & IFF_DRV_OACTIVE &&
2742 tx->req - tx->done < (tx->mask + 1)/4) {
2743 *(flags) &= ~IFF_DRV_OACTIVE;
2744 ss->tx.wake++;
2745 mxge_start_locked(ss);
2746 }
2747#ifdef IFNET_BUF_RING
2748 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2749 /* let the NIC stop polling this queue, since there
2750 * are no more transmits pending */
2751 if (tx->req == tx->done) {
2752 *tx->send_stop = 1;
2753 tx->queue_active = 0;
2754 tx->deactivate++;
2755 wmb();
2756 }
2757 }
2758#endif
2759 mtx_unlock(&ss->tx.mtx);
2760
2761}
2762
2763static struct mxge_media_type mxge_xfp_media_types[] =
2764{
2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2768 {0, (1 << 5), "10GBASE-ER"},
2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2770 {0, (1 << 3), "10GBASE-SW"},
2771 {0, (1 << 2), "10GBASE-LW"},
2772 {0, (1 << 1), "10GBASE-EW"},
2773 {0, (1 << 0), "Reserved"}
2774};
2775static struct mxge_media_type mxge_sfp_media_types[] =
2776{
2777 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2778 {0, (1 << 7), "Reserved"},
2779 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2780 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2781 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2782};
2783
2784static void
2785mxge_set_media(mxge_softc_t *sc, int type)
2786{
2787 sc->media_flags |= type;
2788 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2789 ifmedia_set(&sc->media, sc->media_flags);
2790}
2791
2792
2793/*
2794 * Determine the media type for a NIC. Some XFPs will identify
2795 * themselves only when their link is up, so this is initiated via a
2796 * link up interrupt. However, this can potentially take up to
2797 * several milliseconds, so it is run via the watchdog routine, rather
2798 * than in the interrupt handler itself. This need only be done
2799 * once, not each time the link is up.
2800 */
2801static void
2802mxge_media_probe(mxge_softc_t *sc)
2803{
2804 mxge_cmd_t cmd;
2805 char *cage_type;
2806 char *ptr;
2807 struct mxge_media_type *mxge_media_types = NULL;
2808 int i, err, ms, mxge_media_type_entries;
2809 uint32_t byte;
2810
2811 sc->need_media_probe = 0;
2812
2813 /* if we've already set a media type, we're done */
2814 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2815 return;
2816
2817 /*
2818 	 * parse the product code to determine the interface type
2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2820 * after the 3rd dash in the driver's cached copy of the
2821 * EEPROM's product code string.
2822 */
2823 ptr = sc->product_code_string;
2824 	if (ptr == NULL) {
2825 		device_printf(sc->dev, "Missing product code\n");
2826 		return;
2827 	}
2828 for (i = 0; i < 3; i++, ptr++) {
2829 ptr = index(ptr, '-');
2830 if (ptr == NULL) {
2831 device_printf(sc->dev,
2832 "only %d dashes in PC?!?\n", i);
2833 return;
2834 }
2835 }
2836 if (*ptr == 'C') {
2837 /* -C is CX4 */
2838 mxge_set_media(sc, IFM_10G_CX4);
2839 return;
2840 }
2841 else if (*ptr == 'Q') {
2842 /* -Q is Quad Ribbon Fiber */
2843 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2844 /* FreeBSD has no media type for Quad ribbon fiber */
2845 return;
2846 }
2847
2848 if (*ptr == 'R') {
2849 /* -R is XFP */
2850 mxge_media_types = mxge_xfp_media_types;
2851 mxge_media_type_entries =
2852 sizeof (mxge_xfp_media_types) /
2853 sizeof (mxge_xfp_media_types[0]);
2854 byte = MXGE_XFP_COMPLIANCE_BYTE;
2855 cage_type = "XFP";
2856 }
2857
2858 	if (*ptr == 'S' || *(ptr + 1) == 'S') {
2859 /* -S or -2S is SFP+ */
2860 mxge_media_types = mxge_sfp_media_types;
2861 mxge_media_type_entries =
2862 sizeof (mxge_sfp_media_types) /
2863 sizeof (mxge_sfp_media_types[0]);
2864 cage_type = "SFP+";
2865 byte = 3;
2866 }
2867
2868 if (mxge_media_types == NULL) {
2869 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2870 return;
2871 }
2872
2873 /*
2874 * At this point we know the NIC has an XFP cage, so now we
2875 * try to determine what is in the cage by using the
2876 	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2877 * register. We read just one byte, which may take over
2878 * a millisecond
2879 */
2880
2881 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2882 cmd.data1 = byte;
2883 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2884 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2885 device_printf(sc->dev, "failed to read XFP\n");
2886 }
2887 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2888 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2889 }
2890 if (err != MXGEFW_CMD_OK) {
2891 return;
2892 }
2893
2894 /* now we wait for the data to be cached */
2895 cmd.data0 = byte;
2896 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2897 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2898 DELAY(1000);
2899 cmd.data0 = byte;
2900 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2901 }
2902 if (err != MXGEFW_CMD_OK) {
2903 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2904 cage_type, err, ms);
2905 return;
2906 }
2907
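	/*
	 * entry 0 is matched by exact compare rather than a bit test:
	 * an SFP+ compliance byte of 0 means Twinax, and an XFP byte
	 * of 0x7f identifies a CX4 module
	 */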
2908 if (cmd.data0 == mxge_media_types[0].bitmask) {
2909 if (mxge_verbose)
2910 device_printf(sc->dev, "%s:%s\n", cage_type,
2911 mxge_media_types[0].name);
2912 mxge_set_media(sc, mxge_media_types[0].flag);
2913 return;
2914 }
2915 for (i = 1; i < mxge_media_type_entries; i++) {
2916 if (cmd.data0 & mxge_media_types[i].bitmask) {
2917 if (mxge_verbose)
2918 device_printf(sc->dev, "%s:%s\n",
2919 cage_type,
2920 mxge_media_types[i].name);
2921
2922 mxge_set_media(sc, mxge_media_types[i].flag);
2923 return;
2924 }
2925 }
2926 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2927 cmd.data0);
2928
2929 return;
2930}
2931
2932static void
2933mxge_intr(void *arg)
2934{
2935 struct mxge_slice_state *ss = arg;
2936 mxge_softc_t *sc = ss->sc;
2937 mcp_irq_data_t *stats = ss->fw_stats;
2938 mxge_tx_ring_t *tx = &ss->tx;
2939 mxge_rx_done_t *rx_done = &ss->rx_done;
2940 uint32_t send_done_count;
2941 uint8_t valid;
2942
2943
2944#ifndef IFNET_BUF_RING
2945 /* an interrupt on a non-zero slice is implicitly valid
2946 since MSI-X irqs are not shared */
2947 if (ss != sc->ss) {
2948 mxge_clean_rx_done(ss);
2949 *ss->irq_claim = be32toh(3);
2950 return;
2951 }
2952#endif
2953
2954 /* make sure the DMA has finished */
2955 if (!stats->valid) {
2956 return;
2957 }
2958 valid = stats->valid;
2959
2960 if (sc->legacy_irq) {
2961 /* lower legacy IRQ */
2962 *sc->irq_deassert = 0;
2963 if (!mxge_deassert_wait)
2964 /* don't wait for conf. that irq is low */
2965 stats->valid = 0;
2966 } else {
2967 stats->valid = 0;
2968 }
2969
2970 /* loop while waiting for legacy irq deassertion */
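	/*
	 * with mxge_deassert_wait set, stats->valid is left nonzero
	 * until the firmware confirms by DMA that the legacy IRQ line
	 * has actually been lowered
	 */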
2971 do {
2972 /* check for transmit completes and receives */
2973 send_done_count = be32toh(stats->send_done_count);
2974 while ((send_done_count != tx->pkt_done) ||
2975 (rx_done->entry[rx_done->idx].length != 0)) {
2976 if (send_done_count != tx->pkt_done)
2977 mxge_tx_done(ss, (int)send_done_count);
2978 mxge_clean_rx_done(ss);
2979 send_done_count = be32toh(stats->send_done_count);
2980 }
2981 if (sc->legacy_irq && mxge_deassert_wait)
2982 wmb();
2983 } while (*((volatile uint8_t *) &stats->valid));
2984
2985 /* fw link & error stats meaningful only on the first slice */
2986 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2987 if (sc->link_state != stats->link_up) {
2988 sc->link_state = stats->link_up;
2989 if (sc->link_state) {
2990 if_link_state_change(sc->ifp, LINK_STATE_UP);
2991 if (mxge_verbose)
2992 device_printf(sc->dev, "link up\n");
2993 } else {
2994 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2995 if (mxge_verbose)
2996 device_printf(sc->dev, "link down\n");
2997 }
2998 sc->need_media_probe = 1;
2999 }
3000 if (sc->rdma_tags_available !=
3001 be32toh(stats->rdma_tags_available)) {
3002 sc->rdma_tags_available =
3003 be32toh(stats->rdma_tags_available);
3004 device_printf(sc->dev, "RDMA timed out! %d tags "
3005 "left\n", sc->rdma_tags_available);
3006 }
3007
3008 if (stats->link_down) {
3009 sc->down_cnt += stats->link_down;
3010 sc->link_state = 0;
3011 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
3012 }
3013 }
3014
3015 /* check to see if we have rx token to pass back */
3016 if (valid & 0x1)
3017 *ss->irq_claim = be32toh(3);
3018 *(ss->irq_claim + 1) = be32toh(3);
3019}
3020
3021static void
3022mxge_init(void *arg)
3023{
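	/* nothing to do; the interface is brought up via
	   SIOCSIFFLAGS -> mxge_open() */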
3024}
3025
3026
3027
3028static void
3029mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3030{
3031 struct lro_entry *lro_entry;
3032 int i;
3033
3034 while (!SLIST_EMPTY(&ss->lro_free)) {
3035 lro_entry = SLIST_FIRST(&ss->lro_free);
3036 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3037 free(lro_entry, M_DEVBUF);
3038 }
3039
3040 for (i = 0; i <= ss->rx_big.mask; i++) {
3041 if (ss->rx_big.info[i].m == NULL)
3042 continue;
3043 bus_dmamap_unload(ss->rx_big.dmat,
3044 ss->rx_big.info[i].map);
3045 m_freem(ss->rx_big.info[i].m);
3046 ss->rx_big.info[i].m = NULL;
3047 }
3048
3049 for (i = 0; i <= ss->rx_small.mask; i++) {
3050 if (ss->rx_small.info[i].m == NULL)
3051 continue;
3052 bus_dmamap_unload(ss->rx_small.dmat,
3053 ss->rx_small.info[i].map);
3054 m_freem(ss->rx_small.info[i].m);
3055 ss->rx_small.info[i].m = NULL;
3056 }
3057
3058 /* transmit ring used only on the first slice */
3059 if (ss->tx.info == NULL)
3060 return;
3061
3062 for (i = 0; i <= ss->tx.mask; i++) {
3063 ss->tx.info[i].flag = 0;
3064 if (ss->tx.info[i].m == NULL)
3065 continue;
3066 bus_dmamap_unload(ss->tx.dmat,
3067 ss->tx.info[i].map);
3068 m_freem(ss->tx.info[i].m);
3069 ss->tx.info[i].m = NULL;
3070 }
3071}
3072
3073static void
3074mxge_free_mbufs(mxge_softc_t *sc)
3075{
3076 int slice;
3077
3078 for (slice = 0; slice < sc->num_slices; slice++)
3079 mxge_free_slice_mbufs(&sc->ss[slice]);
3080}
3081
3082static void
3083mxge_free_slice_rings(struct mxge_slice_state *ss)
3084{
3085 int i;
3086
3087
3088 if (ss->rx_done.entry != NULL)
3089 mxge_dma_free(&ss->rx_done.dma);
3090 ss->rx_done.entry = NULL;
3091
3092 if (ss->tx.req_bytes != NULL)
3093 free(ss->tx.req_bytes, M_DEVBUF);
3094 ss->tx.req_bytes = NULL;
3095
3096 if (ss->tx.seg_list != NULL)
3097 free(ss->tx.seg_list, M_DEVBUF);
3098 ss->tx.seg_list = NULL;
3099
3100 if (ss->rx_small.shadow != NULL)
3101 free(ss->rx_small.shadow, M_DEVBUF);
3102 ss->rx_small.shadow = NULL;
3103
3104 if (ss->rx_big.shadow != NULL)
3105 free(ss->rx_big.shadow, M_DEVBUF);
3106 ss->rx_big.shadow = NULL;
3107
3108 if (ss->tx.info != NULL) {
3109 if (ss->tx.dmat != NULL) {
3110 for (i = 0; i <= ss->tx.mask; i++) {
3111 bus_dmamap_destroy(ss->tx.dmat,
3112 ss->tx.info[i].map);
3113 }
3114 bus_dma_tag_destroy(ss->tx.dmat);
3115 }
3116 free(ss->tx.info, M_DEVBUF);
3117 }
3118 ss->tx.info = NULL;
3119
3120 if (ss->rx_small.info != NULL) {
3121 if (ss->rx_small.dmat != NULL) {
3122 for (i = 0; i <= ss->rx_small.mask; i++) {
3123 bus_dmamap_destroy(ss->rx_small.dmat,
3124 ss->rx_small.info[i].map);
3125 }
3126 bus_dmamap_destroy(ss->rx_small.dmat,
3127 ss->rx_small.extra_map);
3128 bus_dma_tag_destroy(ss->rx_small.dmat);
3129 }
3130 free(ss->rx_small.info, M_DEVBUF);
3131 }
3132 ss->rx_small.info = NULL;
3133
3134 if (ss->rx_big.info != NULL) {
3135 if (ss->rx_big.dmat != NULL) {
3136 for (i = 0; i <= ss->rx_big.mask; i++) {
3137 bus_dmamap_destroy(ss->rx_big.dmat,
3138 ss->rx_big.info[i].map);
3139 }
3140 bus_dmamap_destroy(ss->rx_big.dmat,
3141 ss->rx_big.extra_map);
3142 bus_dma_tag_destroy(ss->rx_big.dmat);
3143 }
3144 free(ss->rx_big.info, M_DEVBUF);
3145 }
3146 ss->rx_big.info = NULL;
3147}
3148
3149static void
3150mxge_free_rings(mxge_softc_t *sc)
3151{
3152 int slice;
3153
3154 for (slice = 0; slice < sc->num_slices; slice++)
3155 mxge_free_slice_rings(&sc->ss[slice]);
3156}
3157
3158static int
3159mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3160 int tx_ring_entries)
3161{
3162 mxge_softc_t *sc = ss->sc;
3163 size_t bytes;
3164 int err, i;
3165
3166 err = ENOMEM;
3167
3168 /* allocate per-slice receive resources */
3169
3170 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3171 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3172
3173 /* allocate the rx shadow rings */
3174 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3175 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3176 if (ss->rx_small.shadow == NULL)
3177 return err;
3178
3179 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3180 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3181 if (ss->rx_big.shadow == NULL)
3182 return err;
3183
3184 /* allocate the rx host info rings */
3185 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3186 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3187 if (ss->rx_small.info == NULL)
3188 return err;
3189
3190 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3191 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3192 if (ss->rx_big.info == NULL)
3193 return err;
3194
3195 /* allocate the rx busdma resources */
3196 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3197 1, /* alignment */
3198 4096, /* boundary */
3199 BUS_SPACE_MAXADDR, /* low */
3200 BUS_SPACE_MAXADDR, /* high */
3201 NULL, NULL, /* filter */
3202 MHLEN, /* maxsize */
3203 1, /* num segs */
3204 MHLEN, /* maxsegsize */
3205 BUS_DMA_ALLOCNOW, /* flags */
3206 NULL, NULL, /* lock */
3207 &ss->rx_small.dmat); /* tag */
3208 if (err != 0) {
3209 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3210 err);
3211 return err;
3212 }
3213
3214 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3215 1, /* alignment */
3216#if MXGE_VIRT_JUMBOS
3217 4096, /* boundary */
3218#else
3219 0, /* boundary */
3220#endif
3221 BUS_SPACE_MAXADDR, /* low */
3222 BUS_SPACE_MAXADDR, /* high */
3223 NULL, NULL, /* filter */
3224 3*4096, /* maxsize */
3225#if MXGE_VIRT_JUMBOS
3226 3, /* num segs */
3227 4096, /* maxsegsize*/
3228#else
3229 1, /* num segs */
3230 MJUM9BYTES, /* maxsegsize*/
3231#endif
3232 BUS_DMA_ALLOCNOW, /* flags */
3233 NULL, NULL, /* lock */
3234 &ss->rx_big.dmat); /* tag */
3235 if (err != 0) {
3236 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3237 err);
3238 return err;
3239 }
3240 for (i = 0; i <= ss->rx_small.mask; i++) {
3241 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3242 &ss->rx_small.info[i].map);
3243 if (err != 0) {
3244 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3245 err);
3246 return err;
3247 }
3248 }
3249 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3250 &ss->rx_small.extra_map);
3251 if (err != 0) {
3252 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3253 err);
3254 return err;
3255 }
3256
3257 for (i = 0; i <= ss->rx_big.mask; i++) {
3258 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3259 &ss->rx_big.info[i].map);
3260 if (err != 0) {
3261 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3262 err);
3263 return err;
3264 }
3265 }
3266 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3267 &ss->rx_big.extra_map);
3268 if (err != 0) {
3269 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3270 err);
3271 return err;
3272 }
3273
3274	/* now allocate TX resources */
3275
3276#ifndef IFNET_BUF_RING
3277 /* only use a single TX ring for now */
3278 if (ss != ss->sc->ss)
3279 return 0;
3280#endif
3281
3282 ss->tx.mask = tx_ring_entries - 1;
3283 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3284
3285
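	/*
	 * Note: the 8 extra bytes below leave slack so req_list can be
	 * rounded up to an 8-byte boundary; the 4 extra entries
	 * presumably give the send path room to pad a request list
	 * (assumption).
	 */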
3286 /* allocate the tx request copy block */
3287 bytes = 8 +
3288 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3289 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3290 if (ss->tx.req_bytes == NULL)
3291 return err;
3292 /* ensure req_list entries are aligned to 8 bytes */
3293 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3294 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3295
3296 /* allocate the tx busdma segment list */
3297 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3298 ss->tx.seg_list = (bus_dma_segment_t *)
3299 malloc(bytes, M_DEVBUF, M_WAITOK);
3300 if (ss->tx.seg_list == NULL)
3301 return err;
3302
3303 /* allocate the tx host info ring */
3304 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3305 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3306 if (ss->tx.info == NULL)
3307 return err;
3308
3309 /* allocate the tx busdma resources */
3310 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3311 1, /* alignment */
3312 sc->tx_boundary, /* boundary */
3313 BUS_SPACE_MAXADDR, /* low */
3314 BUS_SPACE_MAXADDR, /* high */
3315 NULL, NULL, /* filter */
3316 65536 + 256, /* maxsize */
3317 ss->tx.max_desc - 2, /* num segs */
3318 sc->tx_boundary, /* maxsegsz */
3319 BUS_DMA_ALLOCNOW, /* flags */
3320 NULL, NULL, /* lock */
3321 &ss->tx.dmat); /* tag */
3322
3323 if (err != 0) {
3324 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3325 err);
3326 return err;
3327 }
3328
3329	/* now use these tags to set up dmamaps for each slot
3330	   in the ring */
3331 for (i = 0; i <= ss->tx.mask; i++) {
3332 err = bus_dmamap_create(ss->tx.dmat, 0,
3333 &ss->tx.info[i].map);
3334 if (err != 0) {
3335 device_printf(sc->dev, "Err %d tx dmamap\n",
3336 err);
3337 return err;
3338 }
3339 }
3340 return 0;
3341
3342}
3343
3344static int
3345mxge_alloc_rings(mxge_softc_t *sc)
3346{
3347 mxge_cmd_t cmd;
3348 int tx_ring_size;
3349 int tx_ring_entries, rx_ring_entries;
3350 int err, slice;
3351
3352 /* get ring sizes */
3353 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3354 tx_ring_size = cmd.data0;
3355 if (err != 0) {
3356 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3357 goto abort;
3358 }
3359
3360 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3361 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3362 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3363 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3364 IFQ_SET_READY(&sc->ifp->if_snd);
3365
3366 for (slice = 0; slice < sc->num_slices; slice++) {
3367 err = mxge_alloc_slice_rings(&sc->ss[slice],
3368 rx_ring_entries,
3369 tx_ring_entries);
3370 if (err != 0)
3371 goto abort;
3372 }
3373 return 0;
3374
3375abort:
3376 mxge_free_rings(sc);
3377 return err;
3378
3379}
3380
3381
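/*
 * Pick the receive buffer layout for a given MTU: the smallest
 * cluster size that holds MTU + ethernet/vlan headers + firmware pad,
 * or (with MXGE_VIRT_JUMBOS) several 4KB pieces of one 9KB cluster.
 */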
3382static void
3383mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3384{
3385 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3386
3387 if (bufsize < MCLBYTES) {
3388 /* easy, everything fits in a single buffer */
3389 *big_buf_size = MCLBYTES;
3390 *cl_size = MCLBYTES;
3391 *nbufs = 1;
3392 return;
3393 }
3394
3395 if (bufsize < MJUMPAGESIZE) {
3396 /* still easy, everything still fits in a single buffer */
3397 *big_buf_size = MJUMPAGESIZE;
3398 *cl_size = MJUMPAGESIZE;
3399 *nbufs = 1;
3400 return;
3401 }
3402#if MXGE_VIRT_JUMBOS
3403 /* now we need to use virtually contiguous buffers */
3404 *cl_size = MJUM9BYTES;
3405 *big_buf_size = 4096;
3406 *nbufs = mtu / 4096 + 1;
3407 /* needs to be a power of two, so round up */
3408 if (*nbufs == 3)
3409 *nbufs = 4;
3410#else
3411 *cl_size = MJUM9BYTES;
3412 *big_buf_size = MJUM9BYTES;
3413 *nbufs = 1;
3414#endif
3415}
3416
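/*
 * Bring one slice up: seed its LRO free list, fetch the lanai
 * (NIC-resident) ring pointers from the firmware, and stock the
 * small and big receive rings with mbufs.
 */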
3417static int
3418mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3419{
3420 mxge_softc_t *sc;
3421 mxge_cmd_t cmd;
3422 bus_dmamap_t map;
3423 struct lro_entry *lro_entry;
3424 int err, i, slice;
3425
3426
3427 sc = ss->sc;
3428 slice = ss - sc->ss;
3429
3430 SLIST_INIT(&ss->lro_free);
3431 SLIST_INIT(&ss->lro_active);
3432
3433 for (i = 0; i < sc->lro_cnt; i++) {
3434 lro_entry = (struct lro_entry *)
3435 malloc(sizeof (*lro_entry), M_DEVBUF,
3436 M_NOWAIT | M_ZERO);
3437 if (lro_entry == NULL) {
3438 sc->lro_cnt = i;
3439 break;
3440 }
3441 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3442 }
3443 /* get the lanai pointers to the send and receive rings */
3444
3445 err = 0;
3446#ifndef IFNET_BUF_RING
3447 /* We currently only send from the first slice */
3448 if (slice == 0) {
3449#endif
3450 cmd.data0 = slice;
3451 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3452 ss->tx.lanai =
3453 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3454 ss->tx.send_go = (volatile uint32_t *)
3455 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3456 ss->tx.send_stop = (volatile uint32_t *)
3457 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3458#ifndef IFNET_BUF_RING
3459 }
3460#endif
3461 cmd.data0 = slice;
3462 err |= mxge_send_cmd(sc,
3463 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3464 ss->rx_small.lanai =
3465 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3466 cmd.data0 = slice;
3467 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3468 ss->rx_big.lanai =
3469 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3470
3471 if (err != 0) {
3472 device_printf(sc->dev,
3473 "failed to get ring sizes or locations\n");
3474 return EIO;
3475 }
3476
3477 /* stock receive rings */
3478 for (i = 0; i <= ss->rx_small.mask; i++) {
3479 map = ss->rx_small.info[i].map;
3480 err = mxge_get_buf_small(ss, map, i);
3481 if (err) {
3482			device_printf(sc->dev, "allocated %d/%d smalls\n",
3483 i, ss->rx_small.mask + 1);
3484 return ENOMEM;
3485 }
3486 }
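	/*
	 * Poison the big-ring shadow entries with an all-ones address,
	 * presumably so slots that were never refilled are easy to
	 * recognize (assumption).
	 */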
3487 for (i = 0; i <= ss->rx_big.mask; i++) {
3488 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3489 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3490 }
3491 ss->rx_big.nbufs = nbufs;
3492 ss->rx_big.cl_size = cl_size;
3493 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3494 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3495 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3496 map = ss->rx_big.info[i].map;
3497 err = mxge_get_buf_big(ss, map, i);
3498 if (err) {
3499			device_printf(sc->dev, "allocated %d/%d bigs\n",
3500 i, ss->rx_big.mask + 1);
3501 return ENOMEM;
3502 }
3503 }
3504 return 0;
3505}
3506
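/*
 * Full bring-up: reset the NIC, program the RSS indirection table
 * when multiple slices are in use, tell the firmware the buffer
 * geometry and stats DMA addresses, open each slice, and finally
 * send MXGEFW_CMD_ETHERNET_UP.
 */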
3507static int
3508mxge_open(mxge_softc_t *sc)
3509{
3510 mxge_cmd_t cmd;
3511 int err, big_bytes, nbufs, slice, cl_size, i;
3512 bus_addr_t bus;
3513 volatile uint8_t *itable;
3514 struct mxge_slice_state *ss;
3515
3516 /* Copy the MAC address in case it was overridden */
3517 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3518
3519 err = mxge_reset(sc, 1);
3520 if (err != 0) {
3521 device_printf(sc->dev, "failed to reset\n");
3522 return EIO;
3523 }
3524
3525 if (sc->num_slices > 1) {
3526 /* setup the indirection table */
3527 cmd.data0 = sc->num_slices;
3528 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3529 &cmd);
3530
3531 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3532 &cmd);
3533 if (err != 0) {
3534 device_printf(sc->dev,
3535 "failed to setup rss tables\n");
3536 return err;
3537 }
3538
3539 /* just enable an identity mapping */
3540 itable = sc->sram + cmd.data0;
3541 for (i = 0; i < sc->num_slices; i++)
3542 itable[i] = (uint8_t)i;
3543
3544 cmd.data0 = 1;
3545 cmd.data1 = mxge_rss_hash_type;
3546 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3547 if (err != 0) {
3548 device_printf(sc->dev, "failed to enable slices\n");
3549 return err;
3550 }
3551 }
3552
3553
3554 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3555
3556 cmd.data0 = nbufs;
3557 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3558 &cmd);
3559 /* error is only meaningful if we're trying to set
3560 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3561 if (err && nbufs > 1) {
3562 device_printf(sc->dev,
3563			      "Failed to set always-use-n to %d\n",
3564 nbufs);
3565 return EIO;
3566 }
3567 /* Give the firmware the mtu and the big and small buffer
3568 sizes. The firmware wants the big buf size to be a power
3569 of two. Luckily, FreeBSD's clusters are powers of two */
3570 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3571 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3572 cmd.data0 = MHLEN - MXGEFW_PAD;
3573 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3574 &cmd);
3575 cmd.data0 = big_bytes;
3576 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3577
3578 if (err != 0) {
3579 device_printf(sc->dev, "failed to setup params\n");
3580 goto abort;
3581 }
3582
3583	/* Now give the firmware the pointer to the stats block */
3584 for (slice = 0;
3585#ifdef IFNET_BUF_RING
3586 slice < sc->num_slices;
3587#else
3588 slice < 1;
3589#endif
3590 slice++) {
3591 ss = &sc->ss[slice];
3592 cmd.data0 =
3593 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3594 cmd.data1 =
3595 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3596 cmd.data2 = sizeof(struct mcp_irq_data);
3597 cmd.data2 |= (slice << 16);
3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3599 }
3600
3601 if (err != 0) {
3602 bus = sc->ss->fw_stats_dma.bus_addr;
3603 bus += offsetof(struct mcp_irq_data, send_done_count);
3604 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3605 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3606 err = mxge_send_cmd(sc,
3607 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3608 &cmd);
3609 /* Firmware cannot support multicast without STATS_DMA_V2 */
3610 sc->fw_multicast_support = 0;
3611 } else {
3612 sc->fw_multicast_support = 1;
3613 }
3614
3615 if (err != 0) {
3616 device_printf(sc->dev, "failed to setup params\n");
3617 goto abort;
3618 }
3619
3620 for (slice = 0; slice < sc->num_slices; slice++) {
3621 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3622 if (err != 0) {
3623 device_printf(sc->dev, "couldn't open slice %d\n",
3624 slice);
3625 goto abort;
3626 }
3627 }
3628
3629 /* Finally, start the firmware running */
3630 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3631 if (err) {
3632 device_printf(sc->dev, "Couldn't bring up link\n");
3633 goto abort;
3634 }
3635#ifdef IFNET_BUF_RING
3636 for (slice = 0; slice < sc->num_slices; slice++) {
3637 ss = &sc->ss[slice];
3638 ss->if_drv_flags |= IFF_DRV_RUNNING;
3639 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3640 }
3641#endif
3642 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3643 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3644
3645 return 0;
3646
3647
3648abort:
3649 mxge_free_mbufs(sc);
3650
3651 return err;
3652}
3653
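/*
 * Bring the NIC down.  Unless "down" is set (the watchdog path,
 * where the NIC has already rebooted), send ETHERNET_DOWN and wait
 * for the firmware's down interrupt to advance down_cnt.
 */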
3654static int
3655mxge_close(mxge_softc_t *sc, int down)
3656{
3657 mxge_cmd_t cmd;
3658 int err, old_down_cnt;
3659#ifdef IFNET_BUF_RING
3660 struct mxge_slice_state *ss;
3661 int slice;
3662#endif
3663
3664#ifdef IFNET_BUF_RING
3665 for (slice = 0; slice < sc->num_slices; slice++) {
3666 ss = &sc->ss[slice];
3667 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3668 }
3669#endif
3670 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3671 if (!down) {
3672 old_down_cnt = sc->down_cnt;
3673 wmb();
3674 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3675 if (err) {
3676 device_printf(sc->dev,
3677 "Couldn't bring down link\n");
3678 }
3679 if (old_down_cnt == sc->down_cnt) {
3680 /* wait for down irq */
3681 DELAY(10 * sc->intr_coal_delay);
3682 }
3683 wmb();
3684 if (old_down_cnt == sc->down_cnt) {
3685 device_printf(sc->dev, "never got down irq\n");
3686 }
3687 }
3688 mxge_free_mbufs(sc);
3689
3690 return 0;
3691}
3692
3693static void
3694mxge_setup_cfg_space(mxge_softc_t *sc)
3695{
3696 device_t dev = sc->dev;
3697 int reg;
3698 uint16_t cmd, lnk, pectl;
3699
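	/*
	 * Offset 0x12 within the PCIe capability is the Link Status
	 * register; offset 0x8 is Device Control, whose bits 14:12
	 * encode the max read request size (5 == 4096 bytes).
	 */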
3700	/* find the PCIe link width and set max read request to 4KB */
3701 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3702 lnk = pci_read_config(dev, reg + 0x12, 2);
3703 sc->link_width = (lnk >> 4) & 0x3f;
3704
3705 if (sc->pectl == 0) {
3706 pectl = pci_read_config(dev, reg + 0x8, 2);
3707 pectl = (pectl & ~0x7000) | (5 << 12);
3708 pci_write_config(dev, reg + 0x8, pectl, 2);
3709 sc->pectl = pectl;
3710 } else {
3711 /* restore saved pectl after watchdog reset */
3712 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3713 }
3714 }
3715
3716 /* Enable DMA and Memory space access */
3717 pci_enable_busmaster(dev);
3718 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3719 cmd |= PCIM_CMD_MEMEN;
3720 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3721}
3722
3723static uint32_t
3724mxge_read_reboot(mxge_softc_t *sc)
3725{
3726 device_t dev = sc->dev;
3727 uint32_t vs;
3728
3729 /* find the vendor specific offset */
3730 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3731 device_printf(sc->dev,
3732 "could not find vendor specific offset\n");
3733 return (uint32_t)-1;
3734 }
3735 /* enable read32 mode */
3736 pci_write_config(dev, vs + 0x10, 0x3, 1);
3737 /* tell NIC which register to read */
3738 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3739 return (pci_read_config(dev, vs + 0x14, 4));
3740}
3741
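/*
 * Recover from a firmware/NIC crash: detect the reboot via PCI
 * config space, quiesce transmit, restore config space, reload the
 * firmware, and reopen the interface if it was running.
 */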
3742static void
3743mxge_watchdog_reset(mxge_softc_t *sc)
3744{
3745 struct pci_devinfo *dinfo;
3746 struct mxge_slice_state *ss;
3747 int err, running, s, num_tx_slices = 1;
3748 uint32_t reboot;
3749 uint16_t cmd;
3750
3751 err = ENXIO;
3752
3753 device_printf(sc->dev, "Watchdog reset!\n");
3754
3755 /*
3756 * check to see if the NIC rebooted. If it did, then all of
3757 * PCI config space has been reset, and things like the
3758 * busmaster bit will be zero. If this is the case, then we
3759 * must restore PCI config space before the NIC can be used
3760 * again
3761 */
3762 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3763 if (cmd == 0xffff) {
3764 /*
3765 * maybe the watchdog caught the NIC rebooting; wait
3766 * up to 100ms for it to finish. If it does not come
3767 * back, then give up
3768 */
3769 DELAY(1000*100);
3770 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3771 if (cmd == 0xffff) {
3772 device_printf(sc->dev, "NIC disappeared!\n");
3773 }
3774 }
3775 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3776 /* print the reboot status */
3777 reboot = mxge_read_reboot(sc);
3778 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3779 reboot);
3780 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3781 if (running) {
3782
3783 /*
3784 * quiesce NIC so that TX routines will not try to
3785 * xmit after restoration of BAR
3786 */
3787
3788 /* Mark the link as down */
3789 if (sc->link_state) {
3790 sc->link_state = 0;
3791 if_link_state_change(sc->ifp,
3792 LINK_STATE_DOWN);
3793 }
3794#ifdef IFNET_BUF_RING
3795 num_tx_slices = sc->num_slices;
3796#endif
3797 /* grab all TX locks to ensure no tx */
3798 for (s = 0; s < num_tx_slices; s++) {
3799 ss = &sc->ss[s];
3800 mtx_lock(&ss->tx.mtx);
3801 }
3802 mxge_close(sc, 1);
3803 }
3804 /* restore PCI configuration space */
3805 dinfo = device_get_ivars(sc->dev);
3806 pci_cfg_restore(sc->dev, dinfo);
3807
3808 /* and redo any changes we made to our config space */
3809 mxge_setup_cfg_space(sc);
3810
3811 /* reload f/w */
3812 err = mxge_load_firmware(sc, 0);
3813 if (err) {
3814 device_printf(sc->dev,
3815 "Unable to re-load f/w\n");
3816 }
3817 if (running) {
3818 if (!err)
3819 err = mxge_open(sc);
3820 /* release all TX locks */
3821 for (s = 0; s < num_tx_slices; s++) {
3822 ss = &sc->ss[s];
3823#ifdef IFNET_BUF_RING
3824 mxge_start_locked(ss);
3825#endif
3826 mtx_unlock(&ss->tx.mtx);
3827 }
3828 }
3829 sc->watchdog_resets++;
3830 } else {
3831 device_printf(sc->dev,
3832 "NIC did not reboot, not resetting\n");
3833 err = 0;
3834 }
3835 if (err) {
3836 device_printf(sc->dev, "watchdog reset failed\n");
3837 } else {
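		/*
		 * dying == 2 marks a hardware fault detected by
		 * mxge_tick(); a successful reset clears it.
		 */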
3838 if (sc->dying == 2)
3839 sc->dying = 0;
3840 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3841 }
3842}
3843
3844static void
3845mxge_watchdog_task(void *arg, int pending)
3846{
3847 mxge_softc_t *sc = arg;
3848
3849
3850 mtx_lock(&sc->driver_mtx);
3851 mxge_watchdog_reset(sc);
3852 mtx_unlock(&sc->driver_mtx);
3853}
3854
3855static void
3856mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3857{
3858 tx = &sc->ss[slice].tx;
3859	device_printf(sc->dev, "slice %d stuck? ring state:\n", slice);
3860 device_printf(sc->dev,
3861 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3862 tx->req, tx->done, tx->queue_active);
3863 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3864 tx->activate, tx->deactivate);
3865 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3866 tx->pkt_done,
3867 be32toh(sc->ss->fw_stats->send_done_count));
3868}
3869
3870static int
3871mxge_watchdog(mxge_softc_t *sc)
3872{
3873 mxge_tx_ring_t *tx;
3874 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3875 int i, err = 0;
3876
3877 /* see if we have outstanding transmits, which
3878 have been pending for more than mxge_ticks */
3879 for (i = 0;
3880#ifdef IFNET_BUF_RING
3881 (i < sc->num_slices) && (err == 0);
3882#else
3883 (i < 1) && (err == 0);
3884#endif
3885 i++) {
3886 tx = &sc->ss[i].tx;
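		/*
		 * A ring is wedged when new requests exist (req !=
		 * done), requests were already outstanding at the last
		 * tick (watchdog_req != watchdog_done), and no
		 * completions have arrived since (done == watchdog_done).
		 */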
3887 if (tx->req != tx->done &&
3888 tx->watchdog_req != tx->watchdog_done &&
3889 tx->done == tx->watchdog_done) {
3890 /* check for pause blocking before resetting */
3891 if (tx->watchdog_rx_pause == rx_pause) {
3892 mxge_warn_stuck(sc, tx, i);
3893 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3894 return (ENXIO);
3895 }
3896 else
3897 device_printf(sc->dev, "Flow control blocking "
3898 "xmits, check link partner\n");
3899 }
3900
3901 tx->watchdog_req = tx->req;
3902 tx->watchdog_done = tx->done;
3903 tx->watchdog_rx_pause = rx_pause;
3904 }
3905
3906 if (sc->need_media_probe)
3907 mxge_media_probe(sc);
3908 return (err);
3909}
3910
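/*
 * Fold the per-slice counters into the ifnet and return the number
 * of packets handled since the last call; mxge_tick() uses a zero
 * return to poll the idle NIC less often.
 */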
3911static u_long
3912mxge_update_stats(mxge_softc_t *sc)
3913{
3914 struct mxge_slice_state *ss;
3915 u_long pkts = 0;
3916 u_long ipackets = 0;
3917 u_long opackets = 0;
3918#ifdef IFNET_BUF_RING
3919 u_long obytes = 0;
3920 u_long omcasts = 0;
3921 u_long odrops = 0;
3922#endif
3923 u_long oerrors = 0;
3924 int slice;
3925
3926 for (slice = 0; slice < sc->num_slices; slice++) {
3927 ss = &sc->ss[slice];
3928 ipackets += ss->ipackets;
3929 opackets += ss->opackets;
3930#ifdef IFNET_BUF_RING
3931 obytes += ss->obytes;
3932 omcasts += ss->omcasts;
3933 odrops += ss->tx.br->br_drops;
3934#endif
3935 oerrors += ss->oerrors;
3936 }
3937 pkts = (ipackets - sc->ifp->if_ipackets);
3938 pkts += (opackets - sc->ifp->if_opackets);
3939 sc->ifp->if_ipackets = ipackets;
3940 sc->ifp->if_opackets = opackets;
3941#ifdef IFNET_BUF_RING
3942 sc->ifp->if_obytes = obytes;
3943 sc->ifp->if_omcasts = omcasts;
3944 sc->ifp->if_snd.ifq_drops = odrops;
3945#endif
3946 sc->ifp->if_oerrors = oerrors;
3947 return pkts;
3948}
3949
3950static void
3951mxge_tick(void *arg)
3952{
3953 mxge_softc_t *sc = arg;
3954 u_long pkts = 0;
3955 int err = 0;
3956 int running, ticks;
3957 uint16_t cmd;
3958
3959 ticks = mxge_ticks;
3960 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
3961 if (running) {
3962 /* aggregate stats from different slices */
3963 pkts = mxge_update_stats(sc);
3964 if (!sc->watchdog_countdown) {
3965 err = mxge_watchdog(sc);
3966 sc->watchdog_countdown = 4;
3967 }
3968 sc->watchdog_countdown--;
3969 }
3970 if (pkts == 0) {
3971 /* ensure NIC did not suffer h/w fault while idle */
3972 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3973 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3974 sc->dying = 2;
3975 taskqueue_enqueue(sc->tq, &sc->watchdog_task);
3976 err = ENXIO;
3977 }
3978 /* look less often if NIC is idle */
3979 ticks *= 4;
3980 }
3981
3982 if (err == 0)
3983 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3984
3985}
3986
3987static int
3988mxge_media_change(struct ifnet *ifp)
3989{
3990 return EINVAL;
3991}
3992
3993static int
3994mxge_change_mtu(mxge_softc_t *sc, int mtu)
3995{
3996 struct ifnet *ifp = sc->ifp;
3997 int real_mtu, old_mtu;
3998 int err = 0;
3999
4000
4001 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
4002 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
4003 return EINVAL;
4004 mtx_lock(&sc->driver_mtx);
4005 old_mtu = ifp->if_mtu;
4006 ifp->if_mtu = mtu;
4007 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4008 mxge_close(sc, 0);
4009 err = mxge_open(sc);
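		/*
		 * If the NIC will not reopen at the new MTU, fall back
		 * to the old MTU and bring it back up, but still report
		 * the error.
		 */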
4010 if (err != 0) {
4011 ifp->if_mtu = old_mtu;
4012 mxge_close(sc, 0);
4013 (void) mxge_open(sc);
4014 }
4015 }
4016 mtx_unlock(&sc->driver_mtx);
4017 return err;
4018}
4019
4020static void
4021mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
4022{
4023 mxge_softc_t *sc = ifp->if_softc;
4024
4025
4026 if (sc == NULL)
4027 return;
4028 ifmr->ifm_status = IFM_AVALID;
4029 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
4030 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
4031 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
4032}
4033
4034static int
4035mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
4036{
4037 mxge_softc_t *sc = ifp->if_softc;
4038 struct ifreq *ifr = (struct ifreq *)data;
4039 int err, mask;
4040
4041 err = 0;
4042 switch (command) {
4043 case SIOCSIFADDR:
4044 case SIOCGIFADDR:
4045 err = ether_ioctl(ifp, command, data);
4046 break;
4047
4048 case SIOCSIFMTU:
4049 err = mxge_change_mtu(sc, ifr->ifr_mtu);
4050 break;
4051
4052 case SIOCSIFFLAGS:
4053 mtx_lock(&sc->driver_mtx);
4054 if (sc->dying) {
4055 mtx_unlock(&sc->driver_mtx);
4056 return EINVAL;
4057 }
4058 if (ifp->if_flags & IFF_UP) {
4059 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4060 err = mxge_open(sc);
4061 } else {
4062				/* take care of promisc and allmulti
4063				   flag changes */
4064 mxge_change_promisc(sc,
4065 ifp->if_flags & IFF_PROMISC);
4066 mxge_set_multicast_list(sc);
4067 }
4068 } else {
4069 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
4070 mxge_close(sc, 0);
4071 }
4072 }
4073 mtx_unlock(&sc->driver_mtx);
4074 break;
4075
4076 case SIOCADDMULTI:
4077 case SIOCDELMULTI:
4078 mtx_lock(&sc->driver_mtx);
4079 mxge_set_multicast_list(sc);
4080 mtx_unlock(&sc->driver_mtx);
4081 break;
4082
4083 case SIOCSIFCAP:
4084 mtx_lock(&sc->driver_mtx);
4085 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
4086 if (mask & IFCAP_TXCSUM) {
4087 if (IFCAP_TXCSUM & ifp->if_capenable) {
4088 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4089 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4090 | CSUM_TSO);
4091 } else {
4092 ifp->if_capenable |= IFCAP_TXCSUM;
4093 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4094 }
4095		}
		if (mask & IFCAP_RXCSUM) {
4096 if (IFCAP_RXCSUM & ifp->if_capenable) {
4097 ifp->if_capenable &= ~IFCAP_RXCSUM;
4098 sc->csum_flag = 0;
4099 } else {
4100 ifp->if_capenable |= IFCAP_RXCSUM;
4101 sc->csum_flag = 1;
4102 }
4103 }
4104 if (mask & IFCAP_TSO4) {
4105 if (IFCAP_TSO4 & ifp->if_capenable) {
4106 ifp->if_capenable &= ~IFCAP_TSO4;
4107 ifp->if_hwassist &= ~CSUM_TSO;
4108 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4109 ifp->if_capenable |= IFCAP_TSO4;
4110 ifp->if_hwassist |= CSUM_TSO;
4111 } else {
4112 printf("mxge requires tx checksum offload"
4113 " be enabled to use TSO\n");
4114 err = EINVAL;
4115 }
4116 }
4117 if (mask & IFCAP_LRO) {
4118 if (IFCAP_LRO & ifp->if_capenable)
4119 err = mxge_change_lro_locked(sc, 0);
4120 else
4121 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4122 }
4123 if (mask & IFCAP_VLAN_HWTAGGING)
4124 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4125 mtx_unlock(&sc->driver_mtx);
4126 VLAN_CAPABILITIES(ifp);
4127
4128 break;
4129
4130 case SIOCGIFMEDIA:
4131 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4132 &sc->media, command);
4133 break;
4134
4135 default:
4136 err = ENOTTY;
4137 }
4138 return err;
4139}
4140
4141static void
4142mxge_fetch_tunables(mxge_softc_t *sc)
4143{
4144
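	/*
	 * Example (hypothetical values) loader.conf entries that feed
	 * these fetches:
	 *	hw.mxge.max_slices="4"
	 *	hw.mxge.intr_coal_delay="30"
	 */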
4145 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4146 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4147 &mxge_flow_control);
4148 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4149 &mxge_intr_coal_delay);
4150 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4151 &mxge_nvidia_ecrc_enable);
4152 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4153 &mxge_force_firmware);
4154 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4155 &mxge_deassert_wait);
4156 TUNABLE_INT_FETCH("hw.mxge.verbose",
4157 &mxge_verbose);
4158 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4159 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4160 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
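	/* both spellings of the RSS hash tunable are honored; the
	   duplicate is presumably kept for compatibility (assumption) */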
4161 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4162 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
4163 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4164 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
4165 if (sc->lro_cnt != 0)
4166 mxge_lro_cnt = sc->lro_cnt;
4167
4168 if (bootverbose)
4169 mxge_verbose = 1;
4170 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4171 mxge_intr_coal_delay = 30;
4172 if (mxge_ticks == 0)
4173 mxge_ticks = hz / 2;
4174 sc->pause = mxge_flow_control;
4175 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4176 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4177 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
4178 }
4179 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4180 mxge_initial_mtu < ETHER_MIN_LEN)
4181 mxge_initial_mtu = ETHERMTU_JUMBO;
4182
4183 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
4184 mxge_throttle = MXGE_MAX_THROTTLE;
4185 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
4186 mxge_throttle = MXGE_MIN_THROTTLE;
4187 sc->throttle = mxge_throttle;
4188}
4189
4190
4191static void
4192mxge_free_slices(mxge_softc_t *sc)
4193{
4194 struct mxge_slice_state *ss;
4195 int i;
4196
4197
4198 if (sc->ss == NULL)
4199 return;
4200
4201 for (i = 0; i < sc->num_slices; i++) {
4202 ss = &sc->ss[i];
4203 if (ss->fw_stats != NULL) {
4204 mxge_dma_free(&ss->fw_stats_dma);
4205 ss->fw_stats = NULL;
4206#ifdef IFNET_BUF_RING
4207 if (ss->tx.br != NULL) {
4208 drbr_free(ss->tx.br, M_DEVBUF);
4209 ss->tx.br = NULL;
4210 }
4211#endif
4212 mtx_destroy(&ss->tx.mtx);
4213 }
4214 if (ss->rx_done.entry != NULL) {
4215 mxge_dma_free(&ss->rx_done.dma);
4216 ss->rx_done.entry = NULL;
4217 }
4218 }
4219 free(sc->ss, M_DEVBUF);
4220 sc->ss = NULL;
4221}
4222
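/*
 * Allocate the per-slice state.  The interrupt (rx_done) queue is
 * sized at twice the rx ring entries, presumably one completion slot
 * for every small and big receive entry (assumption).
 */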
4223static int
4224mxge_alloc_slices(mxge_softc_t *sc)
4225{
4226 mxge_cmd_t cmd;
4227 struct mxge_slice_state *ss;
4228 size_t bytes;
4229 int err, i, max_intr_slots;
4230
4231 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4232 if (err != 0) {
4233 device_printf(sc->dev, "Cannot determine rx ring size\n");
4234 return err;
4235 }
4236 sc->rx_ring_size = cmd.data0;
4237 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4238
4239 bytes = sizeof (*sc->ss) * sc->num_slices;
4240 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4241 if (sc->ss == NULL)
4242 return (ENOMEM);
4243 for (i = 0; i < sc->num_slices; i++) {
4244 ss = &sc->ss[i];
4245
4246 ss->sc = sc;
4247
4248 /* allocate per-slice rx interrupt queues */
4249
4250 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4251 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4252 if (err != 0)
4253 goto abort;
4254 ss->rx_done.entry = ss->rx_done.dma.addr;
4255 bzero(ss->rx_done.entry, bytes);
4256
4257 /*
4258 * allocate the per-slice firmware stats; stats
4259		 * (including tx) are used only on the first
4260 * slice for now
4261 */
4262#ifndef IFNET_BUF_RING
4263 if (i > 0)
4264 continue;
4265#endif
4266
4267 bytes = sizeof (*ss->fw_stats);
4268 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4269 sizeof (*ss->fw_stats), 64);
4270 if (err != 0)
4271 goto abort;
4272 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4273 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
4274 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4275 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
4276#ifdef IFNET_BUF_RING
4277 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4278 &ss->tx.mtx);
4279#endif
4280 }
4281
4282 return (0);
4283
4284abort:
4285 mxge_free_slices(sc);
4286 return (ENOMEM);
4287}
4288
4289static void
4290mxge_slice_probe(mxge_softc_t *sc)
4291{
4292 mxge_cmd_t cmd;
4293 char *old_fw;
4294 int msix_cnt, status, max_intr_slots;
4295
4296 sc->num_slices = 1;
4297 /*
4298	 * don't enable multiple slices if the tunable disables them,
4299	 * or if this is not an SMP system
4300 */
4301
4302 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4303 return;
4304
4305 /* see how many MSI-X interrupts are available */
4306 msix_cnt = pci_msix_count(sc->dev);
4307 if (msix_cnt < 2)
4308 return;
4309
4310	/* now load the slice aware firmware and see what it supports */
4311 old_fw = sc->fw_name;
4312 if (old_fw == mxge_fw_aligned)
4313 sc->fw_name = mxge_fw_rss_aligned;
4314 else
4315 sc->fw_name = mxge_fw_rss_unaligned;
4316 status = mxge_load_firmware(sc, 0);
4317 if (status != 0) {
4318 device_printf(sc->dev, "Falling back to a single slice\n");
4319 return;
4320 }
4321
4322 /* try to send a reset command to the card to see if it
4323 is alive */
4324 memset(&cmd, 0, sizeof (cmd));
4325 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4326 if (status != 0) {
4327 device_printf(sc->dev, "failed reset\n");
4328 goto abort_with_fw;
4329 }
4330
4331 /* get rx ring size */
4332 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4333 if (status != 0) {
4334 device_printf(sc->dev, "Cannot determine rx ring size\n");
4335 goto abort_with_fw;
4336 }
4337 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4338
4339 /* tell it the size of the interrupt queues */
4340 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4341 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4342 if (status != 0) {
4343 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4344 goto abort_with_fw;
4345 }
4346
4347 /* ask the maximum number of slices it supports */
4348 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4349 if (status != 0) {
4350 device_printf(sc->dev,
4351 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4352 goto abort_with_fw;
4353 }
4354 sc->num_slices = cmd.data0;
4355 if (sc->num_slices > msix_cnt)
4356 sc->num_slices = msix_cnt;
4357
4358 if (mxge_max_slices == -1) {
4359 /* cap to number of CPUs in system */
4360 if (sc->num_slices > mp_ncpus)
4361 sc->num_slices = mp_ncpus;
4362 } else {
4363 if (sc->num_slices > mxge_max_slices)
4364 sc->num_slices = mxge_max_slices;
4365 }
4366 /* make sure it is a power of two */
4367 while (sc->num_slices & (sc->num_slices - 1))
4368 sc->num_slices--;
4369
4370 if (mxge_verbose)
4371 device_printf(sc->dev, "using %d slices\n",
4372 sc->num_slices);
4373
4374 return;
4375
4376abort_with_fw:
4377 sc->fw_name = old_fw;
4378 (void) mxge_load_firmware(sc, 0);
4379}
4380
4381static int
4382mxge_add_msix_irqs(mxge_softc_t *sc)
4383{
4384 size_t bytes;
4385 int count, err, i, rid;
4386
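	/* the MSI-X table lives behind BAR 2; map it before enabling MSI-X */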
4387 rid = PCIR_BAR(2);
4388 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4389 &rid, RF_ACTIVE);
4390
4391 if (sc->msix_table_res == NULL) {
4392 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4393 return ENXIO;
4394 }
4395
4396 count = sc->num_slices;
4397 err = pci_alloc_msix(sc->dev, &count);
4398 if (err != 0) {
4399		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4400			      "err = %d\n", sc->num_slices, err);
4401 goto abort_with_msix_table;
4402 }
4403 if (count < sc->num_slices) {
4404		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4405			      sc->num_slices, count);
4406 device_printf(sc->dev,
4407 "Try setting hw.mxge.max_slices to %d\n",
4408 count);
4409 err = ENOSPC;
4410 goto abort_with_msix;
4411 }
4412 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4413 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4414 if (sc->msix_irq_res == NULL) {
4415 err = ENOMEM;
4416 goto abort_with_msix;
4417 }
4418
4419 for (i = 0; i < sc->num_slices; i++) {
4420 rid = i + 1;
4421 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4422 SYS_RES_IRQ,
4423 &rid, RF_ACTIVE);
4424 if (sc->msix_irq_res[i] == NULL) {
4425 device_printf(sc->dev, "couldn't allocate IRQ res"
4426 " for message %d\n", i);
4427 err = ENXIO;
4428 goto abort_with_res;
4429 }
4430 }
4431
4432 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4433 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4434
4435 for (i = 0; i < sc->num_slices; i++) {
4436 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4437 INTR_TYPE_NET | INTR_MPSAFE,
4438#if __FreeBSD_version > 700030
4439 NULL,
4440#endif
4441 mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
4442 if (err != 0) {
4443 device_printf(sc->dev, "couldn't setup intr for "
4444 "message %d\n", i);
4445 goto abort_with_intr;
4446 }
4447 }
4448
4449 if (mxge_verbose) {
4450 device_printf(sc->dev, "using %d msix IRQs:",
4451 sc->num_slices);
4452 for (i = 0; i < sc->num_slices; i++)
4453 printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4454 printf("\n");
4455 }
4456 return (0);
4457
4458abort_with_intr:
4459 for (i = 0; i < sc->num_slices; i++) {
4460 if (sc->msix_ih[i] != NULL) {
4461 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4462 sc->msix_ih[i]);
4463 sc->msix_ih[i] = NULL;
4464 }
4465 }
4466 free(sc->msix_ih, M_DEVBUF);
4467
4468
4469abort_with_res:
4470 for (i = 0; i < sc->num_slices; i++) {
4471 rid = i + 1;
4472 if (sc->msix_irq_res[i] != NULL)
4473 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4474 sc->msix_irq_res[i]);
4475 sc->msix_irq_res[i] = NULL;
4476 }
4477 free(sc->msix_irq_res, M_DEVBUF);
4478
4479
4480abort_with_msix:
4481 pci_release_msi(sc->dev);
4482
4483abort_with_msix_table:
4484 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4485 sc->msix_table_res);
4486
4487 return err;
4488}
4489
4490static int
4491mxge_add_single_irq(mxge_softc_t *sc)
4492{
4493 int count, err, rid;
4494
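	/* rid 1 selects the MSI vector; rid 0 is the legacy INTx line */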
4495 count = pci_msi_count(sc->dev);
4496 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4497 rid = 1;
4498 } else {
4499 rid = 0;
4500 sc->legacy_irq = 1;
4501 }
4502 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4503 1, RF_SHAREABLE | RF_ACTIVE);
4504 if (sc->irq_res == NULL) {
4505 device_printf(sc->dev, "could not alloc interrupt\n");
4506 return ENXIO;
4507 }
4508 if (mxge_verbose)
4509 device_printf(sc->dev, "using %s irq %ld\n",
4510 sc->legacy_irq ? "INTx" : "MSI",
4511 rman_get_start(sc->irq_res));
4512 err = bus_setup_intr(sc->dev, sc->irq_res,
4513 INTR_TYPE_NET | INTR_MPSAFE,
4514#if __FreeBSD_version > 700030
4515 NULL,
4516#endif
4517 mxge_intr, &sc->ss[0], &sc->ih);
4518 if (err != 0) {
4519 bus_release_resource(sc->dev, SYS_RES_IRQ,
4520 sc->legacy_irq ? 0 : 1, sc->irq_res);
4521 if (!sc->legacy_irq)
4522 pci_release_msi(sc->dev);
4523 }
4524 return err;
4525}
4526
4527static void
4528mxge_rem_msix_irqs(mxge_softc_t *sc)
4529{
4530 int i, rid;
4531
4532 for (i = 0; i < sc->num_slices; i++) {
4533 if (sc->msix_ih[i] != NULL) {
4534 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4535 sc->msix_ih[i]);
4536 sc->msix_ih[i] = NULL;
4537 }
4538 }
4539 free(sc->msix_ih, M_DEVBUF);
4540
4541 for (i = 0; i < sc->num_slices; i++) {
4542 rid = i + 1;
4543 if (sc->msix_irq_res[i] != NULL)
4544 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4545 sc->msix_irq_res[i]);
4546 sc->msix_irq_res[i] = NULL;
4547 }
4548 free(sc->msix_irq_res, M_DEVBUF);
4549
4550 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4551 sc->msix_table_res);
4552
4553 pci_release_msi(sc->dev);
4554 return;
4555}
4556
4557static void
4558mxge_rem_single_irq(mxge_softc_t *sc)
4559{
4560 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4561 bus_release_resource(sc->dev, SYS_RES_IRQ,
4562 sc->legacy_irq ? 0 : 1, sc->irq_res);
4563 if (!sc->legacy_irq)
4564 pci_release_msi(sc->dev);
4565}
4566
4567static void
4568mxge_rem_irq(mxge_softc_t *sc)
4569{
4570 if (sc->num_slices > 1)
4571 mxge_rem_msix_irqs(sc);
4572 else
4573 mxge_rem_single_irq(sc);
4574}
4575
4576static int
4577mxge_add_irq(mxge_softc_t *sc)
4578{
4579 int err;
4580
4581 if (sc->num_slices > 1)
4582 err = mxge_add_msix_irqs(sc);
4583 else
4584 err = mxge_add_single_irq(sc);
4585
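	/*
	 * Note: the leading 0 compiles out this MSI-X teardown/re-add
	 * path; it appears to be left in place for debugging (assumption).
	 */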
4586 if (0 && err == 0 && sc->num_slices > 1) {
4587 mxge_rem_msix_irqs(sc);
4588 err = mxge_add_msix_irqs(sc);
4589 }
4590 return err;
4591}
4592
4593
4594static int
4595mxge_attach(device_t dev)
4596{
4597 mxge_softc_t *sc = device_get_softc(dev);
4598 struct ifnet *ifp;
4599 int err, rid;
4600
4601 sc->dev = dev;
4602 mxge_fetch_tunables(sc);
4603
4604 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
4605 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
4606 taskqueue_thread_enqueue,
4607 &sc->tq);
4608 if (sc->tq == NULL) {
4609 err = ENOMEM;
4610 goto abort_with_nothing;
4611 }
4612 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
4613 device_get_nameunit(sc->dev));
4614
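	/*
	 * The 65536 + 256 maxsize below presumably covers a maximal
	 * TSO payload plus headers (assumption).
	 */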
4615 err = bus_dma_tag_create(NULL, /* parent */
4616 1, /* alignment */
4617 0, /* boundary */
4618 BUS_SPACE_MAXADDR, /* low */
4619 BUS_SPACE_MAXADDR, /* high */
4620 NULL, NULL, /* filter */
4621 65536 + 256, /* maxsize */
4622 MXGE_MAX_SEND_DESC, /* num segs */
4623 65536, /* maxsegsize */
4624 0, /* flags */
4625 NULL, NULL, /* lock */
4626 &sc->parent_dmat); /* tag */
4627
4628 if (err != 0) {
4629 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4630 err);
4631 goto abort_with_tq;
4632 }
4633
4634 ifp = sc->ifp = if_alloc(IFT_ETHER);
4635 if (ifp == NULL) {
4636		device_printf(dev, "cannot if_alloc()\n");
4637 err = ENOSPC;
4638 goto abort_with_parent_dmat;
4639 }
4640 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4641
4642 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4643 device_get_nameunit(dev));
4644 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4645 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4646 "%s:drv", device_get_nameunit(dev));
4647 mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4648 MTX_NETWORK_LOCK, MTX_DEF);
4649
4650 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4651
4652 mxge_setup_cfg_space(sc);
4653
4654 /* Map the board into the kernel */
4655 rid = PCIR_BARS;
4656 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4657 ~0, 1, RF_ACTIVE);
4658 if (sc->mem_res == NULL) {
4659 device_printf(dev, "could not map memory\n");
4660 err = ENXIO;
4661 goto abort_with_lock;
4662 }
4663 sc->sram = rman_get_virtual(sc->mem_res);
4664 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4665 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4666 device_printf(dev, "impossible memory region size %ld\n",
4667 rman_get_size(sc->mem_res));
4668 err = ENXIO;
4669 goto abort_with_mem_res;
4670 }
4671
4672 /* make NULL terminated copy of the EEPROM strings section of
4673 lanai SRAM */
4674 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4675 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4676 rman_get_bushandle(sc->mem_res),
4677 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4678 sc->eeprom_strings,
4679 MXGE_EEPROM_STRINGS_SIZE - 2);
4680 err = mxge_parse_strings(sc);
4681 if (err != 0)
4682 goto abort_with_mem_res;
4683
4684 /* Enable write combining for efficient use of PCIe bus */
4685 mxge_enable_wc(sc);
4686
4687 /* Allocate the out of band dma memory */
4688 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4689 sizeof (mxge_cmd_t), 64);
4690 if (err != 0)
4691 goto abort_with_mem_res;
4692 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4693 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4694 if (err != 0)
4695 goto abort_with_cmd_dma;
4696
4697 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4698 if (err != 0)
4699 goto abort_with_zeropad_dma;
4700
4701 /* select & load the firmware */
4702 err = mxge_select_firmware(sc);
4703 if (err != 0)
4704 goto abort_with_dmabench;
4705 sc->intr_coal_delay = mxge_intr_coal_delay;
4706
4707 mxge_slice_probe(sc);
4708 err = mxge_alloc_slices(sc);
4709 if (err != 0)
4710 goto abort_with_dmabench;
4711
4712 err = mxge_reset(sc, 0);
4713 if (err != 0)
4714 goto abort_with_slices;
4715
4716 err = mxge_alloc_rings(sc);
4717 if (err != 0) {
4718 device_printf(sc->dev, "failed to allocate rings\n");
4719 goto abort_with_dmabench;
4720 }
4721
4722 err = mxge_add_irq(sc);
4723 if (err != 0) {
4724 device_printf(sc->dev, "failed to add irq\n");
4725 goto abort_with_rings;
4726 }
4727
4728 ifp->if_baudrate = IF_Gbps(10UL);
4729 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4730 IFCAP_VLAN_MTU;
4731#ifdef INET
4732 ifp->if_capabilities |= IFCAP_LRO;
4733#endif
4734
4735#ifdef MXGE_NEW_VLAN_API
4736 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4737#endif
4738
4739 sc->max_mtu = mxge_max_mtu(sc);
4740 if (sc->max_mtu >= 9000)
4741 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4742 else
4743 device_printf(dev, "MTU limited to %d. Install "
4744 "latest firmware for 9000 byte jumbo support\n",
4745 sc->max_mtu - ETHER_HDR_LEN);
4746 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4747 ifp->if_capenable = ifp->if_capabilities;
4748 if (sc->lro_cnt == 0)
4749 ifp->if_capenable &= ~IFCAP_LRO;
4750 sc->csum_flag = 1;
4751 ifp->if_init = mxge_init;
4752 ifp->if_softc = sc;
4753 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4754 ifp->if_ioctl = mxge_ioctl;
4755 ifp->if_start = mxge_start;
4756 /* Initialise the ifmedia structure */
4757 ifmedia_init(&sc->media, 0, mxge_media_change,
4758 mxge_media_status);
4759 mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
4760 mxge_media_probe(sc);
4761 sc->dying = 0;
4762 ether_ifattach(ifp, sc->mac_addr);
4763 /* ether_ifattach sets mtu to ETHERMTU */
4764 if (mxge_initial_mtu != ETHERMTU)
4765 mxge_change_mtu(sc, mxge_initial_mtu);
4766
4767 mxge_add_sysctls(sc);
4768#ifdef IFNET_BUF_RING
4769 ifp->if_transmit = mxge_transmit;
4770 ifp->if_qflush = mxge_qflush;
4771#endif
4772 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
4773 return 0;
4774
4775abort_with_rings:
4776 mxge_free_rings(sc);
4777abort_with_slices:
4778 mxge_free_slices(sc);
4779abort_with_dmabench:
4780 mxge_dma_free(&sc->dmabench_dma);
4781abort_with_zeropad_dma:
4782 mxge_dma_free(&sc->zeropad_dma);
4783abort_with_cmd_dma:
4784 mxge_dma_free(&sc->cmd_dma);
4785abort_with_mem_res:
4786 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4787abort_with_lock:
4788 pci_disable_busmaster(dev);
4789 mtx_destroy(&sc->cmd_mtx);
4790 mtx_destroy(&sc->driver_mtx);
4791 if_free(ifp);
4792abort_with_parent_dmat:
4793 bus_dma_tag_destroy(sc->parent_dmat);
4794abort_with_tq:
4795 if (sc->tq != NULL) {
4796 taskqueue_drain(sc->tq, &sc->watchdog_task);
4797 taskqueue_free(sc->tq);
4798 sc->tq = NULL;
4799 }
4800abort_with_nothing:
4801 return err;
4802}
4803
4804static int
4805mxge_detach(device_t dev)
4806{
4807 mxge_softc_t *sc = device_get_softc(dev);
4808
4809 if (mxge_vlans_active(sc)) {
4810 device_printf(sc->dev,
4811 "Detach vlans before removing module\n");
4812 return EBUSY;
4813 }
4814 mtx_lock(&sc->driver_mtx);
4815 sc->dying = 1;
4816 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
4817 mxge_close(sc, 0);
4818 mtx_unlock(&sc->driver_mtx);
4819 ether_ifdetach(sc->ifp);
4820 if (sc->tq != NULL) {
4821 taskqueue_drain(sc->tq, &sc->watchdog_task);
4822 taskqueue_free(sc->tq);
4823 sc->tq = NULL;
4824 }
4825 callout_drain(&sc->co_hdl);
4826 ifmedia_removeall(&sc->media);
4827 mxge_dummy_rdma(sc, 0);
4828 mxge_rem_sysctls(sc);
4829 mxge_rem_irq(sc);
4830 mxge_free_rings(sc);
4831 mxge_free_slices(sc);
4832 mxge_dma_free(&sc->dmabench_dma);
4833 mxge_dma_free(&sc->zeropad_dma);
4834 mxge_dma_free(&sc->cmd_dma);
4835 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4836 pci_disable_busmaster(dev);
4837 mtx_destroy(&sc->cmd_mtx);
4838 mtx_destroy(&sc->driver_mtx);
4839 if_free(sc->ifp);
4840 bus_dma_tag_destroy(sc->parent_dmat);
4841 return 0;
4842}
4843
4844static int
4845mxge_shutdown(device_t dev)
4846{
4847 return 0;
4848}
4849
4850/*
4851 This file uses Myri10GE driver indentation.
4852
4853 Local Variables:
4854 c-file-style:"linux"
4855 tab-width:8
4856 End:
4857*/