/* if_mxge.c, revision 188737 */
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 188737 2009-02-17 22:25:19Z gallatin $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/linker.h>
36#include <sys/firmware.h>
37#include <sys/endian.h>
38#include <sys/sockio.h>
39#include <sys/mbuf.h>
40#include <sys/malloc.h>
41#include <sys/kdb.h>
42#include <sys/kernel.h>
43#include <sys/lock.h>
44#include <sys/module.h>
45#include <sys/socket.h>
46#include <sys/sysctl.h>
47#include <sys/sx.h>
48
49#include <net/if.h>
50#include <net/if_arp.h>
51#include <net/ethernet.h>
52#include <net/if_dl.h>
53#include <net/if_media.h>
54
55#include <net/bpf.h>
56
57#include <net/if_types.h>
58#include <net/if_vlan_var.h>
59#include <net/zlib.h>
60
61#include <netinet/in_systm.h>
62#include <netinet/in.h>
63#include <netinet/ip.h>
64#include <netinet/tcp.h>
65
66#include <machine/bus.h>
67#include <machine/in_cksum.h>
68#include <machine/resource.h>
69#include <sys/bus.h>
70#include <sys/rman.h>
71#include <sys/smp.h>
72
73#include <dev/pci/pcireg.h>
74#include <dev/pci/pcivar.h>
75#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */
76
77#include <vm/vm.h>		/* for pmap_mapdev() */
78#include <vm/pmap.h>
79
80#if defined(__i386) || defined(__amd64)
81#include <machine/specialreg.h>
82#endif
83
84#include <dev/mxge/mxge_mcp.h>
85#include <dev/mxge/mcp_gen_header.h>
86/*#define MXGE_FAKE_IFP*/
87#include <dev/mxge/if_mxge_var.h>
88
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;	/* try to enable ECRC on Nvidia bridges
					   (see mxge_enable_nvidia_ecrc()) */
static int mxge_force_firmware = 0;	/* 0 = probe; 1 = force aligned fw;
					   other = force unaligned fw */
static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay --
					   units not shown here, TODO confirm */
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;		/* extra device_printf() diagnostics */
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;	/* force promiscuous mode on */
/* firmware image names; "aligned" images assume aligned PCIe completions */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
105
/* newbus device interface entry points */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
  /* Device interface */
  DEVMETHOD(device_probe, mxge_probe),
  DEVMETHOD(device_attach, mxge_attach),
  DEVMETHOD(device_detach, mxge_detach),
  DEVMETHOD(device_shutdown, mxge_shutdown),
  {0, 0}	/* table terminator */
};

static driver_t mxge_driver =
{
  "mxge",
  mxge_methods,
  sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* firmware(9) supplies the MCP images; zlib decompresses them */
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

/* forward declarations for helpers used before their definitions */
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
141
142static int
143mxge_probe(device_t dev)
144{
145	int rev;
146
147
148	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
149	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
150	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
151		rev = pci_get_revid(dev);
152		switch (rev) {
153		case MXGE_PCI_REV_Z8E:
154			device_set_desc(dev, "Myri10G-PCIE-8A");
155			break;
156		case MXGE_PCI_REV_Z8ES:
157			device_set_desc(dev, "Myri10G-PCIE-8B");
158			break;
159		default:
160			device_set_desc(dev, "Myri10G-PCIE-8??");
161			device_printf(dev, "Unrecognized rev %d NIC\n",
162				      rev);
163			break;
164		}
165		return 0;
166	}
167	return ENXIO;
168}
169
170static void
171mxge_enable_wc(mxge_softc_t *sc)
172{
173#if defined(__i386) || defined(__amd64)
174	vm_offset_t len;
175	int err;
176
177	sc->wc = 1;
178	len = rman_get_size(sc->mem_res);
179	err = pmap_change_attr((vm_offset_t) sc->sram,
180			       len, PAT_WRITE_COMBINING);
181	if (err != 0) {
182		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
183			      err);
184		sc->wc = 0;
185	}
186#endif
187}
188
189
190/* callback to get our DMA address */
191static void
192mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
193			 int error)
194{
195	if (error == 0) {
196		*(bus_addr_t *) arg = segs->ds_addr;
197	}
198}
199
/*
 * Allocate a coherent, zeroed, single-segment DMA region of 'bytes'
 * bytes with the requested alignment; fill in dma->dmat/map/addr and
 * dma->bus_addr.  Returns 0 or a bus_dma error code; on failure,
 * everything allocated so far is torn down (goto cleanup chain).
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
		   bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	/* A page-aligned region bigger than a page must not be split
	   at 4KB boundaries; otherwise constrain segments to 4KB. */
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; the callback stores the bus address */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
259
260
/*
 * Release a region obtained from mxge_dma_alloc(): unload the map,
 * free the memory, then destroy the tag (reverse order of creation).
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
268
269/*
270 * The eeprom strings on the lanaiX have the format
271 * SN=x\0
272 * MAC=x:x:x:x:x:x\0
273 * PC=text\0
274 */
275
/*
 * Parse the NUL-separated EEPROM strings (SN=, MAC=, PC= -- see the
 * format comment above) and fill in sc->mac_addr, the product code
 * and serial number strings.  Returns 0 if a MAC was found, else
 * ENXIO.
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
/* advance 'ptr' (NOT the macro argument) past the current string's
   terminating NUL; 'p' is unused */
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/* +1 here plus the first +3 below lands ptr on
			   the first hex digit after "MAC=".  Note this
			   leaves mac_addr_string pointing at "AC=..." --
			   presumably only used for display; confirm. */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				/* skip "xx:" (or "AC=" on first pass) */
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			/* NOTE(review): strncpy() with size-1 leaves the
			   last byte untouched; termination relies on the
			   destination being pre-zeroed -- confirm. */
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

 abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
318
/*
 * Try to enable ECRC generation on an upstream Nvidia (CK804/MCP55)
 * PCIe bridge, which forces aligned completions and lets the driver
 * use the faster "aligned" firmware.  Because the extended (>0xff)
 * config space is not reachable via normal pci_write_config() here,
 * the bridge's config space is located by physical address and mapped
 * directly with pmap_mapdev().  x86/amd64 only; a stub is compiled on
 * other architectures.
 */
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	/* grandparent of the NIC should be the PCIe bridge */
	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	/* only Nvidia bridges are handled */
	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* physical address of the bridge's config space:
	   1MB per bus, 4KB per function (8 functions per slot) */
	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it by checking that the
	   mapped vendor/device ids match what the PCI code reported */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	/* extended config register 0x178; bit 0x40 enables ECRC */
	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
452
453
/*
 * Ask the firmware to benchmark DMA against the dmabench buffer and
 * record read, write, and read/write bandwidths in the softc.
 * 'test_type' selects the firmware command (e.g. the unaligned-
 * completion test used by mxge_firmware_probe()).  Returns the first
 * failing command's status, or 0.
 */
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;		/* read test */
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	/* transfers * len bytes over ticks half-us ticks; the *2
	   converts the 0.5us ticks (see comment above) */
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;			/* write test */
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;		/* read+write test */
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	/* extra *2: data moves in both directions */
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	/* the unaligned test is expected to fail on some hosts;
	   don't complain about it */
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
514
515/*
516 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
517 * when the PCI-E Completion packets are aligned on an 8-byte
518 * boundary.  Some PCI-E chip sets always align Completion packets; on
519 * the ones that do not, the alignment can be enforced by enabling
520 * ECRC generation (if supported).
521 *
522 * When PCI-E Completion packets are not aligned, it is actually more
523 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
524 *
525 * If the driver can neither enable ECRC nor verify that it has
526 * already been enabled, then it must use a firmware image which works
527 * around unaligned completion packets (ethp_z8e.dat), and it should
528 * also ensure that it never gives the device a Read-DMA which is
529 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
530 * enabled, then the driver should use the aligned (eth_z8e.dat)
531 * firmware image, and set tx_boundary to 4KB.
532 */
533
534static int
535mxge_firmware_probe(mxge_softc_t *sc)
536{
537	device_t dev = sc->dev;
538	int reg, status;
539	uint16_t pectl;
540
541	sc->tx_boundary = 4096;
542	/*
543	 * Verify the max read request size was set to 4KB
544	 * before trying the test with 4KB.
545	 */
546	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
547		pectl = pci_read_config(dev, reg + 0x8, 2);
548		if ((pectl & (5 << 12)) != (5 << 12)) {
549			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
550				      pectl);
551			sc->tx_boundary = 2048;
552		}
553	}
554
555	/*
556	 * load the optimized firmware (which assumes aligned PCIe
557	 * completions) in order to see if it works on this host.
558	 */
559	sc->fw_name = mxge_fw_aligned;
560	status = mxge_load_firmware(sc, 1);
561	if (status != 0) {
562		return status;
563	}
564
565	/*
566	 * Enable ECRC if possible
567	 */
568	mxge_enable_nvidia_ecrc(sc);
569
570	/*
571	 * Run a DMA test which watches for unaligned completions and
572	 * aborts on the first one seen.
573	 */
574
575	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
576	if (status == 0)
577		return 0; /* keep the aligned firmware */
578
579	if (status != E2BIG)
580		device_printf(dev, "DMA test failed: %d\n", status);
581	if (status == ENOSYS)
582		device_printf(dev, "Falling back to ethp! "
583			      "Please install up to date fw\n");
584	return status;
585}
586
587static int
588mxge_select_firmware(mxge_softc_t *sc)
589{
590	int aligned = 0;
591
592
593	if (mxge_force_firmware != 0) {
594		if (mxge_force_firmware == 1)
595			aligned = 1;
596		else
597			aligned = 0;
598		if (mxge_verbose)
599			device_printf(sc->dev,
600				      "Assuming %s completions (forced)\n",
601				      aligned ? "aligned" : "unaligned");
602		goto abort;
603	}
604
605	/* if the PCIe link width is 4 or less, we can use the aligned
606	   firmware and skip any checks */
607	if (sc->link_width != 0 && sc->link_width <= 4) {
608		device_printf(sc->dev,
609			      "PCIe x%d Link, expect reduced performance\n",
610			      sc->link_width);
611		aligned = 1;
612		goto abort;
613	}
614
615	if (0 == mxge_firmware_probe(sc))
616		return 0;
617
618abort:
619	if (aligned) {
620		sc->fw_name = mxge_fw_aligned;
621		sc->tx_boundary = 4096;
622	} else {
623		sc->fw_name = mxge_fw_unaligned;
624		sc->tx_boundary = 2048;
625	}
626	return (mxge_load_firmware(sc, 0));
627}
628
/*
 * Union used to strip const from a char pointer without a warning-
 * producing cast.  NOTE(review): not referenced in this portion of
 * the file; presumably used further down -- confirm before removing.
 */
union qualhack
{
        const char *ro_char;
        char *rw_char;
};
634
635static int
636mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
637{
638
639
640	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
641		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
642			      be32toh(hdr->mcp_type));
643		return EIO;
644	}
645
646	/* save firmware version for sysctl */
647	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
648	if (mxge_verbose)
649		device_printf(sc->dev, "firmware id: %s\n", hdr->version);
650
651	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
652	       &sc->fw_ver_minor, &sc->fw_ver_tiny);
653
654	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
655	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
656		device_printf(sc->dev, "Found firmware version %s\n",
657			      sc->fw_version);
658		device_printf(sc->dev, "Driver needs %d.%d\n",
659			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
660		return EINVAL;
661	}
662	return 0;
663
664}
665
666static void *
667z_alloc(void *nil, u_int items, u_int size)
668{
669        void *ptr;
670
671        ptr = malloc(items * size, M_TEMP, M_NOWAIT);
672        return ptr;
673}
674
/* zlib free hook: release memory obtained from z_alloc(). */
static void
z_free(void *nil, void *ptr)
{
        free(ptr, M_TEMP);
}
680
681
682static int
683mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
684{
685	z_stream zs;
686	char *inflate_buffer;
687	const struct firmware *fw;
688	const mcp_gen_header_t *hdr;
689	unsigned hdr_offset;
690	int status;
691	unsigned int i;
692	char dummy;
693	size_t fw_len;
694
695	fw = firmware_get(sc->fw_name);
696	if (fw == NULL) {
697		device_printf(sc->dev, "Could not find firmware image %s\n",
698			      sc->fw_name);
699		return ENOENT;
700	}
701
702
703
704	/* setup zlib and decompress f/w */
705	bzero(&zs, sizeof (zs));
706	zs.zalloc = z_alloc;
707	zs.zfree = z_free;
708	status = inflateInit(&zs);
709	if (status != Z_OK) {
710		status = EIO;
711		goto abort_with_fw;
712	}
713
714	/* the uncompressed size is stored as the firmware version,
715	   which would otherwise go unused */
716	fw_len = (size_t) fw->version;
717	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
718	if (inflate_buffer == NULL)
719		goto abort_with_zs;
720	zs.avail_in = fw->datasize;
721	zs.next_in = __DECONST(char *, fw->data);
722	zs.avail_out = fw_len;
723	zs.next_out = inflate_buffer;
724	status = inflate(&zs, Z_FINISH);
725	if (status != Z_STREAM_END) {
726		device_printf(sc->dev, "zlib %d\n", status);
727		status = EIO;
728		goto abort_with_buffer;
729	}
730
731	/* check id */
732	hdr_offset = htobe32(*(const uint32_t *)
733			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
734	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
735		device_printf(sc->dev, "Bad firmware file");
736		status = EIO;
737		goto abort_with_buffer;
738	}
739	hdr = (const void*)(inflate_buffer + hdr_offset);
740
741	status = mxge_validate_firmware(sc, hdr);
742	if (status != 0)
743		goto abort_with_buffer;
744
745	/* Copy the inflated firmware to NIC SRAM. */
746	for (i = 0; i < fw_len; i += 256) {
747		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
748			      inflate_buffer + i,
749			      min(256U, (unsigned)(fw_len - i)));
750		wmb();
751		dummy = *sc->sram;
752		wmb();
753	}
754
755	*limit = fw_len;
756	status = 0;
757abort_with_buffer:
758	free(inflate_buffer, M_TEMP);
759abort_with_zs:
760	inflateEnd(&zs);
761abort_with_fw:
762	firmware_put(fw, FIRMWARE_UNLOAD);
763	return status;
764}
765
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* align the command buffer on an 8-byte boundary within buf_bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);			/* enable? */


	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll up to ~20ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
822
823static int
824mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
825{
826	mcp_cmd_t *buf;
827	char buf_bytes[sizeof(*buf) + 8];
828	volatile mcp_cmd_response_t *response = sc->cmd;
829	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
830	uint32_t dma_low, dma_high;
831	int err, sleep_total = 0;
832
833	/* ensure buf is aligned to 8 bytes */
834	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
835
836	buf->data0 = htobe32(data->data0);
837	buf->data1 = htobe32(data->data1);
838	buf->data2 = htobe32(data->data2);
839	buf->cmd = htobe32(cmd);
840	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
841	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
842
843	buf->response_addr.low = htobe32(dma_low);
844	buf->response_addr.high = htobe32(dma_high);
845	mtx_lock(&sc->cmd_mtx);
846	response->result = 0xffffffff;
847	wmb();
848	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
849
850	/* wait up to 20ms */
851	err = EAGAIN;
852	for (sleep_total = 0; sleep_total <  20; sleep_total++) {
853		bus_dmamap_sync(sc->cmd_dma.dmat,
854				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
855		wmb();
856		switch (be32toh(response->result)) {
857		case 0:
858			data->data0 = be32toh(response->data);
859			err = 0;
860			break;
861		case 0xffffffff:
862			DELAY(1000);
863			break;
864		case MXGEFW_CMD_UNKNOWN:
865			err = ENOSYS;
866			break;
867		case MXGEFW_CMD_ERROR_UNALIGNED:
868			err = E2BIG;
869			break;
870		case MXGEFW_CMD_ERROR_BUSY:
871			err = EBUSY;
872			break;
873		default:
874			device_printf(sc->dev,
875				      "mxge: command %d "
876				      "failed, result = %d\n",
877				      cmd, be32toh(response->result));
878			err = ENXIO;
879			break;
880		}
881		if (err != EAGAIN)
882			break;
883	}
884	if (err == EAGAIN)
885		device_printf(sc->dev, "mxge: command %d timed out"
886			      "result = %d\n",
887			      cmd, be32toh(response->result));
888	mtx_unlock(&sc->cmd_mtx);
889	return err;
890}
891
/*
 * Validate the firmware already running on the NIC (used when loading
 * a new image fails) and detect the known rx-filter bug in adopted
 * 1.4.4 - 1.4.11 firmware.  Returns 0 if the running firmware is
 * usable, else an errno from mxge_validate_firmware().
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header (stored big-endian in SRAM;
	   htobe32 performs the same byte swap as be32toh here) */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
940
941
/*
 * Load firmware into NIC SRAM and hand control to it via the boot
 * MCP.  If loading fails and 'adopt' is set, fall back to validating
 * and adopting the firmware already running on the NIC (forcing the
 * unaligned/2KB configuration).  Returns 0 or an errno.
 */
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	/* align the handoff command buffer on an 8-byte boundary */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				"Using firmware currently running on NIC"
				 ".  For optimal\n");
			device_printf(sc->dev,
				 "performance consider loading optimized "
				 "firmware\n");
		}
		/* adopted firmware: assume unaligned completions */
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll up to ~200ms for the boot MCP's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
1026
1027static int
1028mxge_update_mac_address(mxge_softc_t *sc)
1029{
1030	mxge_cmd_t cmd;
1031	uint8_t *addr = sc->mac_addr;
1032	int status;
1033
1034
1035	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1036		     | (addr[2] << 8) | addr[3]);
1037
1038	cmd.data1 = ((addr[4] << 8) | (addr[5]));
1039
1040	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1041	return status;
1042}
1043
1044static int
1045mxge_change_pause(mxge_softc_t *sc, int pause)
1046{
1047	mxge_cmd_t cmd;
1048	int status;
1049
1050	if (pause)
1051		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1052				       &cmd);
1053	else
1054		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1055				       &cmd);
1056
1057	if (status) {
1058		device_printf(sc->dev, "Failed to set flow control mode\n");
1059		return ENXIO;
1060	}
1061	sc->pause = pause;
1062	return 0;
1063}
1064
1065static void
1066mxge_change_promisc(mxge_softc_t *sc, int promisc)
1067{
1068	mxge_cmd_t cmd;
1069	int status;
1070
1071	if (mxge_always_promisc)
1072		promisc = 1;
1073
1074	if (promisc)
1075		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1076				       &cmd);
1077	else
1078		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1079				       &cmd);
1080
1081	if (status) {
1082		device_printf(sc->dev, "Failed to set promisc mode\n");
1083	}
1084}
1085
/*
 * Push the interface's multicast address list down to the firmware.
 * Filtering is disabled (ALLMULTI) while the list is rebuilt, and is
 * left disabled on any error, when IFF_ALLMULTI is requested, or when
 * working around the adopted-firmware rx filter bug.
 */
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
		       " error status: %d\n", err);
		return;
	}

	/* buggy adopted firmware: must stay in ALLMULTI mode
	   (see mxge_adopt_running_firmware()) */
	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/* link-level address: 4 bytes into data0, 2 into data1 */
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
			       "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
			       "%d\t", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
		       ", error status: %d\n", err);
	}
}
1153
1154static int
1155mxge_max_mtu(mxge_softc_t *sc)
1156{
1157	mxge_cmd_t cmd;
1158	int status;
1159
1160	if (MJUMPAGESIZE - MXGEFW_PAD >  MXGEFW_MAX_MTU)
1161		return  MXGEFW_MAX_MTU - MXGEFW_PAD;
1162
1163	/* try to set nbufs to see if it we can
1164	   use virtually contiguous jumbos */
1165	cmd.data0 = 0;
1166	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1167			       &cmd);
1168	if (status == 0)
1169		return  MXGEFW_MAX_MTU - MXGEFW_PAD;
1170
1171	/* otherwise, we're limited to MJUMPAGESIZE */
1172	return MJUMPAGESIZE - MXGEFW_PAD;
1173}
1174
/*
 * Reset the NIC and re-establish the driver/firmware shared state:
 * interrupt queue DMA addresses, interrupt coalescing and IRQ
 * ack/deassert SRAM offsets, per-slice counters, MAC address, and
 * rx filter modes.  Returns 0 on success or an errno value.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
					   &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
					   &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			/* tell the firmware the bus address of each
			   slice's receive-done (interrupt) queue */
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	/* Fetch the SRAM offsets used to program interrupt coalescing
	   and to claim/deassert interrupts.  Errors are OR-ed into
	   status and checked once below. */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}


	/* program the initial coalescing delay (NIC expects big endian) */
	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		/* each slice's IRQ claim word is 2 words apart */
		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	/* restore MAC address and rx filter state lost by the reset */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
1305
1306static int
1307mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1308{
1309        mxge_softc_t *sc;
1310        unsigned int intr_coal_delay;
1311        int err;
1312
1313        sc = arg1;
1314        intr_coal_delay = sc->intr_coal_delay;
1315        err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1316        if (err != 0) {
1317                return err;
1318        }
1319        if (intr_coal_delay == sc->intr_coal_delay)
1320                return 0;
1321
1322        if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1323                return EINVAL;
1324
1325	mtx_lock(&sc->driver_mtx);
1326	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1327	sc->intr_coal_delay = intr_coal_delay;
1328
1329	mtx_unlock(&sc->driver_mtx);
1330        return err;
1331}
1332
1333static int
1334mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1335{
1336        mxge_softc_t *sc;
1337        unsigned int enabled;
1338        int err;
1339
1340        sc = arg1;
1341        enabled = sc->pause;
1342        err = sysctl_handle_int(oidp, &enabled, arg2, req);
1343        if (err != 0) {
1344                return err;
1345        }
1346        if (enabled == sc->pause)
1347                return 0;
1348
1349	mtx_lock(&sc->driver_mtx);
1350	err = mxge_change_pause(sc, enabled);
1351	mtx_unlock(&sc->driver_mtx);
1352        return err;
1353}
1354
1355static int
1356mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1357{
1358	struct ifnet *ifp;
1359	int err = 0;
1360
1361	ifp = sc->ifp;
1362	if (lro_cnt == 0)
1363		ifp->if_capenable &= ~IFCAP_LRO;
1364	else
1365		ifp->if_capenable |= IFCAP_LRO;
1366	sc->lro_cnt = lro_cnt;
1367	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1368		mxge_close(sc);
1369		err = mxge_open(sc);
1370	}
1371	return err;
1372}
1373
1374static int
1375mxge_change_lro(SYSCTL_HANDLER_ARGS)
1376{
1377	mxge_softc_t *sc;
1378	unsigned int lro_cnt;
1379	int err;
1380
1381	sc = arg1;
1382	lro_cnt = sc->lro_cnt;
1383	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1384	if (err != 0)
1385		return err;
1386
1387	if (lro_cnt == sc->lro_cnt)
1388		return 0;
1389
1390	if (lro_cnt > 128)
1391		return EINVAL;
1392
1393	mtx_lock(&sc->driver_mtx);
1394	err = mxge_change_lro_locked(sc, lro_cnt);
1395	mtx_unlock(&sc->driver_mtx);
1396	return err;
1397}
1398
1399static int
1400mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1401{
1402        int err;
1403
1404        if (arg1 == NULL)
1405                return EFAULT;
1406        arg2 = be32toh(*(int *)arg1);
1407        arg1 = NULL;
1408        err = sysctl_handle_int(oidp, arg1, arg2, req);
1409
1410        return err;
1411}
1412
1413static void
1414mxge_rem_sysctls(mxge_softc_t *sc)
1415{
1416	struct mxge_slice_state *ss;
1417	int slice;
1418
1419	if (sc->slice_sysctl_tree == NULL)
1420		return;
1421
1422	for (slice = 0; slice < sc->num_slices; slice++) {
1423		ss = &sc->ss[slice];
1424		if (ss == NULL || ss->sysctl_tree == NULL)
1425			continue;
1426		sysctl_ctx_free(&ss->sysctl_ctx);
1427		ss->sysctl_tree = NULL;
1428	}
1429	sysctl_ctx_free(&sc->slice_sysctl_ctx);
1430	sc->slice_sysctl_tree = NULL;
1431}
1432
1433static void
1434mxge_add_sysctls(mxge_softc_t *sc)
1435{
1436	struct sysctl_ctx_list *ctx;
1437	struct sysctl_oid_list *children;
1438	mcp_irq_data_t *fw;
1439	struct mxge_slice_state *ss;
1440	int slice;
1441	char slice_num[8];
1442
1443	ctx = device_get_sysctl_ctx(sc->dev);
1444	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1445	fw = sc->ss[0].fw_stats;
1446
1447	/* random information */
1448	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1449		       "firmware_version",
1450		       CTLFLAG_RD, &sc->fw_version,
1451		       0, "firmware version");
1452	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1453		       "serial_number",
1454		       CTLFLAG_RD, &sc->serial_number_string,
1455		       0, "serial number");
1456	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1457		       "product_code",
1458		       CTLFLAG_RD, &sc->product_code_string,
1459		       0, "product_code");
1460	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1461		       "pcie_link_width",
1462		       CTLFLAG_RD, &sc->link_width,
1463		       0, "tx_boundary");
1464	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1465		       "tx_boundary",
1466		       CTLFLAG_RD, &sc->tx_boundary,
1467		       0, "tx_boundary");
1468	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1469		       "write_combine",
1470		       CTLFLAG_RD, &sc->wc,
1471		       0, "write combining PIO?");
1472	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1473		       "read_dma_MBs",
1474		       CTLFLAG_RD, &sc->read_dma,
1475		       0, "DMA Read speed in MB/s");
1476	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1477		       "write_dma_MBs",
1478		       CTLFLAG_RD, &sc->write_dma,
1479		       0, "DMA Write speed in MB/s");
1480	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1481		       "read_write_dma_MBs",
1482		       CTLFLAG_RD, &sc->read_write_dma,
1483		       0, "DMA concurrent Read/Write speed in MB/s");
1484
1485
1486	/* performance related tunables */
1487	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1488			"intr_coal_delay",
1489			CTLTYPE_INT|CTLFLAG_RW, sc,
1490			0, mxge_change_intr_coal,
1491			"I", "interrupt coalescing delay in usecs");
1492
1493	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1494			"flow_control_enabled",
1495			CTLTYPE_INT|CTLFLAG_RW, sc,
1496			0, mxge_change_flow_control,
1497			"I", "interrupt coalescing delay in usecs");
1498
1499	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1500		       "deassert_wait",
1501		       CTLFLAG_RW, &mxge_deassert_wait,
1502		       0, "Wait for IRQ line to go low in ihandler");
1503
1504	/* stats block from firmware is in network byte order.
1505	   Need to swap it */
1506	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1507			"link_up",
1508			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1509			0, mxge_handle_be32,
1510			"I", "link up");
1511	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1512			"rdma_tags_available",
1513			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1514			0, mxge_handle_be32,
1515			"I", "rdma_tags_available");
1516	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1517			"dropped_bad_crc32",
1518			CTLTYPE_INT|CTLFLAG_RD,
1519			&fw->dropped_bad_crc32,
1520			0, mxge_handle_be32,
1521			"I", "dropped_bad_crc32");
1522	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1523			"dropped_bad_phy",
1524			CTLTYPE_INT|CTLFLAG_RD,
1525			&fw->dropped_bad_phy,
1526			0, mxge_handle_be32,
1527			"I", "dropped_bad_phy");
1528	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1529			"dropped_link_error_or_filtered",
1530			CTLTYPE_INT|CTLFLAG_RD,
1531			&fw->dropped_link_error_or_filtered,
1532			0, mxge_handle_be32,
1533			"I", "dropped_link_error_or_filtered");
1534	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1535			"dropped_link_overflow",
1536			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1537			0, mxge_handle_be32,
1538			"I", "dropped_link_overflow");
1539	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1540			"dropped_multicast_filtered",
1541			CTLTYPE_INT|CTLFLAG_RD,
1542			&fw->dropped_multicast_filtered,
1543			0, mxge_handle_be32,
1544			"I", "dropped_multicast_filtered");
1545	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1546			"dropped_no_big_buffer",
1547			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1548			0, mxge_handle_be32,
1549			"I", "dropped_no_big_buffer");
1550	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1551			"dropped_no_small_buffer",
1552			CTLTYPE_INT|CTLFLAG_RD,
1553			&fw->dropped_no_small_buffer,
1554			0, mxge_handle_be32,
1555			"I", "dropped_no_small_buffer");
1556	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1557			"dropped_overrun",
1558			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1559			0, mxge_handle_be32,
1560			"I", "dropped_overrun");
1561	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1562			"dropped_pause",
1563			CTLTYPE_INT|CTLFLAG_RD,
1564			&fw->dropped_pause,
1565			0, mxge_handle_be32,
1566			"I", "dropped_pause");
1567	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1568			"dropped_runt",
1569			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1570			0, mxge_handle_be32,
1571			"I", "dropped_runt");
1572
1573	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1574			"dropped_unicast_filtered",
1575			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1576			0, mxge_handle_be32,
1577			"I", "dropped_unicast_filtered");
1578
1579	/* verbose printing? */
1580	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1581		       "verbose",
1582		       CTLFLAG_RW, &mxge_verbose,
1583		       0, "verbose printing");
1584
1585	/* lro */
1586	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1587			"lro_cnt",
1588			CTLTYPE_INT|CTLFLAG_RW, sc,
1589			0, mxge_change_lro,
1590			"I", "number of lro merge queues");
1591
1592
1593	/* add counters exported for debugging from all slices */
1594	sysctl_ctx_init(&sc->slice_sysctl_ctx);
1595	sc->slice_sysctl_tree =
1596		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1597				"slice", CTLFLAG_RD, 0, "");
1598
1599	for (slice = 0; slice < sc->num_slices; slice++) {
1600		ss = &sc->ss[slice];
1601		sysctl_ctx_init(&ss->sysctl_ctx);
1602		ctx = &ss->sysctl_ctx;
1603		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1604		sprintf(slice_num, "%d", slice);
1605		ss->sysctl_tree =
1606			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1607					CTLFLAG_RD, 0, "");
1608		children = SYSCTL_CHILDREN(ss->sysctl_tree);
1609		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1610			       "rx_small_cnt",
1611			       CTLFLAG_RD, &ss->rx_small.cnt,
1612			       0, "rx_small_cnt");
1613		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1614			       "rx_big_cnt",
1615			       CTLFLAG_RD, &ss->rx_big.cnt,
1616			       0, "rx_small_cnt");
1617		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1618			       "tx_req",
1619			       CTLFLAG_RD, &ss->tx.req,
1620			       0, "tx_req");
1621		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1622			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1623			       0, "number of lro merge queues flushed");
1624
1625		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1626			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1627			       0, "number of frames appended to lro merge"
1628			       "queues");
1629
1630		/* only transmit from slice 0 for now */
1631		if (slice > 0)
1632			continue;
1633
1634		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1635			       "tx_done",
1636			       CTLFLAG_RD, &ss->tx.done,
1637			       0, "tx_done");
1638		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1639			       "tx_pkt_done",
1640			       CTLFLAG_RD, &ss->tx.pkt_done,
1641			       0, "tx_done");
1642		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1643			       "tx_stall",
1644			       CTLFLAG_RD, &ss->tx.stall,
1645			       0, "tx_stall");
1646		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1647			       "tx_wake",
1648			       CTLFLAG_RD, &ss->tx.wake,
1649			       0, "tx_wake");
1650		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1651			       "tx_defrag",
1652			       CTLFLAG_RD, &ss->tx.defrag,
1653			       0, "tx_defrag");
1654	}
1655}
1656
1657/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1658   backwards one at a time and handle ring wraps */
1659
1660static inline void
1661mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1662			    mcp_kreq_ether_send_t *src, int cnt)
1663{
1664        int idx, starting_slot;
1665        starting_slot = tx->req;
1666        while (cnt > 1) {
1667                cnt--;
1668                idx = (starting_slot + cnt) & tx->mask;
1669                mxge_pio_copy(&tx->lanai[idx],
1670			      &src[cnt], sizeof(*src));
1671                wmb();
1672        }
1673}
1674
1675/*
1676 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1677 * at most 32 bytes at a time, so as to avoid involving the software
1678 * pio handler in the nic.   We re-write the first segment's flags
1679 * to mark them valid only after writing the entire chain
1680 */
1681
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
                  int cnt)
{
        int idx, i;
        uint32_t *src_ints;
	volatile uint32_t *dst_ints;
        mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

        idx = tx->req & tx->mask;

	/* Clear the first descriptor's flags so the NIC ignores the
	   chain until the final flag write below makes it valid. */
	last_flags = src->flags;
	src->flags = 0;
        wmb();
        dst = dstp = &tx->lanai[idx];
        srcp = src;

        if ((idx + cnt) < tx->mask) {
                /* fast path: no ring wrap, copy two descriptors
                   (32 bytes) per PIO burst */
                for (i = 0; i < (cnt - 1); i += 2) {
                        mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
                        wmb(); /* force write every 32 bytes */
                        srcp += 2;
                        dstp += 2;
                }
        } else {
                /* submit all but the first request, and ensure
                   that it is submitted below */
                mxge_submit_req_backwards(tx, src, cnt);
                i = 0;
        }
        if (i < cnt) {
                /* submit the first request */
                mxge_pio_copy(dstp, srcp, sizeof(*src));
                wmb(); /* barrier before setting valid flag */
        }

        /* re-write the last 32-bits with the valid flags */
        src->flags = last_flags;
        src_ints = (uint32_t *)src;
        src_ints+=3;
        dst_ints = (volatile uint32_t *)dst;
        dst_ints+=3;
        *dst_ints =  *src_ints;
        tx->req += cnt;
        wmb();
}
1730
1731#if IFCAP_TSO4
1732
1733static void
1734mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1735	       int busdma_seg_cnt, int ip_off)
1736{
1737	mxge_tx_ring_t *tx;
1738	mcp_kreq_ether_send_t *req;
1739	bus_dma_segment_t *seg;
1740	struct ip *ip;
1741	struct tcphdr *tcp;
1742	uint32_t low, high_swapped;
1743	int len, seglen, cum_len, cum_len_next;
1744	int next_is_first, chop, cnt, rdma_count, small;
1745	uint16_t pseudo_hdr_offset, cksum_offset, mss;
1746	uint8_t flags, flags_next;
1747	static int once;
1748
1749	mss = m->m_pkthdr.tso_segsz;
1750
1751	/* negative cum_len signifies to the
1752	 * send loop that we are still in the
1753	 * header portion of the TSO packet.
1754	 */
1755
1756	/* ensure we have the ethernet, IP and TCP
1757	   header together in the first mbuf, copy
1758	   it to a scratch buffer if not */
1759	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1760		m_copydata(m, 0, ip_off + sizeof (*ip),
1761			   ss->scratch);
1762		ip = (struct ip *)(ss->scratch + ip_off);
1763	} else {
1764		ip = (struct ip *)(mtod(m, char *) + ip_off);
1765	}
1766	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1767			    + sizeof (*tcp))) {
1768		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1769			   + sizeof (*tcp),  ss->scratch);
1770		ip = (struct ip *)(mtod(m, char *) + ip_off);
1771	}
1772
1773	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1774	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1775
1776	/* TSO implies checksum offload on this hardware */
1777	cksum_offset = ip_off + (ip->ip_hl << 2);
1778	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1779
1780
1781	/* for TSO, pseudo_hdr_offset holds mss.
1782	 * The firmware figures out where to put
1783	 * the checksum by parsing the header. */
1784	pseudo_hdr_offset = htobe16(mss);
1785
1786	tx = &ss->tx;
1787	req = tx->req_list;
1788	seg = tx->seg_list;
1789	cnt = 0;
1790	rdma_count = 0;
1791	/* "rdma_count" is the number of RDMAs belonging to the
1792	 * current packet BEFORE the current send request. For
1793	 * non-TSO packets, this is equal to "count".
1794	 * For TSO packets, rdma_count needs to be reset
1795	 * to 0 after a segment cut.
1796	 *
1797	 * The rdma_count field of the send request is
1798	 * the number of RDMAs of the packet starting at
1799	 * that request. For TSO send requests with one ore more cuts
1800	 * in the middle, this is the number of RDMAs starting
1801	 * after the last cut in the request. All previous
1802	 * segments before the last cut implicitly have 1 RDMA.
1803	 *
1804	 * Since the number of RDMAs is not known beforehand,
1805	 * it must be filled-in retroactively - after each
1806	 * segmentation cut or at the end of the entire packet.
1807	 */
1808
1809	while (busdma_seg_cnt) {
1810		/* Break the busdma segment up into pieces*/
1811		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1812		high_swapped = 	htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1813		len = seg->ds_len;
1814
1815		while (len) {
1816			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1817			seglen = len;
1818			cum_len_next = cum_len + seglen;
1819			(req-rdma_count)->rdma_count = rdma_count + 1;
1820			if (__predict_true(cum_len >= 0)) {
1821				/* payload */
1822				chop = (cum_len_next > mss);
1823				cum_len_next = cum_len_next % mss;
1824				next_is_first = (cum_len_next == 0);
1825				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1826				flags_next |= next_is_first *
1827					MXGEFW_FLAGS_FIRST;
1828				rdma_count |= -(chop | next_is_first);
1829				rdma_count += chop & !next_is_first;
1830			} else if (cum_len_next >= 0) {
1831				/* header ends */
1832				rdma_count = -1;
1833				cum_len_next = 0;
1834				seglen = -cum_len;
1835				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1836				flags_next = MXGEFW_FLAGS_TSO_PLD |
1837					MXGEFW_FLAGS_FIRST |
1838					(small * MXGEFW_FLAGS_SMALL);
1839			    }
1840
1841			req->addr_high = high_swapped;
1842			req->addr_low = htobe32(low);
1843			req->pseudo_hdr_offset = pseudo_hdr_offset;
1844			req->pad = 0;
1845			req->rdma_count = 1;
1846			req->length = htobe16(seglen);
1847			req->cksum_offset = cksum_offset;
1848			req->flags = flags | ((cum_len & 1) *
1849					      MXGEFW_FLAGS_ALIGN_ODD);
1850			low += seglen;
1851			len -= seglen;
1852			cum_len = cum_len_next;
1853			flags = flags_next;
1854			req++;
1855			cnt++;
1856			rdma_count++;
1857			if (__predict_false(cksum_offset > seglen))
1858				cksum_offset -= seglen;
1859			else
1860				cksum_offset = 0;
1861			if (__predict_false(cnt > tx->max_desc))
1862				goto drop;
1863		}
1864		busdma_seg_cnt--;
1865		seg++;
1866	}
1867	(req-rdma_count)->rdma_count = rdma_count;
1868
1869	do {
1870		req--;
1871		req->flags |= MXGEFW_FLAGS_TSO_LAST;
1872	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1873
1874	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1875	mxge_submit_req(tx, tx->req_list, cnt);
1876	return;
1877
1878drop:
1879	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1880	m_freem(m);
1881	ss->sc->ifp->if_oerrors++;
1882	if (!once) {
1883		printf("tx->max_desc exceeded via TSO!\n");
1884		printf("mss = %d, %ld, %d!\n", mss,
1885		       (long)seg - (long)tx->seg_list, tx->max_desc);
1886		once = 1;
1887	}
1888	return;
1889
1890}
1891
1892#endif /* IFCAP_TSO4 */
1893
1894#ifdef MXGE_NEW_VLAN_API
1895/*
1896 * We reproduce the software vlan tag insertion from
1897 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1898 * vlan tag insertion. We need to advertise this in order to have the
1899 * vlan interface respect our csum offload flags.
1900 */
/*
 * Prepend an 802.1Q VLAN header to the frame, consuming the mbuf's
 * out-of-band ether_vtag.  Returns the (possibly reallocated) mbuf,
 * or NULL if allocation failed (M_PREPEND/m_pullup free the chain
 * in that case).
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	/* open a 4-byte hole in front of the frame for the tag */
	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	/* the header rewrite below needs a contiguous header */
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	/* slide dst/src MACs down over the hole we just opened */
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	/* the tag is now in-band; clear the out-of-band flag */
	m->m_flags &= ~M_VLANTAG;
	return m;
}
1926#endif /* MXGE_NEW_VLAN_API */
1927
/*
 * Map a transmit mbuf chain for DMA and hand it to the NIC as a
 * list of send descriptors.  Handles software VLAN tag insertion,
 * checksum-offload setup, padding of runt frames, and dispatches
 * TSO frames to mxge_encap_tso().  On failure the mbuf is freed
 * and if_oerrors is incremented.
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
        uint8_t flags, cksum_offset;


	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		/* insert the tag in-band; the IP header moves back */
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += ETHER_VLAN_ENCAP_LEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   ss->scratch);
			ip = (struct ip *)(ss->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* the checksum region starts in the first segment;
		   once we pass it, later descriptors get offset 0 */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		/* point one extra descriptor at the zero-pad buffer */
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}
2085
2086
2087
2088
2089static inline void
2090mxge_start_locked(struct mxge_slice_state *ss)
2091{
2092	mxge_softc_t *sc;
2093	struct mbuf *m;
2094	struct ifnet *ifp;
2095	mxge_tx_ring_t *tx;
2096
2097	sc = ss->sc;
2098	ifp = sc->ifp;
2099	tx = &ss->tx;
2100	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2101		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2102		if (m == NULL) {
2103			return;
2104		}
2105		/* let BPF see it */
2106		BPF_MTAP(ifp, m);
2107
2108		/* give it to the nic */
2109		mxge_encap(ss, m);
2110	}
2111	/* ran out of transmit slots */
2112	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2113		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2114		tx->stall++;
2115	}
2116}
2117
2118static void
2119mxge_start(struct ifnet *ifp)
2120{
2121	mxge_softc_t *sc = ifp->if_softc;
2122	struct mxge_slice_state *ss;
2123
2124	/* only use the first slice for now */
2125	ss = &sc->ss[0];
2126	mtx_lock(&ss->tx.mtx);
2127	mxge_start_locked(ss);
2128	mtx_unlock(&ss->tx.mtx);
2129}
2130
2131/*
2132 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
2133 * at most 32 bytes at a time, so as to avoid involving the software
2134 * pio handler in the nic.   We re-write the first segment's low
2135 * DMA address to mark it valid only after we write the entire chunk
2136 * in a burst
2137 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* remember the real low address, then poison the first slot so
	   the firmware ignores the group until the burst completes */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	/* push the 8 descriptors as two 32-byte PIO bursts */
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/* restore the shadow copy and validate the first slot on the NIC;
	   this write ordering is what makes the whole group atomic */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}
2154
/*
 * Post a small (MHLEN-sized) receive buffer to slot 'idx' of the
 * small rx ring.  The new DMA address is recorded in the host shadow
 * ring; descriptors are handed to the NIC in groups of 8 once the
 * last slot of a group has been visited.  Returns 0 or an errno.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* submit on every 8th slot even on failure, so earlier refills
	   in the group are not stranded */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}
2187
/*
 * Post a big receive buffer (one cluster of rx->cl_size bytes) to
 * slot 'idx' of the big rx ring.  With MXGE_VIRT_JUMBOS the cluster
 * may map to multiple physical segments, each getting its own shadow
 * slot.  Returns 0 or an errno.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	if (rx->cl_size == MCLBYTES)
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->cl_size;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	/* record the remaining physical segments of this cluster */
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
       }
#endif

done:
	/* walk the nbufs slots this refill covers; whenever one of
	   them ends an 8-slot group, push that group to the NIC
	   (done even on failure so earlier refills are not stranded) */
       for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}
2237
2238/*
2239 *  Myri10GE hardware checksums are not valid if the sender
2240 *  padded the frame with non-zero padding.  This is because
2241 *  the firmware just does a simple 16-bit 1s complement
2242 *  checksum across the entire frame, excluding the first 14
2243 *  bytes.  It is best to simply to check the checksum and
2244 *  tell the stack about it only if the checksum is good
2245 */
2246
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;

	/* combine the firmware's raw frame checksum ('csum', computed
	   over everything after the 14-byte ethernet header) with the
	   pseudo-header; subtracting the IP header length removes the
	   IP header's contribution.  Zero return == checksum verified;
	   any non-zero value is treated as "not valid" by callers. */
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
			    - (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;
	return (c);
}
2270
/*
 * Strip the 802.1q header from a received frame: adjust the
 * firmware's partial checksum to exclude the 4 removed bytes, save
 * the VLAN tag in the mbuf packet header, and slide the MAC
 * addresses forward over the encapsulation.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	/* ones-complement subtract the 4 encapsulation bytes: add the
	   bitwise inverse, propagate the carry, then fold twice back
	   into 16 bits */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) +=  ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}
2326
2327
/*
 * Handle one received frame that landed in the big rx ring: detach
 * the filled mbuf, post a replacement buffer in its slot, and push
 * the frame up the stack (via LRO when enabled and the checksum
 * verified).  If no replacement can be allocated the frame is
 * dropped and the old buffer stays posted.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	idx = rx->cnt & rx->mask;
	/* each big frame consumes nbufs ring slots */
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	/* strip any 802.1q header (fixing up csum) before csum checks */
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		/* LRO consumes the mbuf when it accepts the segment */
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
2387
/*
 * Handle one received frame that landed in the small rx ring; same
 * structure as mxge_rx_done_big() but each frame occupies exactly
 * one MHLEN-sized buffer.
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	/* strip any 802.1q header (fixing up csum) before csum checks */
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		/* LRO consumes the mbuf when it accepts the segment */
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}
2447
/*
 * Drain this slice's receive completion ring, dispatching each
 * completed frame to the small or big handler based on its length
 * (a length of 0 marks an empty slot), then flush any LRO sessions
 * still active.
 */
static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	struct lro_entry *lro;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* zero the slot so the firmware can reuse it */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum);
		else
			mxge_rx_done_big(ss, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
	/* flush all pending LRO aggregations up the stack */
	while (!SLIST_EMPTY(&ss->lro_active)) {
		lro = SLIST_FIRST(&ss->lro_active);
		SLIST_REMOVE_HEAD(&ss->lro_active, next);
		mxge_lro_flush(ss, lro);
	}
}
2479
2480
/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * 'mcp_idx': unload DMA maps and free mbufs of completed sends, then
 * clear IFF_DRV_OACTIVE and restart transmission once at least 3/4
 * of the ring has been recovered.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* 'flag' is set on a packet's last descriptor (see the
		   end of mxge_encap), so it counts whole packets */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
           its OK to send packets */

	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&ss->tx.mtx);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
		mtx_unlock(&ss->tx.mtx);
	}
}
2523
/*
 * XFP module compliance bits (read via firmware I2C in
 * mxge_media_probe) mapped to ifmedia types.  A flag of 0 means
 * FreeBSD has no matching media type.  Entry 0 is special: it is
 * compared as a full-byte value, not as a single bit.
 */
static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f, 		"10GBASE-CX4 (module)"},
	{IFM_10G_SR, 	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR, 	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
/* SFP+ module compliance bits (I2C byte 3) mapped to ifmedia types */
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR, 	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"}
};
2543
2544static void
2545mxge_set_media(mxge_softc_t *sc, int type)
2546{
2547	sc->media_flags |= type;
2548	ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2549	ifmedia_set(&sc->media, sc->media_flags);
2550}
2551
2552
2553/*
2554 * Determine the media type for a NIC.  Some XFPs will identify
2555 * themselves only when their link is up, so this is initiated via a
2556 * link up interrupt.  However, this can potentially take up to
2557 * several milliseconds, so it is run via the watchdog routine, rather
2558 * than in the interrupt handler itself.   This need only be done
2559 * once, not each time the link is up.
2560 */
2561static void
2562mxge_media_probe(mxge_softc_t *sc)
2563{
2564	mxge_cmd_t cmd;
2565	char *cage_type;
2566	char *ptr;
2567	struct mxge_media_type *mxge_media_types = NULL;
2568	int i, err, ms, mxge_media_type_entries;
2569	uint32_t byte;
2570
2571	sc->need_media_probe = 0;
2572
2573	/* if we've already set a media type, we're done */
2574	if (sc->media_flags  != (IFM_ETHER | IFM_AUTO))
2575		return;
2576
2577	/*
2578	 * parse the product code to deterimine the interface type
2579	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2580	 * after the 3rd dash in the driver's cached copy of the
2581	 * EEPROM's product code string.
2582	 */
2583	ptr = sc->product_code_string;
2584	if (ptr == NULL) {
2585		device_printf(sc->dev, "Missing product code\n");
2586	}
2587
2588	for (i = 0; i < 3; i++, ptr++) {
2589		ptr = index(ptr, '-');
2590		if (ptr == NULL) {
2591			device_printf(sc->dev,
2592				      "only %d dashes in PC?!?\n", i);
2593			return;
2594		}
2595	}
2596	if (*ptr == 'C') {
2597		/* -C is CX4 */
2598		mxge_set_media(sc, IFM_10G_CX4);
2599		return;
2600	}
2601	else if (*ptr == 'Q') {
2602		/* -Q is Quad Ribbon Fiber */
2603		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2604		/* FreeBSD has no media type for Quad ribbon fiber */
2605		return;
2606	}
2607
2608	if (*ptr == 'R') {
2609		/* -R is XFP */
2610		mxge_media_types = mxge_xfp_media_types;
2611		mxge_media_type_entries =
2612			sizeof (mxge_xfp_media_types) /
2613			sizeof (mxge_xfp_media_types[0]);
2614		byte = MXGE_XFP_COMPLIANCE_BYTE;
2615		cage_type = "XFP";
2616	}
2617
2618	if (*ptr == 'S' || *(ptr +1) == 'S') {
2619		/* -S or -2S is SFP+ */
2620		mxge_media_types = mxge_sfp_media_types;
2621		mxge_media_type_entries =
2622			sizeof (mxge_sfp_media_types) /
2623			sizeof (mxge_sfp_media_types[0]);
2624		cage_type = "SFP+";
2625		byte = 3;
2626	}
2627
2628	if (mxge_media_types == NULL) {
2629		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2630		return;
2631	}
2632
2633	/*
2634	 * At this point we know the NIC has an XFP cage, so now we
2635	 * try to determine what is in the cage by using the
2636	 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2637	 * register.  We read just one byte, which may take over
2638	 * a millisecond
2639	 */
2640
2641	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
2642	cmd.data1 = byte;
2643	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2644	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2645		device_printf(sc->dev, "failed to read XFP\n");
2646	}
2647	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2648		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2649	}
2650	if (err != MXGEFW_CMD_OK) {
2651		return;
2652	}
2653
2654	/* now we wait for the data to be cached */
2655	cmd.data0 = byte;
2656	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2657	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2658		DELAY(1000);
2659		cmd.data0 = byte;
2660		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2661	}
2662	if (err != MXGEFW_CMD_OK) {
2663		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2664			      cage_type, err, ms);
2665		return;
2666	}
2667
2668	if (cmd.data0 == mxge_media_types[0].bitmask) {
2669		if (mxge_verbose)
2670			device_printf(sc->dev, "%s:%s\n", cage_type,
2671				      mxge_media_types[0].name);
2672		mxge_set_media(sc, IFM_10G_CX4);
2673		return;
2674	}
2675	for (i = 1; i < mxge_media_type_entries; i++) {
2676		if (cmd.data0 & mxge_media_types[i].bitmask) {
2677			if (mxge_verbose)
2678				device_printf(sc->dev, "%s:%s\n",
2679					      cage_type,
2680					      mxge_media_types[i].name);
2681
2682			mxge_set_media(sc, mxge_media_types[i].flag);
2683			return;
2684		}
2685	}
2686	device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2687		      cmd.data0);
2688
2689	return;
2690}
2691
/*
 * Interrupt handler.  Slice 0 owns the DMA'd firmware stats block:
 * it loops reclaiming tx completions and rx frames until the
 * firmware clears 'valid' (relevant for legacy irq deassert-wait),
 * then handles link-state / RDMA updates and writes the irq claim
 * registers.  Non-zero slices have dedicated MSI-X vectors and only
 * clean their own rx completion ring.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ  */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* firmware set stats_updated: sync link and RDMA state */
	if (__predict_false(stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}
2776
/*
 * if_init method; intentionally empty.  Interface bring-up is done
 * through mxge_open() instead — NOTE(review): presumably invoked
 * from the ioctl path, which is outside this view; confirm.
 */
static void
mxge_init(void *arg)
{
}
2781
2782
2783
2784static void
2785mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2786{
2787	struct lro_entry *lro_entry;
2788	int i;
2789
2790	while (!SLIST_EMPTY(&ss->lro_free)) {
2791		lro_entry = SLIST_FIRST(&ss->lro_free);
2792		SLIST_REMOVE_HEAD(&ss->lro_free, next);
2793		free(lro_entry, M_DEVBUF);
2794	}
2795
2796	for (i = 0; i <= ss->rx_big.mask; i++) {
2797		if (ss->rx_big.info[i].m == NULL)
2798			continue;
2799		bus_dmamap_unload(ss->rx_big.dmat,
2800				  ss->rx_big.info[i].map);
2801		m_freem(ss->rx_big.info[i].m);
2802		ss->rx_big.info[i].m = NULL;
2803	}
2804
2805	for (i = 0; i <= ss->rx_small.mask; i++) {
2806		if (ss->rx_small.info[i].m == NULL)
2807			continue;
2808		bus_dmamap_unload(ss->rx_small.dmat,
2809				  ss->rx_small.info[i].map);
2810		m_freem(ss->rx_small.info[i].m);
2811		ss->rx_small.info[i].m = NULL;
2812	}
2813
2814	/* transmit ring used only on the first slice */
2815	if (ss->tx.info == NULL)
2816		return;
2817
2818	for (i = 0; i <= ss->tx.mask; i++) {
2819		ss->tx.info[i].flag = 0;
2820		if (ss->tx.info[i].m == NULL)
2821			continue;
2822		bus_dmamap_unload(ss->tx.dmat,
2823				  ss->tx.info[i].map);
2824		m_freem(ss->tx.info[i].m);
2825		ss->tx.info[i].m = NULL;
2826	}
2827}
2828
2829static void
2830mxge_free_mbufs(mxge_softc_t *sc)
2831{
2832	int slice;
2833
2834	for (slice = 0; slice < sc->num_slices; slice++)
2835		mxge_free_slice_mbufs(&sc->ss[slice]);
2836}
2837
/*
 * Tear down one slice's host-side ring state: the rx completion DMA
 * block, tx scratch buffers, shadow rings, info rings, and all
 * busdma maps/tags.  Safe to call on a partially-allocated slice
 * (every pointer is checked and NULLed afterwards), which is how
 * mxge_alloc_slice_rings' error paths unwind.
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;


	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	/* destroy per-slot dmamaps before their tag */
	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}
2904
2905static void
2906mxge_free_rings(mxge_softc_t *sc)
2907{
2908	int slice;
2909
2910	for (slice = 0; slice < sc->num_slices; slice++)
2911		mxge_free_slice_rings(&sc->ss[slice]);
2912}
2913
2914static int
2915mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2916		       int tx_ring_entries)
2917{
2918	mxge_softc_t *sc = ss->sc;
2919	size_t bytes;
2920	int err, i;
2921
2922	err = ENOMEM;
2923
2924	/* allocate per-slice receive resources */
2925
2926	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
2927	ss->rx_done.mask = (2 * rx_ring_entries) - 1;
2928
2929	/* allocate the rx shadow rings */
2930	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
2931	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2932	if (ss->rx_small.shadow == NULL)
2933		return err;;
2934
2935	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
2936	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2937	if (ss->rx_big.shadow == NULL)
2938		return err;;
2939
2940	/* allocate the rx host info rings */
2941	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
2942	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2943	if (ss->rx_small.info == NULL)
2944		return err;;
2945
2946	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
2947	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
2948	if (ss->rx_big.info == NULL)
2949		return err;;
2950
2951	/* allocate the rx busdma resources */
2952	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
2953				 1,			/* alignment */
2954				 4096,			/* boundary */
2955				 BUS_SPACE_MAXADDR,	/* low */
2956				 BUS_SPACE_MAXADDR,	/* high */
2957				 NULL, NULL,		/* filter */
2958				 MHLEN,			/* maxsize */
2959				 1,			/* num segs */
2960				 MHLEN,			/* maxsegsize */
2961				 BUS_DMA_ALLOCNOW,	/* flags */
2962				 NULL, NULL,		/* lock */
2963				 &ss->rx_small.dmat);	/* tag */
2964	if (err != 0) {
2965		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
2966			      err);
2967		return err;;
2968	}
2969
2970	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
2971				 1,			/* alignment */
2972#if MXGE_VIRT_JUMBOS
2973				 4096,			/* boundary */
2974#else
2975				 0,			/* boundary */
2976#endif
2977				 BUS_SPACE_MAXADDR,	/* low */
2978				 BUS_SPACE_MAXADDR,	/* high */
2979				 NULL, NULL,		/* filter */
2980				 3*4096,		/* maxsize */
2981#if MXGE_VIRT_JUMBOS
2982				 3,			/* num segs */
2983				 4096,			/* maxsegsize*/
2984#else
2985				 1,			/* num segs */
2986				 MJUM9BYTES,		/* maxsegsize*/
2987#endif
2988				 BUS_DMA_ALLOCNOW,	/* flags */
2989				 NULL, NULL,		/* lock */
2990				 &ss->rx_big.dmat);	/* tag */
2991	if (err != 0) {
2992		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
2993			      err);
2994		return err;;
2995	}
2996	for (i = 0; i <= ss->rx_small.mask; i++) {
2997		err = bus_dmamap_create(ss->rx_small.dmat, 0,
2998					&ss->rx_small.info[i].map);
2999		if (err != 0) {
3000			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
3001				      err);
3002			return err;;
3003		}
3004	}
3005	err = bus_dmamap_create(ss->rx_small.dmat, 0,
3006				&ss->rx_small.extra_map);
3007	if (err != 0) {
3008		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3009			      err);
3010		return err;;
3011	}
3012
3013	for (i = 0; i <= ss->rx_big.mask; i++) {
3014		err = bus_dmamap_create(ss->rx_big.dmat, 0,
3015					&ss->rx_big.info[i].map);
3016		if (err != 0) {
3017			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
3018				      err);
3019			return err;;
3020		}
3021	}
3022	err = bus_dmamap_create(ss->rx_big.dmat, 0,
3023				&ss->rx_big.extra_map);
3024	if (err != 0) {
3025		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3026			      err);
3027		return err;;
3028	}
3029
3030	/* now allocate TX resouces */
3031
3032	/* only use a single TX ring for now */
3033	if (ss != ss->sc->ss)
3034		return 0;
3035
3036	ss->tx.mask = tx_ring_entries - 1;
3037	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3038
3039
3040	/* allocate the tx request copy block */
3041	bytes = 8 +
3042		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3043	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3044	if (ss->tx.req_bytes == NULL)
3045		return err;;
3046	/* ensure req_list entries are aligned to 8 bytes */
3047	ss->tx.req_list = (mcp_kreq_ether_send_t *)
3048		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3049
3050	/* allocate the tx busdma segment list */
3051	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3052	ss->tx.seg_list = (bus_dma_segment_t *)
3053		malloc(bytes, M_DEVBUF, M_WAITOK);
3054	if (ss->tx.seg_list == NULL)
3055		return err;;
3056
3057	/* allocate the tx host info ring */
3058	bytes = tx_ring_entries * sizeof (*ss->tx.info);
3059	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3060	if (ss->tx.info == NULL)
3061		return err;;
3062
3063	/* allocate the tx busdma resources */
3064	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
3065				 1,			/* alignment */
3066				 sc->tx_boundary,	/* boundary */
3067				 BUS_SPACE_MAXADDR,	/* low */
3068				 BUS_SPACE_MAXADDR,	/* high */
3069				 NULL, NULL,		/* filter */
3070				 65536 + 256,		/* maxsize */
3071				 ss->tx.max_desc - 2,	/* num segs */
3072				 sc->tx_boundary,	/* maxsegsz */
3073				 BUS_DMA_ALLOCNOW,	/* flags */
3074				 NULL, NULL,		/* lock */
3075				 &ss->tx.dmat);		/* tag */
3076
3077	if (err != 0) {
3078		device_printf(sc->dev, "Err %d allocating tx dmat\n",
3079			      err);
3080		return err;;
3081	}
3082
3083	/* now use these tags to setup dmamaps for each slot
3084	   in the ring */
3085	for (i = 0; i <= ss->tx.mask; i++) {
3086		err = bus_dmamap_create(ss->tx.dmat, 0,
3087					&ss->tx.info[i].map);
3088		if (err != 0) {
3089			device_printf(sc->dev, "Err %d  tx dmamap\n",
3090				      err);
3091			return err;;
3092		}
3093	}
3094	return 0;
3095
3096}
3097
3098static int
3099mxge_alloc_rings(mxge_softc_t *sc)
3100{
3101	mxge_cmd_t cmd;
3102	int tx_ring_size;
3103	int tx_ring_entries, rx_ring_entries;
3104	int err, slice;
3105
3106	/* get ring sizes */
3107	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3108	tx_ring_size = cmd.data0;
3109	if (err != 0) {
3110		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3111		goto abort;
3112	}
3113
3114	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3115	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3116	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3117	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3118	IFQ_SET_READY(&sc->ifp->if_snd);
3119
3120	for (slice = 0; slice < sc->num_slices; slice++) {
3121		err = mxge_alloc_slice_rings(&sc->ss[slice],
3122					     rx_ring_entries,
3123					     tx_ring_entries);
3124		if (err != 0)
3125			goto abort;
3126	}
3127	return 0;
3128
3129abort:
3130	mxge_free_rings(sc);
3131	return err;
3132
3133}
3134
3135
3136static void
3137mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3138{
3139	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3140
3141	if (bufsize < MCLBYTES) {
3142		/* easy, everything fits in a single buffer */
3143		*big_buf_size = MCLBYTES;
3144		*cl_size = MCLBYTES;
3145		*nbufs = 1;
3146		return;
3147	}
3148
3149	if (bufsize < MJUMPAGESIZE) {
3150		/* still easy, everything still fits in a single buffer */
3151		*big_buf_size = MJUMPAGESIZE;
3152		*cl_size = MJUMPAGESIZE;
3153		*nbufs = 1;
3154		return;
3155	}
3156#if MXGE_VIRT_JUMBOS
3157	/* now we need to use virtually contiguous buffers */
3158	*cl_size = MJUM9BYTES;
3159	*big_buf_size = 4096;
3160	*nbufs = mtu / 4096 + 1;
3161	/* needs to be a power of two, so round up */
3162	if (*nbufs == 3)
3163		*nbufs = 4;
3164#else
3165	*cl_size = MJUM9BYTES;
3166	*big_buf_size = MJUM9BYTES;
3167	*nbufs = 1;
3168#endif
3169}
3170
/*
 * Per-slice bring-up: allocate the LRO entry free list, fetch the
 * lanai (NIC SRAM) addresses of the send/receive rings from the
 * firmware, and stock both receive rings with fresh buffers.
 * Returns 0, EIO (firmware command failure) or ENOMEM.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	struct lro_entry *lro_entry;
	int err, i, slice;


	sc = ss->sc;
	slice = ss - sc->ss;

	SLIST_INIT(&ss->lro_free);
	SLIST_INIT(&ss->lro_active);

	/* best-effort: if an allocation fails, just shrink lro_cnt */
	for (i = 0; i < sc->lro_cnt; i++) {
		lro_entry = (struct lro_entry *)
			malloc(sizeof (*lro_entry), M_DEVBUF,
			       M_NOWAIT | M_ZERO);
		if (lro_entry == NULL) {
			sc->lro_cnt = i;
			break;
		}
		SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
	}
	/* get the lanai pointers to the send and receive rings */

	err = 0;
	/* We currently only send from the first slice */
	if (slice == 0) {
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	}
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* pre-poison all big shadow slots; 0xffffffff is the "invalid"
	   marker (see mxge_submit_8rx), so trailing slots of a
	   multi-buffer group that are never refilled stay inert */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	/* one cluster covers nbufs consecutive slots */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}
3250
/*
 * Bring the interface up: reset the NIC, program the RSS indirection
 * table (multi-slice only), tell the firmware the MTU and buffer
 * geometry, point it at the stats DMA block, open every slice, and
 * finally start the firmware.  Returns 0 or an errno value; on
 * failure after buffers were posted, all ring mbufs are freed.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		/* enable RSS hashing with the configured hash type */
		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	/* choose big-buffer size, cluster size and buffers-per-frame
	   appropriate for the current MTU */
	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr);
	cmd.data2 = sizeof(struct mcp_irq_data);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);

	if (err != 0) {
		/* fall back to the obsolete stats-DMA interface, which
		   only carries the send_done_count field */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* fetch ring pointers and stock buffers for each slice */
	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	/* start the periodic watchdog/statistics tick */
	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);

	return 0;


abort:
	mxge_free_mbufs(sc);

	return err;
}
3378
/*
 * Bring the interface down: stop the tick callout, send ETHERNET_DOWN
 * to the firmware, wait for the "down" interrupt to advance down_cnt,
 * then free all ring mbufs.  Called with the driver mutex held.
 * Always returns 0.
 */
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	callout_stop(&sc->co_hdl);
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	old_down_cnt = sc->down_cnt;
	wmb();		/* flag update must be visible before the command */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		DELAY(10 * sc->intr_coal_delay);
	}
	wmb();
	if (old_down_cnt == sc->down_cnt) {
		/* best effort: log and proceed with teardown anyway */
		device_printf(sc->dev, "never got down irq\n");
	}

	mxge_free_mbufs(sc);

	return 0;
}
3406
/*
 * Program the parts of PCI config space the driver depends on: record
 * the negotiated PCIe link width, raise the PCIe max read request
 * size to 4KB, and enable bus mastering and memory-space decoding.
 * Also re-run after a NIC reboot to restore these settings.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* reg + 0x12 is the PCIe Link Status register;
		   bits 9:4 hold the negotiated link width */
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		/* reg + 0x8 is Device Control; MRRS field (bits 14:12)
		   set to 5 selects a 4096-byte max read request */
		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}
3430
/*
 * Read the NIC's reboot status register through the vendor-specific
 * PCI capability.  This path works even after a NIC reboot has wiped
 * the device's normal register mapping.  Returns the status value, or
 * (uint32_t)-1 if the vendor capability cannot be located.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}
3449
/*
 * The transmit watchdog fired.  Determine whether the NIC rebooted
 * (its PCI config space would then be reset, clearing the busmaster
 * bit); if so, restore config space and re-open the interface.
 * Otherwise just log the transmit ring state.  Returns 0 if the
 * interface was successfully re-opened, else an errno value.
 */
static int
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	int err;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
			return (err);
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* restart the interface if it was running */
		if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
			mxge_close(sc);
			err = mxge_open(sc);
		}
	} else {
		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
			      sc->ss->tx.req, sc->ss->tx.done);
		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
			      sc->ss->tx.pkt_done,
			      be32toh(sc->ss->fw_stats->send_done_count));
		device_printf(sc->dev, "not resetting\n");
	}
	return (err);
}
3510
3511static int
3512mxge_watchdog(mxge_softc_t *sc)
3513{
3514	mxge_tx_ring_t *tx = &sc->ss->tx;
3515	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3516	int err = 0;
3517
3518	/* see if we have outstanding transmits, which
3519	   have been pending for more than mxge_ticks */
3520	if (tx->req != tx->done &&
3521	    tx->watchdog_req != tx->watchdog_done &&
3522	    tx->done == tx->watchdog_done) {
3523		/* check for pause blocking before resetting */
3524		if (tx->watchdog_rx_pause == rx_pause)
3525			err = mxge_watchdog_reset(sc);
3526		else
3527			device_printf(sc->dev, "Flow control blocking "
3528				      "xmits, check link partner\n");
3529	}
3530
3531	tx->watchdog_req = tx->req;
3532	tx->watchdog_done = tx->done;
3533	tx->watchdog_rx_pause = rx_pause;
3534
3535	if (sc->need_media_probe)
3536		mxge_media_probe(sc);
3537	return (err);
3538}
3539
3540static void
3541mxge_update_stats(mxge_softc_t *sc)
3542{
3543	struct mxge_slice_state *ss;
3544	u_long ipackets = 0;
3545	int slice;
3546
3547	for(slice = 0; slice < sc->num_slices; slice++) {
3548		ss = &sc->ss[slice];
3549		ipackets += ss->ipackets;
3550	}
3551	sc->ifp->if_ipackets = ipackets;
3552
3553}
3554static void
3555mxge_tick(void *arg)
3556{
3557	mxge_softc_t *sc = arg;
3558	int err = 0;
3559
3560	/* aggregate stats from different slices */
3561	mxge_update_stats(sc);
3562	if (!sc->watchdog_countdown) {
3563		err = mxge_watchdog(sc);
3564		sc->watchdog_countdown = 4;
3565	}
3566	sc->watchdog_countdown--;
3567	if (err == 0)
3568		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3569
3570}
3571
/*
 * ifmedia change callback.  The media on this adapter is fixed, so
 * any attempt to change it is rejected.
 */
static int
mxge_media_change(struct ifnet *ifp)
{
	return (EINVAL);
}
3577
/*
 * Change the interface MTU to "mtu".  Rejects values whose resulting
 * frame size exceeds the firmware limit (sc->max_mtu) or falls below
 * 60 bytes.  A running interface is closed and re-opened so buffer
 * parameters are recomputed; if the re-open fails, the previous MTU
 * is restored and the interface brought back up best-effort.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* revert to the old MTU and retry the open */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}
3604
3605static void
3606mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3607{
3608	mxge_softc_t *sc = ifp->if_softc;
3609
3610
3611	if (sc == NULL)
3612		return;
3613	ifmr->ifm_status = IFM_AVALID;
3614	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3615	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3616	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3617}
3618
/*
 * Interface ioctl handler.  State-changing operations are serialized
 * on the driver mutex.  Capability toggles maintain the invariant
 * that TSO4 may only be enabled while TXCSUM is enabled.
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promis can allmulti
				   flag chages */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		/* mask holds the capability bits being toggled */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* disabling TXCSUM also disables TSO4 */
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				/* TSO4 requires TXCSUM to be on */
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
                break;

	default:
		err = ENOTTY;
        }
	return err;
}
3721
3722static void
3723mxge_fetch_tunables(mxge_softc_t *sc)
3724{
3725
3726	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
3727	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
3728			  &mxge_flow_control);
3729	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
3730			  &mxge_intr_coal_delay);
3731	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
3732			  &mxge_nvidia_ecrc_enable);
3733	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
3734			  &mxge_force_firmware);
3735	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
3736			  &mxge_deassert_wait);
3737	TUNABLE_INT_FETCH("hw.mxge.verbose",
3738			  &mxge_verbose);
3739	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
3740	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
3741	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
3742	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
3743	if (sc->lro_cnt != 0)
3744		mxge_lro_cnt = sc->lro_cnt;
3745
3746	if (bootverbose)
3747		mxge_verbose = 1;
3748	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
3749		mxge_intr_coal_delay = 30;
3750	if (mxge_ticks == 0)
3751		mxge_ticks = hz / 2;
3752	sc->pause = mxge_flow_control;
3753	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
3754	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_SRC_PORT) {
3755		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
3756	}
3757}
3758
3759
3760static void
3761mxge_free_slices(mxge_softc_t *sc)
3762{
3763	struct mxge_slice_state *ss;
3764	int i;
3765
3766
3767	if (sc->ss == NULL)
3768		return;
3769
3770	for (i = 0; i < sc->num_slices; i++) {
3771		ss = &sc->ss[i];
3772		if (ss->fw_stats != NULL) {
3773			mxge_dma_free(&ss->fw_stats_dma);
3774			ss->fw_stats = NULL;
3775			mtx_destroy(&ss->tx.mtx);
3776		}
3777		if (ss->rx_done.entry != NULL) {
3778			mxge_dma_free(&ss->rx_done.dma);
3779			ss->rx_done.entry = NULL;
3780		}
3781	}
3782	free(sc->ss, M_DEVBUF);
3783	sc->ss = NULL;
3784}
3785
/*
 * Allocate the per-slice state array and each slice's receive
 * completion (interrupt) queue.  The firmware stats block and the tx
 * mutex are allocated only for slice 0, the only slice used for
 * transmit.  Returns 0 or an errno; on failure everything allocated
 * so far is released via mxge_free_slices().
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	/* the rx ring size determines how many interrupt queue slots
	   each slice needs */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used used only on the first
		 * slice for now
		 */
		if (i > 0)
			continue;

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}
3845
/*
 * Decide how many slices (RSS receive queues) this device will use.
 * Multi-slice mode requires the hw.mxge.max_slices tunable to allow
 * it, an SMP system, and at least 2 MSI-X vectors; the RSS-capable
 * firmware must also load and answer the probe commands.  The final
 * count is capped by available MSI-X vectors and CPUs (or the
 * tunable) and rounded down to a power of two.  On any failure the
 * driver reverts to one slice and reloads the original firmware.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 *  don't enable multiple slices if they are not enabled,
	 *  or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	/* revert to the single-slice firmware */
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
3937
3938static int
3939mxge_add_msix_irqs(mxge_softc_t *sc)
3940{
3941	size_t bytes;
3942	int count, err, i, rid;
3943
3944	rid = PCIR_BAR(2);
3945	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
3946						    &rid, RF_ACTIVE);
3947
3948	if (sc->msix_table_res == NULL) {
3949		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
3950		return ENXIO;
3951	}
3952
3953	count = sc->num_slices;
3954	err = pci_alloc_msix(sc->dev, &count);
3955	if (err != 0) {
3956		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
3957			      "err = %d \n", sc->num_slices, err);
3958		goto abort_with_msix_table;
3959	}
3960	if (count < sc->num_slices) {
3961		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
3962			      count, sc->num_slices);
3963		device_printf(sc->dev,
3964			      "Try setting hw.mxge.max_slices to %d\n",
3965			      count);
3966		err = ENOSPC;
3967		goto abort_with_msix;
3968	}
3969	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
3970	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
3971	if (sc->msix_irq_res == NULL) {
3972		err = ENOMEM;
3973		goto abort_with_msix;
3974	}
3975
3976	for (i = 0; i < sc->num_slices; i++) {
3977		rid = i + 1;
3978		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
3979							  SYS_RES_IRQ,
3980							  &rid, RF_ACTIVE);
3981		if (sc->msix_irq_res[i] == NULL) {
3982			device_printf(sc->dev, "couldn't allocate IRQ res"
3983				      " for message %d\n", i);
3984			err = ENXIO;
3985			goto abort_with_res;
3986		}
3987	}
3988
3989	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
3990	sc->msix_ih =  malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
3991
3992	for (i = 0; i < sc->num_slices; i++) {
3993		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
3994				     INTR_TYPE_NET | INTR_MPSAFE,
3995#if __FreeBSD_version > 700030
3996				     NULL,
3997#endif
3998				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
3999		if (err != 0) {
4000			device_printf(sc->dev, "couldn't setup intr for "
4001				      "message %d\n", i);
4002			goto abort_with_intr;
4003		}
4004	}
4005
4006	if (mxge_verbose) {
4007		device_printf(sc->dev, "using %d msix IRQs:",
4008			      sc->num_slices);
4009		for (i = 0; i < sc->num_slices; i++)
4010			printf(" %ld",  rman_get_start(sc->msix_irq_res[i]));
4011		printf("\n");
4012	}
4013	return (0);
4014
4015abort_with_intr:
4016	for (i = 0; i < sc->num_slices; i++) {
4017		if (sc->msix_ih[i] != NULL) {
4018			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4019					  sc->msix_ih[i]);
4020			sc->msix_ih[i] = NULL;
4021		}
4022	}
4023	free(sc->msix_ih, M_DEVBUF);
4024
4025
4026abort_with_res:
4027	for (i = 0; i < sc->num_slices; i++) {
4028		rid = i + 1;
4029		if (sc->msix_irq_res[i] != NULL)
4030			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4031					     sc->msix_irq_res[i]);
4032		sc->msix_irq_res[i] = NULL;
4033	}
4034	free(sc->msix_irq_res, M_DEVBUF);
4035
4036
4037abort_with_msix:
4038	pci_release_msi(sc->dev);
4039
4040abort_with_msix_table:
4041	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4042			     sc->msix_table_res);
4043
4044	return err;
4045}
4046
/*
 * Allocate and set up a single interrupt vector: MSI (rid 1) when the
 * device offers exactly one MSI message, otherwise legacy INTx
 * (rid 0).  Returns 0 or an errno; on handler-setup failure the IRQ
 * resource (and the MSI allocation, if any) is released.
 */
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		/* undo the allocations made above */
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
4083
4084static void
4085mxge_rem_msix_irqs(mxge_softc_t *sc)
4086{
4087	int i, rid;
4088
4089	for (i = 0; i < sc->num_slices; i++) {
4090		if (sc->msix_ih[i] != NULL) {
4091			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4092					  sc->msix_ih[i]);
4093			sc->msix_ih[i] = NULL;
4094		}
4095	}
4096	free(sc->msix_ih, M_DEVBUF);
4097
4098	for (i = 0; i < sc->num_slices; i++) {
4099		rid = i + 1;
4100		if (sc->msix_irq_res[i] != NULL)
4101			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4102					     sc->msix_irq_res[i]);
4103		sc->msix_irq_res[i] = NULL;
4104	}
4105	free(sc->msix_irq_res, M_DEVBUF);
4106
4107	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4108			     sc->msix_table_res);
4109
4110	pci_release_msi(sc->dev);
4111	return;
4112}
4113
4114static void
4115mxge_rem_single_irq(mxge_softc_t *sc)
4116{
4117	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4118	bus_release_resource(sc->dev, SYS_RES_IRQ,
4119			     sc->legacy_irq ? 0 : 1, sc->irq_res);
4120	if (!sc->legacy_irq)
4121		pci_release_msi(sc->dev);
4122}
4123
4124static void
4125mxge_rem_irq(mxge_softc_t *sc)
4126{
4127	if (sc->num_slices > 1)
4128		mxge_rem_msix_irqs(sc);
4129	else
4130		mxge_rem_single_irq(sc);
4131}
4132
/*
 * Allocate and set up the device interrupt(s): MSI-X vectors when
 * multiple slices are enabled, otherwise a single MSI or legacy INTx.
 * Returns 0 or an errno from the underlying setup routine.
 */
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* NOTE: intentionally disabled (if (0 && ...)) debugging aid
	   that would immediately tear down and re-add the MSI-X irqs */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
4149
4150
4151static int
4152mxge_attach(device_t dev)
4153{
4154	mxge_softc_t *sc = device_get_softc(dev);
4155	struct ifnet *ifp;
4156	int err, rid;
4157
4158	sc->dev = dev;
4159	mxge_fetch_tunables(sc);
4160
4161	err = bus_dma_tag_create(NULL,			/* parent */
4162				 1,			/* alignment */
4163				 0,			/* boundary */
4164				 BUS_SPACE_MAXADDR,	/* low */
4165				 BUS_SPACE_MAXADDR,	/* high */
4166				 NULL, NULL,		/* filter */
4167				 65536 + 256,		/* maxsize */
4168				 MXGE_MAX_SEND_DESC, 	/* num segs */
4169				 65536,			/* maxsegsize */
4170				 0,			/* flags */
4171				 NULL, NULL,		/* lock */
4172				 &sc->parent_dmat);	/* tag */
4173
4174	if (err != 0) {
4175		device_printf(sc->dev, "Err %d allocating parent dmat\n",
4176			      err);
4177		goto abort_with_nothing;
4178	}
4179
4180	ifp = sc->ifp = if_alloc(IFT_ETHER);
4181	if (ifp == NULL) {
4182		device_printf(dev, "can not if_alloc()\n");
4183		err = ENOSPC;
4184		goto abort_with_parent_dmat;
4185	}
4186	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4187
4188	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
4189		 device_get_nameunit(dev));
4190	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
4191	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
4192		 "%s:drv", device_get_nameunit(dev));
4193	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
4194		 MTX_NETWORK_LOCK, MTX_DEF);
4195
4196	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);
4197
4198	mxge_setup_cfg_space(sc);
4199
4200	/* Map the board into the kernel */
4201	rid = PCIR_BARS;
4202	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4203					 ~0, 1, RF_ACTIVE);
4204	if (sc->mem_res == NULL) {
4205		device_printf(dev, "could not map memory\n");
4206		err = ENXIO;
4207		goto abort_with_lock;
4208	}
4209	sc->sram = rman_get_virtual(sc->mem_res);
4210	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4211	if (sc->sram_size > rman_get_size(sc->mem_res)) {
4212		device_printf(dev, "impossible memory region size %ld\n",
4213			      rman_get_size(sc->mem_res));
4214		err = ENXIO;
4215		goto abort_with_mem_res;
4216	}
4217
4218	/* make NULL terminated copy of the EEPROM strings section of
4219	   lanai SRAM */
4220	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4221	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4222				rman_get_bushandle(sc->mem_res),
4223				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4224				sc->eeprom_strings,
4225				MXGE_EEPROM_STRINGS_SIZE - 2);
4226	err = mxge_parse_strings(sc);
4227	if (err != 0)
4228		goto abort_with_mem_res;
4229
4230	/* Enable write combining for efficient use of PCIe bus */
4231	mxge_enable_wc(sc);
4232
4233	/* Allocate the out of band dma memory */
4234	err = mxge_dma_alloc(sc, &sc->cmd_dma,
4235			     sizeof (mxge_cmd_t), 64);
4236	if (err != 0)
4237		goto abort_with_mem_res;
4238	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4239	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4240	if (err != 0)
4241		goto abort_with_cmd_dma;
4242
4243	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4244	if (err != 0)
4245		goto abort_with_zeropad_dma;
4246
4247	/* select & load the firmware */
4248	err = mxge_select_firmware(sc);
4249	if (err != 0)
4250		goto abort_with_dmabench;
4251	sc->intr_coal_delay = mxge_intr_coal_delay;
4252
4253	mxge_slice_probe(sc);
4254	err = mxge_alloc_slices(sc);
4255	if (err != 0)
4256		goto abort_with_dmabench;
4257
4258	err = mxge_reset(sc, 0);
4259	if (err != 0)
4260		goto abort_with_slices;
4261
4262	err = mxge_alloc_rings(sc);
4263	if (err != 0) {
4264		device_printf(sc->dev, "failed to allocate rings\n");
4265		goto abort_with_dmabench;
4266	}
4267
4268	err = mxge_add_irq(sc);
4269	if (err != 0) {
4270		device_printf(sc->dev, "failed to add irq\n");
4271		goto abort_with_rings;
4272	}
4273
4274	ifp->if_baudrate = IF_Gbps(10UL);
4275	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4276		IFCAP_VLAN_MTU | IFCAP_LRO;
4277
4278#ifdef MXGE_NEW_VLAN_API
4279	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4280#endif
4281
4282	sc->max_mtu = mxge_max_mtu(sc);
4283	if (sc->max_mtu >= 9000)
4284		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4285	else
4286		device_printf(dev, "MTU limited to %d.  Install "
4287			      "latest firmware for 9000 byte jumbo support\n",
4288			      sc->max_mtu - ETHER_HDR_LEN);
4289	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4290	ifp->if_capenable = ifp->if_capabilities;
4291	if (sc->lro_cnt == 0)
4292		ifp->if_capenable &= ~IFCAP_LRO;
4293	sc->csum_flag = 1;
4294        ifp->if_init = mxge_init;
4295        ifp->if_softc = sc;
4296        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4297        ifp->if_ioctl = mxge_ioctl;
4298        ifp->if_start = mxge_start;
4299	/* Initialise the ifmedia structure */
4300	ifmedia_init(&sc->media, 0, mxge_media_change,
4301		     mxge_media_status);
4302	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
4303	mxge_media_probe(sc);
4304	ether_ifattach(ifp, sc->mac_addr);
4305	/* ether_ifattach sets mtu to 1500 */
4306	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
4307		ifp->if_mtu = 9000;
4308
4309	mxge_add_sysctls(sc);
4310	return 0;
4311
4312abort_with_rings:
4313	mxge_free_rings(sc);
4314abort_with_slices:
4315	mxge_free_slices(sc);
4316abort_with_dmabench:
4317	mxge_dma_free(&sc->dmabench_dma);
4318abort_with_zeropad_dma:
4319	mxge_dma_free(&sc->zeropad_dma);
4320abort_with_cmd_dma:
4321	mxge_dma_free(&sc->cmd_dma);
4322abort_with_mem_res:
4323	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4324abort_with_lock:
4325	pci_disable_busmaster(dev);
4326	mtx_destroy(&sc->cmd_mtx);
4327	mtx_destroy(&sc->driver_mtx);
4328	if_free(ifp);
4329abort_with_parent_dmat:
4330	bus_dma_tag_destroy(sc->parent_dmat);
4331
4332abort_with_nothing:
4333	return err;
4334}
4335
/*
 * Device detach: refuse while vlans are still configured; otherwise
 * close a running interface and release everything acquired by
 * mxge_attach() in reverse order.  Returns 0 or EBUSY.
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	/* wait for any in-flight tick callout to finish */
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
4369
4370static int
4371mxge_shutdown(device_t dev)
4372{
4373	return 0;
4374}
4375
4376/*
4377  This file uses Myri10GE driver indentation.
4378
4379  Local Variables:
4380  c-file-style:"linux"
4381  tab-width:8
4382  End:
4383*/
4384