if_mxge.c revision 160456
/******************************************************************************

Copyright (c) 2006, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 160456 2006-07-17 22:17:05Z gallatin $");
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/linker.h>
40#include <sys/firmware.h>
41#include <sys/endian.h>
42#include <sys/sockio.h>
43#include <sys/mbuf.h>
44#include <sys/malloc.h>
45#include <sys/kdb.h>
46#include <sys/kernel.h>
47#include <sys/module.h>
48#include <sys/memrange.h>
49#include <sys/socket.h>
50#include <sys/sysctl.h>
51#include <sys/sx.h>
52
53#include <net/if.h>
54#include <net/if_arp.h>
55#include <net/ethernet.h>
56#include <net/if_dl.h>
57#include <net/if_media.h>
58
59#include <net/bpf.h>
60
61#include <net/if_types.h>
62#include <net/if_vlan_var.h>
63#include <net/zlib.h>
64
65#include <netinet/in_systm.h>
66#include <netinet/in.h>
67#include <netinet/ip.h>
68
69#include <machine/bus.h>
70#include <machine/resource.h>
71#include <sys/bus.h>
72#include <sys/rman.h>
73
74#include <dev/pci/pcireg.h>
75#include <dev/pci/pcivar.h>
76
77#include <vm/vm.h>		/* for pmap_mapdev() */
78#include <vm/pmap.h>
79
80#include <dev/mxge/mxge_mcp.h>
81#include <dev/mxge/mcp_gen_header.h>
82#include <dev/mxge/if_mxge_var.h>
83
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;	/* try to enable ECRC on Nvidia bridges */
static int mxge_max_intr_slots = 1024;	/* entries in the rx-done interrupt queue */
static int mxge_intr_coal_delay = 30;	/* interrupt coalescing delay, usecs */
static int mxge_deassert_wait = 1;	/* wait for IRQ line to go low in ihandler */
static int mxge_flow_control = 1;	/* link-level flow control default */
static int mxge_verbose = 0;		/* chatty attach/boot messages */
/* firmware image for NICs whose PCIe completions may be unaligned */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
/* firmware image for NICs guaranteed 8-byte-aligned completions */
static char *mxge_fw_aligned = "mxge_eth_z8e";
93
/* newbus device interface entry points (defined later in this file) */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
  /* Device interface */
  DEVMETHOD(device_probe, mxge_probe),
  DEVMETHOD(device_attach, mxge_attach),
  DEVMETHOD(device_detach, mxge_detach),
  DEVMETHOD(device_shutdown, mxge_shutdown),
  {0, 0}			/* table terminator */
};

static driver_t mxge_driver =
{
  "mxge",			/* driver name */
  mxge_methods,
  sizeof(mxge_softc_t),		/* per-instance softc size */
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
/* MCP images are loaded via firmware(9), so depend on that module */
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
122
123static int
124mxge_probe(device_t dev)
125{
126  if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
127      (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) {
128	  device_set_desc(dev, "Myri10G-PCIE-8A");
129	  return 0;
130  }
131  return ENXIO;
132}
133
/*
 * Ask the memory-range (MTRR) subsystem to mark the device's BAR as
 * write-combining so PIO copies into NIC SRAM can be burst.  On
 * success sc->wc is set to 1; on failure a diagnostic is printed and
 * sc->wc is left untouched.
 */
static void
mxge_enable_wc(mxge_softc_t *sc)
{
	struct mem_range_desc mrdesc;
	vm_paddr_t pa;
	vm_offset_t len;
	int err, action;

	/* cover the entire register/SRAM BAR */
	pa = rman_get_start(sc->mem_res);
	len = rman_get_size(sc->mem_res);
	mrdesc.mr_base = pa;
	mrdesc.mr_len = len;
	mrdesc.mr_flags = MDF_WRITECOMBINE;
	action = MEMRANGE_SET_UPDATE;
	/* NOTE(review): assumes mr_owner has room for "mxge" plus its
	   NUL — confirm against struct mem_range_desc */
	strcpy((char *)&mrdesc.mr_owner, "mxge");
	err = mem_range_attr_set(&mrdesc, &action);
	if (err != 0) {
		device_printf(sc->dev,
			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
			      (unsigned long)pa, (unsigned long)len, err);
	} else {
		sc->wc = 1;
	}
}
158
159
160/* callback to get our DMA address */
161static void
162mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
163			 int error)
164{
165	if (error == 0) {
166		*(bus_addr_t *) arg = segs->ds_addr;
167	}
168}
169
/*
 * Allocate a DMA-able buffer of 'bytes' bytes with the requested
 * alignment, recording the tag, map, kernel address and bus address in
 * *dma.  Returns 0 or a bus_dma errno; on failure nothing is left
 * allocated (cleanup via the goto chain below).
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
		   bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO),  &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory; the callback stores the single segment's bus
	   address into dma->bus_addr (tag was created with nsegs == 1) */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
220
221
/*
 * Undo mxge_dma_alloc() in reverse order: unload the map, free the
 * memory, then destroy the tag.
 */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
229
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

/*
 * Walk the NUL-separated eeprom strings and extract the MAC address,
 * product code and serial number into the softc.  Returns 0 if a MAC
 * was found, else ENXIO.
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
/* advance ptr past the current string's terminating NUL (bounded) */
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/* Advance 1 here plus 3 at the top of the loop
			   below: the first octet is read at ptr + 4
			   (just past "MAC="), later octets at +3 each
			   (past "xx:").  Note mac_addr_string therefore
			   points at "AC=..." — looks intentional, but
			   verify against consumers of that field. */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			/* NOTE(review): strncpy does not guarantee NUL
			   termination; assumes the softc buffers start
			   zeroed — confirm */
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

 abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
279
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
/*
 * Enable ECRC generation on the upstream Nvidia bridge by setting bit
 * 0x40 of extended config register 0x178.  Extended (>0xff) config
 * space is not reachable through the normal pci accessors here, so the
 * bridge's memory-mapped config window is mapped directly with
 * pmap_mapdev().  Returns 0 on success, EIO on any mapping/probe
 * failure.  x86/amd64 only (see #else variant below).
 */
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	uint32_t val;
	unsigned long off;
	char *va, *cfgptr;
	uint16_t vendor_id, device_id;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return 0;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	/* look up the bridge's location and IDs through the bus ivars */
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* physical address of this function's config window:
	   base 0xe0000000 + 1MB per bus + 4KB per (slot,function) */
	off =  0xe0000000UL
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return EIO;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}

	/* extended config register 0x178; bit 0x40 enables ECRC */
	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return 0;
}
#else
/* Non-x86 stub: the Nvidia config-window trick above is x86-specific. */
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return ENXIO;
}
#endif
379/*
380 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
381 * when the PCI-E Completion packets are aligned on an 8-byte
382 * boundary.  Some PCI-E chip sets always align Completion packets; on
383 * the ones that do not, the alignment can be enforced by enabling
384 * ECRC generation (if supported).
385 *
386 * When PCI-E Completion packets are not aligned, it is actually more
387 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
388 *
389 * If the driver can neither enable ECRC nor verify that it has
390 * already been enabled, then it must use a firmware image which works
391 * around unaligned completion packets (ethp_z8e.dat), and it should
392 * also ensure that it never gives the device a Read-DMA which is
393 * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
394 * enabled, then the driver should use the aligned (eth_z8e.dat)
395 * firmware image, and set tx.boundary to 4KB.
396 */
397
398static void
399mxge_select_firmware(mxge_softc_t *sc)
400{
401	int err, aligned = 0;
402	device_t pdev;
403	uint16_t pvend, pdid;
404
405	pdev = device_get_parent(device_get_parent(sc->dev));
406	if (pdev == NULL) {
407		device_printf(sc->dev, "could not find parent?\n");
408		goto abort;
409	}
410	pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
411	pdid = pci_read_config(pdev, PCIR_DEVICE, 2);
412
413	/* see if we can enable ECRC's on an upstream
414	   Nvidia bridge */
415	if (mxge_nvidia_ecrc_enable &&
416	    (pvend == 0x10de && pdid == 0x005d)) {
417		err = mxge_enable_nvidia_ecrc(sc, pdev);
418		if (err == 0) {
419			aligned = 1;
420			if (mxge_verbose)
421				device_printf(sc->dev,
422					      "Assuming aligned completions"
423					      " (ECRC)\n");
424		}
425	}
426	/* see if the upstream bridge is known to
427	   provided aligned completions */
428	if (/* HT2000  */ (pvend == 0x1166 && pdid == 0x0132) ||
429	    /* Ontario */ (pvend == 0x10b5 && pdid == 0x8532)) {
430		if (mxge_verbose)
431			device_printf(sc->dev,
432				      "Assuming aligned completions "
433				      "(0x%x:0x%x)\n", pvend, pdid);
434	}
435
436abort:
437	if (aligned) {
438		sc->fw_name = mxge_fw_aligned;
439		sc->tx.boundary = 4096;
440	} else {
441		sc->fw_name = mxge_fw_unaligned;
442		sc->tx.boundary = 2048;
443	}
444}
445
/*
 * Used to cast away the const qualifier on firmware(9) image data so
 * it can be handed to mxge_pio_copy() without a compiler warning.
 */
union qualhack
{
        const char *ro_char;
        char *rw_char;
};
451
452static int
453mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
454{
455	int major, minor;
456
457	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
458		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
459			      be32toh(hdr->mcp_type));
460		return EIO;
461	}
462
463	/* save firmware version for sysctl */
464	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
465	if (mxge_verbose)
466		device_printf(sc->dev, "firmware id: %s\n", hdr->version);
467
468	sscanf(sc->fw_version, "%d.%d", &major, &minor);
469
470	if (!(major == MXGEFW_VERSION_MAJOR
471	      && minor == MXGEFW_VERSION_MINOR)) {
472		device_printf(sc->dev, "Found firmware version %s\n",
473			      sc->fw_version);
474		device_printf(sc->dev, "Driver needs %d.%d\n",
475			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
476		return EINVAL;
477	}
478	return 0;
479
480}
481
/*
 * Fetch the image named sc->fw_name via firmware(9), validate its
 * header, and PIO-copy it into NIC SRAM at MXGE_FW_OFFSET.  On entry
 * *limit holds the maximum acceptable image size; on success it is
 * updated to the actual size.  Returns 0 or an errno; the firmware(9)
 * reference is always released before returning.
 */
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	const char *fw_data;
	union qualhack hack;
	int status;
	unsigned int i;
	char dummy;


	fw = firmware_get(sc->fw_name);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
	/* image must fit under *limit and be large enough to contain
	   the header pointer read below */
	if (fw->datasize > *limit ||
	    fw->datasize < MCP_HEADER_PTR_OFFSET + 4) {
		device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n",
			      sc->fw_name, (int)fw->datasize, (int) *limit);
		status = ENOSPC;
		goto abort_with_fw;
	}
	*limit = fw->datasize;

	/* check id */
	/* the header offset is stored big-endian in the image (htobe32
	   performs the same byte swap as be32toh) */
	fw_data = (const char *)fw->data;
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw_data + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw_data + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* qualhack strips const so the data can go through mxge_pio_copy */
	hack.ro_char = fw_data;
	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < *limit; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      hack.rw_char + i,
			      min(256U, (unsigned)(*limit - i)));
		mb();
		/* read back one byte to flush the preceding PIO writes */
		dummy = *sc->sram;
		mb();
	}

	status = 0;
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
542
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* carve an 8-byte-aligned request buffer out of buf_bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);			/* enable? */


	/* SRAM offset 0xfc01c0 is where this request is submitted */
	submit = (volatile char *)(sc->sram + 0xfc01c0);

	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	/* poll up to ~20ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
599
600static int
601mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
602{
603	mcp_cmd_t *buf;
604	char buf_bytes[sizeof(*buf) + 8];
605	volatile mcp_cmd_response_t *response = sc->cmd;
606	volatile char *cmd_addr = sc->sram + MXGEFW_CMD_OFFSET;
607	uint32_t dma_low, dma_high;
608	int sleep_total = 0;
609
610	/* ensure buf is aligned to 8 bytes */
611	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
612
613	buf->data0 = htobe32(data->data0);
614	buf->data1 = htobe32(data->data1);
615	buf->data2 = htobe32(data->data2);
616	buf->cmd = htobe32(cmd);
617	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
618	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
619
620	buf->response_addr.low = htobe32(dma_low);
621	buf->response_addr.high = htobe32(dma_high);
622	mtx_lock(&sc->cmd_lock);
623	response->result = 0xffffffff;
624	mb();
625	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
626
627	/* wait up to 20ms */
628	for (sleep_total = 0; sleep_total <  20; sleep_total++) {
629		bus_dmamap_sync(sc->cmd_dma.dmat,
630				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
631		mb();
632		if (response->result != 0xffffffff) {
633			if (response->result == 0) {
634				data->data0 = be32toh(response->data);
635				mtx_unlock(&sc->cmd_lock);
636				return 0;
637			} else {
638				device_printf(sc->dev,
639					      "mxge: command %d "
640					      "failed, result = %d\n",
641					      cmd, be32toh(response->result));
642				mtx_unlock(&sc->cmd_lock);
643				return ENXIO;
644			}
645		}
646		DELAY(1000);
647	}
648	mtx_unlock(&sc->cmd_lock);
649	device_printf(sc->dev, "mxge: command %d timed out"
650		      "result = %d\n",
651		      cmd, be32toh(response->result));
652	return EAGAIN;
653}
654
/*
 * Fallback when loading a new image fails: read the header of the
 * firmware already running on the NIC out of SRAM and validate its
 * version.  Returns 0 if the running firmware is acceptable.
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header (offset stored big-endian in
	   SRAM; htobe32 performs the same byte swap as be32toh) */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);
	return status;
}
688
689
/*
 * Load firmware onto the NIC: try the image selected by
 * mxge_select_firmware(); if that fails, fall back to adopting the
 * firmware already running.  Then hand control to the bootstrap MCP
 * and wait for its acknowledgement; finally enable dummy RDMAs.
 * Returns 0 or an errno.
 */
static int
mxge_load_firmware(mxge_softc_t *sc)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	/* carve an 8-byte-aligned handoff request out of buf_bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx.boundary == 4096) {
			device_printf(sc->dev,
				"Using firmware currently running on NIC"
				 ".  For optimal\n");
			device_printf(sc->dev,
				 "performance consider loading optimized "
				 "firmware\n");
		}

	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	/* SRAM offset 0xfc0000 is the bootstrap MCP's handoff mailbox */
	submit = (volatile char *)(sc->sram + 0xfc0000);
	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	/* poll up to ~200ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			confirm, *confirm);

		return ENXIO;
	}
	/* start periodic dummy RDMAs (chipset workaround, see above) */
	mxge_dummy_rdma(sc, 1);
	return 0;
}
771
772static int
773mxge_update_mac_address(mxge_softc_t *sc)
774{
775	mxge_cmd_t cmd;
776	uint8_t *addr = sc->mac_addr;
777	int status;
778
779
780	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
781		     | (addr[2] << 8) | addr[3]);
782
783	cmd.data1 = ((addr[4] << 8) | (addr[5]));
784
785	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
786	return status;
787}
788
789static int
790mxge_change_pause(mxge_softc_t *sc, int pause)
791{
792	mxge_cmd_t cmd;
793	int status;
794
795	if (pause)
796		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
797				       &cmd);
798	else
799		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
800				       &cmd);
801
802	if (status) {
803		device_printf(sc->dev, "Failed to set flow control mode\n");
804		return ENXIO;
805	}
806	sc->pause = pause;
807	return 0;
808}
809
810static void
811mxge_change_promisc(mxge_softc_t *sc, int promisc)
812{
813	mxge_cmd_t cmd;
814	int status;
815
816	if (promisc)
817		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
818				       &cmd);
819	else
820		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
821				       &cmd);
822
823	if (status) {
824		device_printf(sc->dev, "Failed to set promisc mode\n");
825	}
826}
827
828static int
829mxge_reset(mxge_softc_t *sc)
830{
831
832	mxge_cmd_t cmd;
833	mxge_dma_t dmabench_dma;
834	size_t bytes;
835	int status;
836
837	/* try to send a reset command to the card to see if it
838	   is alive */
839	memset(&cmd, 0, sizeof (cmd));
840	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
841	if (status != 0) {
842		device_printf(sc->dev, "failed reset\n");
843		return ENXIO;
844	}
845
846	/* Now exchange information about interrupts  */
847	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);\
848	memset(sc->rx_done.entry, 0, bytes);
849	cmd.data0 = (uint32_t)bytes;
850	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
851	cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
852	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
853	status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
854
855	status |= mxge_send_cmd(sc,
856				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
857
858
859	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
860
861	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
862	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
863
864
865	status |= mxge_send_cmd(sc,  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
866				&cmd);
867	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
868	if (status != 0) {
869		device_printf(sc->dev, "failed set interrupt parameters\n");
870		return status;
871	}
872
873
874	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
875
876
877	/* run a DMA benchmark */
878	sc->read_dma = sc->write_dma = sc->read_write_dma = 0;
879	status = mxge_dma_alloc(sc, &dmabench_dma, 4096, 4096);
880	if (status)
881		goto dmabench_fail;
882
883	/* Read DMA */
884	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
885	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
886	cmd.data2 = sc->tx.boundary * 0x10000;
887
888	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
889	if (status != 0)
890		device_printf(sc->dev, "read dma benchmark failed\n");
891	else
892		sc->read_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
893			(cmd.data0 & 0xffff);
894
895	/* Write DMA */
896	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
897	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
898	cmd.data2 = sc->tx.boundary * 0x1;
899	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
900	if (status != 0)
901		device_printf(sc->dev, "write dma benchmark failed\n");
902	else
903		sc->write_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
904			(cmd.data0 & 0xffff);
905	/* Read/Write DMA */
906	cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr);
907	cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr);
908	cmd.data2 = sc->tx.boundary * 0x10001;
909	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
910	if (status != 0)
911		device_printf(sc->dev, "read/write dma benchmark failed\n");
912	else
913		sc->read_write_dma =
914			((cmd.data0>>16) * sc->tx.boundary * 2 * 2) /
915			(cmd.data0 & 0xffff);
916
917	mxge_dma_free(&dmabench_dma);
918
919dmabench_fail:
920	/* reset mcp/driver shared state back to 0 */
921	bzero(sc->rx_done.entry, bytes);
922	sc->rx_done.idx = 0;
923	sc->rx_done.cnt = 0;
924	sc->tx.req = 0;
925	sc->tx.done = 0;
926	sc->tx.pkt_done = 0;
927	sc->rx_big.cnt = 0;
928	sc->rx_small.cnt = 0;
929	sc->rdma_tags_available = 15;
930	status = mxge_update_mac_address(sc);
931	mxge_change_promisc(sc, 0);
932	mxge_change_pause(sc, sc->pause);
933	return status;
934}
935
936static int
937mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
938{
939        mxge_softc_t *sc;
940        unsigned int intr_coal_delay;
941        int err;
942
943        sc = arg1;
944        intr_coal_delay = sc->intr_coal_delay;
945        err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
946        if (err != 0) {
947                return err;
948        }
949        if (intr_coal_delay == sc->intr_coal_delay)
950                return 0;
951
952        if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
953                return EINVAL;
954
955	sx_xlock(&sc->driver_lock);
956	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
957	sc->intr_coal_delay = intr_coal_delay;
958
959	sx_xunlock(&sc->driver_lock);
960        return err;
961}
962
963static int
964mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
965{
966        mxge_softc_t *sc;
967        unsigned int enabled;
968        int err;
969
970        sc = arg1;
971        enabled = sc->pause;
972        err = sysctl_handle_int(oidp, &enabled, arg2, req);
973        if (err != 0) {
974                return err;
975        }
976        if (enabled == sc->pause)
977                return 0;
978
979	sx_xlock(&sc->driver_lock);
980	err = mxge_change_pause(sc, enabled);
981	sx_xunlock(&sc->driver_lock);
982        return err;
983}
984
/*
 * Sysctl handler for read-only firmware counters, which are stored in
 * network (big-endian) byte order: byte-swap the value and hand it to
 * sysctl_handle_int() as a constant (arg1 == NULL, value in arg2).
 */
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
        int err;

        if (arg1 == NULL)
                return EFAULT;
        arg2 = be32toh(*(int *)arg1);
        arg1 = NULL;
        err = sysctl_handle_int(oidp, arg1, arg2, req);

        return err;
}
998
999static void
1000mxge_add_sysctls(mxge_softc_t *sc)
1001{
1002	struct sysctl_ctx_list *ctx;
1003	struct sysctl_oid_list *children;
1004	mcp_irq_data_t *fw;
1005
1006	ctx = device_get_sysctl_ctx(sc->dev);
1007	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1008	fw = sc->fw_stats;
1009
1010	/* random information */
1011	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1012		       "firmware_version",
1013		       CTLFLAG_RD, &sc->fw_version,
1014		       0, "firmware version");
1015	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1016		       "serial_number",
1017		       CTLFLAG_RD, &sc->serial_number_string,
1018		       0, "serial number");
1019	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1020		       "product_code",
1021		       CTLFLAG_RD, &sc->product_code_string,
1022		       0, "product_code");
1023	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1024		       "tx_boundary",
1025		       CTLFLAG_RD, &sc->tx.boundary,
1026		       0, "tx_boundary");
1027	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1028		       "read_dma_MBs",
1029		       CTLFLAG_RD, &sc->read_dma,
1030		       0, "DMA Read speed in MB/s");
1031	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1032		       "write_dma_MBs",
1033		       CTLFLAG_RD, &sc->write_dma,
1034		       0, "DMA Write speed in MB/s");
1035	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1036		       "read_write_dma_MBs",
1037		       CTLFLAG_RD, &sc->read_write_dma,
1038		       0, "DMA concurrent Read/Write speed in MB/s");
1039
1040
1041	/* performance related tunables */
1042	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1043			"intr_coal_delay",
1044			CTLTYPE_INT|CTLFLAG_RW, sc,
1045			0, mxge_change_intr_coal,
1046			"I", "interrupt coalescing delay in usecs");
1047
1048	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1049			"flow_control_enabled",
1050			CTLTYPE_INT|CTLFLAG_RW, sc,
1051			0, mxge_change_flow_control,
1052			"I", "interrupt coalescing delay in usecs");
1053
1054	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1055		       "deassert_wait",
1056		       CTLFLAG_RW, &mxge_deassert_wait,
1057		       0, "Wait for IRQ line to go low in ihandler");
1058
1059	/* stats block from firmware is in network byte order.
1060	   Need to swap it */
1061	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1062			"link_up",
1063			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1064			0, mxge_handle_be32,
1065			"I", "link up");
1066	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1067			"rdma_tags_available",
1068			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1069			0, mxge_handle_be32,
1070			"I", "rdma_tags_available");
1071	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1072			"dropped_link_overflow",
1073			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1074			0, mxge_handle_be32,
1075			"I", "dropped_link_overflow");
1076	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1077			"dropped_link_error_or_filtered",
1078			CTLTYPE_INT|CTLFLAG_RD,
1079			&fw->dropped_link_error_or_filtered,
1080			0, mxge_handle_be32,
1081			"I", "dropped_link_error_or_filtered");
1082	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1083			"dropped_runt",
1084			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1085			0, mxge_handle_be32,
1086			"I", "dropped_runt");
1087	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1088			"dropped_overrun",
1089			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1090			0, mxge_handle_be32,
1091			"I", "dropped_overrun");
1092	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1093			"dropped_no_small_buffer",
1094			CTLTYPE_INT|CTLFLAG_RD,
1095			&fw->dropped_no_small_buffer,
1096			0, mxge_handle_be32,
1097			"I", "dropped_no_small_buffer");
1098	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1099			"dropped_no_big_buffer",
1100			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1101			0, mxge_handle_be32,
1102			"I", "dropped_no_big_buffer");
1103
1104	/* host counters exported for debugging */
1105	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1106		       "rx_small_cnt",
1107		       CTLFLAG_RD, &sc->rx_small.cnt,
1108		       0, "rx_small_cnt");
1109	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1110		       "rx_big_cnt",
1111		       CTLFLAG_RD, &sc->rx_big.cnt,
1112		       0, "rx_small_cnt");
1113	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1114		       "tx_req",
1115		       CTLFLAG_RD, &sc->tx.req,
1116		       0, "tx_req");
1117	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1118		       "tx_done",
1119		       CTLFLAG_RD, &sc->tx.done,
1120		       0, "tx_done");
1121	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1122		       "tx_pkt_done",
1123		       CTLFLAG_RD, &sc->tx.pkt_done,
1124		       0, "tx_done");
1125
1126	/* verbose printing? */
1127	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1128		       "verbose",
1129		       CTLFLAG_RW, &mxge_verbose,
1130		       0, "verbose printing");
1131
1132}
1133
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_buf_t *tx,
			    mcp_kreq_ether_send_t *src, int cnt)
{
        int idx, starting_slot;
        starting_slot = tx->req;
        /* Walk from the last descriptor back to the second.  The
	   first descriptor (src[0]) is deliberately NOT written here:
	   the caller submits it afterwards, since writing it is what
	   makes the whole chain visible/valid to the NIC. */
        while (cnt > 1) {
                cnt--;
                /* mask handles wrap past the end of the send ring */
                idx = (starting_slot + cnt) & tx->mask;
                mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
                mb();	/* order each PIO burst before the next */
        }
}
1151
1152/*
1153 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
1154 * at most 32 bytes at a time, so as to avoid involving the software
1155 * pio handler in the nic.   We re-write the first segment's flags
1156 * to mark them valid only after writing the entire chain
1157 */
1158
1159static inline void
1160mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
1161                  int cnt)
1162{
1163        int idx, i;
1164        uint32_t *src_ints;
1165	volatile uint32_t *dst_ints;
1166        mcp_kreq_ether_send_t *srcp;
1167	volatile mcp_kreq_ether_send_t *dstp, *dst;
1168	uint8_t last_flags;
1169
1170        idx = tx->req & tx->mask;
1171
1172	last_flags = src->flags;
1173	src->flags = 0;
1174        mb();
1175        dst = dstp = &tx->lanai[idx];
1176        srcp = src;
1177
1178        if ((idx + cnt) < tx->mask) {
1179                for (i = 0; i < (cnt - 1); i += 2) {
1180                        mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1181                        mb(); /* force write every 32 bytes */
1182                        srcp += 2;
1183                        dstp += 2;
1184                }
1185        } else {
1186                /* submit all but the first request, and ensure
1187                   that it is submitted below */
1188                mxge_submit_req_backwards(tx, src, cnt);
1189                i = 0;
1190        }
1191        if (i < cnt) {
1192                /* submit the first request */
1193                mxge_pio_copy(dstp, srcp, sizeof(*src));
1194                mb(); /* barrier before setting valid flag */
1195        }
1196
1197        /* re-write the last 32-bits with the valid flags */
1198        src->flags = last_flags;
1199        src_ints = (uint32_t *)src;
1200        src_ints+=3;
1201        dst_ints = (volatile uint32_t *)dst;
1202        dst_ints+=3;
1203        *dst_ints =  *src_ints;
1204        tx->req += cnt;
1205        mb();
1206}
1207
/* Submit a chain of send requests through the write-combining fifo,
   64 bytes (4 requests) per burst. */
static inline void
mxge_submit_req_wc(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
    /* advance the host request count before the PIO bursts begin */
    tx->req += cnt;
    mb();
    while (cnt >= 4) {
	    mxge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
	    mb();
	    src += 4;
	    cnt -= 4;
    }
    if (cnt > 0) {
	    /* pad it to 64 bytes.  The src is 64 bytes bigger than it
	       needs to be so that we don't overrun it */
	    /* NOTE(review): the (cnt<<18) fifo offset appears to encode
	       the number of valid requests in the burst address --
	       confirm against the Myri10GE firmware interface spec */
	    mxge_pio_copy(tx->wc_fifo + (cnt<<18), src, 64);
	    mb();
    }
}
1226
/*
 * DMA-map one outbound frame and hand it to the NIC as a chain of
 * send descriptors.  Called with the tx lock held.  On failure the
 * mbuf is freed and if_oerrors is bumped.
 */
static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t seg_list[MXGE_MAX_SEND_DESC];
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ether_header *eh;
	struct ip *ip;
	int cnt, cum_len, err, i, idx;
	uint16_t flags, pseudo_hdr_offset;
        uint8_t cksum_offset;



	ifp = sc->ifp;
	tx = &sc->tx;

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d\n",
			      err);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* tell the firmware where the transport checksum field
		   lives and what to fold into the pseudo header */
		eh = mtod(m, struct ether_header *);
		ip = (struct ip *) (eh + 1);
		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		/* the checksum offset only matters for the segment that
		   contains the transport header; once consumed, zero it */
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		/* append one more descriptor pointing at a pre-allocated
		   block of zeros so the wire frame reaches minimum size */
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	/* flag the slot of the last descriptor so mxge_tx_done() can
	   count completed packets */
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}
1360
1361
1362
1363
1364static inline void
1365mxge_start_locked(mxge_softc_t *sc)
1366{
1367	struct mbuf *m;
1368	struct ifnet *ifp;
1369
1370	ifp = sc->ifp;
1371	while ((sc->tx.mask - (sc->tx.req - sc->tx.done))
1372	       > MXGE_MAX_SEND_DESC) {
1373
1374		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1375		if (m == NULL) {
1376			return;
1377		}
1378		/* let BPF see it */
1379		BPF_MTAP(ifp, m);
1380
1381		/* give it to the nic */
1382		mxge_encap(sc, m);
1383	}
1384	/* ran out of transmit slots */
1385	sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1386}
1387
1388static void
1389mxge_start(struct ifnet *ifp)
1390{
1391	mxge_softc_t *sc = ifp->if_softc;
1392
1393
1394	mtx_lock(&sc->tx_lock);
1395	mxge_start_locked(sc);
1396	mtx_unlock(&sc->tx_lock);
1397}
1398
1399/*
1400 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
1401 * at most 32 bytes at a time, so as to avoid involving the software
1402 * pio handler in the nic.   We re-write the first segment's low
1403 * DMA address to mark it valid only after we write the entire chunk
1404 * in a burst
1405 */
1406static inline void
1407mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
1408		mcp_kreq_ether_recv_t *src)
1409{
1410	uint32_t low;
1411
1412	low = src->addr_low;
1413	src->addr_low = 0xffffffff;
1414	mxge_pio_copy(dst, src, 8 * sizeof (*src));
1415	mb();
1416	dst->addr_low = low;
1417	mb();
1418}
1419
/*
 * Allocate a fresh mbuf for small-rx slot idx, DMA-map it, and record
 * its bus address in the shadow ring.  Every 8th slot, push the group
 * of 8 shadow entries to the NIC.  Returns 0 or an errno.
 */
static int
mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* batch descriptor writes: whenever the last slot of a group
	   of 8 has been (re)filled, submit all 8 shadow entries.
	   NOTE(review): on allocation failure the group is still
	   submitted with the old (recycled) buffer address -- presumably
	   intentional; confirm against the firmware interface */
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
        }
	return err;
}
1459
/*
 * Allocate a fresh cluster mbuf (sc->big_bytes) for big-rx slot idx,
 * DMA-map it, and record its bus address in the shadow ring.  Every
 * 8th slot, push the group of 8 shadow entries to the NIC.  Returns
 * 0 or an errno.
 */
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err;

	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = sc->big_bytes;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* batch descriptor writes: submit 8 shadow entries whenever the
	   last slot of a group of 8 has been (re)filled */
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
        }
	return err;
}
1499
1500static inline void
1501mxge_rx_csum(struct mbuf *m, int csum)
1502{
1503	struct ether_header *eh;
1504	struct ip *ip;
1505
1506	eh = mtod(m, struct ether_header *);
1507	if (__predict_true(eh->ether_type ==  htons(ETHERTYPE_IP))) {
1508		ip = (struct ip *)(eh + 1);
1509		if (__predict_true(ip->ip_p == IPPROTO_TCP ||
1510				   ip->ip_p == IPPROTO_UDP)) {
1511			m->m_pkthdr.csum_data = csum;
1512			m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
1513		}
1514	}
1515}
1516
/*
 * Receive path for frames larger than MHLEN: the frame spans one or
 * more big-ring buffers.  Each buffer is replaced in the ring, the
 * received mbufs are chained together, and the frame is handed to the
 * stack.  If any refill fails, the frame is dropped and the remaining
 * slots are recycled through the allocator.
 */
static inline void
mxge_rx_done_big(mxge_softc_t *sc, int len, int csum)
{
	struct ifnet *ifp;
	struct mbuf *m = 0; 		/* -Wunitialized */
	struct mbuf *m_prev = 0;	/* -Wunitialized */
	struct mbuf *m_head = 0;
	bus_dmamap_t old_map;
	mxge_rx_buf_t *rx;
	int idx;


	rx = &sc->rx_big;
	ifp = sc->ifp;
	/* len counts down as buffers are consumed; it goes negative by
	   the amount of trailing garbage in the last buffer */
	while (len > 0) {
		idx = rx->cnt & rx->mask;
                rx->cnt++;
		/* save a pointer to the received mbuf */
		m = rx->info[idx].m;
		/* try to replace the received mbuf */
		if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
			goto drop;
		}
		/* unmap the received buffer */
		old_map = rx->info[idx].map;
		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dmat, old_map);

		/* swap the bus_dmamap_t's */
		rx->info[idx].map = rx->extra_map;
		rx->extra_map = old_map;

		/* chain multiple segments together */
		if (!m_head) {
			m_head = m;
			/* mcp implicitly skips 1st bytes so that
			 * packet is properly aligned */
			m->m_data += MXGEFW_PAD;
			m->m_pkthdr.len = len;
			m->m_len = sc->big_bytes - MXGEFW_PAD;
		} else {
			m->m_len = sc->big_bytes;
			m->m_flags &= ~M_PKTHDR;
			m_prev->m_next = m;
		}
		len -= m->m_len;
		m_prev = m;
	}

	/* trim trailing garbage from the last mbuf in the chain.  If
	 * there is any garbage, len will be negative */
	m->m_len += len;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m_head, csum);

	/* pass the frame up the stack */
	m_head->m_pkthdr.rcvif = ifp;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m_head);
	return;

drop:
	/* drop the frame -- the old mbuf(s) are re-cycled by running
	   every slot through the allocator */
        if (m_head) {
                len -= sc->big_bytes;
                m_freem(m_head);
        } else {
                len -= (sc->big_bytes + MXGEFW_PAD);
        }
        while ((int)len > 0) {
                idx = rx->cnt & rx->mask;
                rx->cnt++;
                m = rx->info[idx].m;
		/* only free the old mbuf if its slot was refilled */
                if (0 == (mxge_get_buf_big(sc, rx->extra_map, idx))) {
			m_freem(m);
			/* unmap the received buffer */
			old_map = rx->info[idx].map;
			bus_dmamap_sync(rx->dmat, old_map,
					BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rx->dmat, old_map);

			/* swap the bus_dmamap_t's */
			rx->info[idx].map = rx->extra_map;
			rx->extra_map = old_map;
		}
                len -= sc->big_bytes;
        }

	ifp->if_ierrors++;

}
1611
1612static inline void
1613mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
1614{
1615	struct ifnet *ifp;
1616	struct mbuf *m;
1617	mxge_rx_buf_t *rx;
1618	bus_dmamap_t old_map;
1619	int idx;
1620
1621	ifp = sc->ifp;
1622	rx = &sc->rx_small;
1623	idx = rx->cnt & rx->mask;
1624	rx->cnt++;
1625	/* save a pointer to the received mbuf */
1626	m = rx->info[idx].m;
1627	/* try to replace the received mbuf */
1628	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
1629		/* drop the frame -- the old mbuf is re-cycled */
1630		ifp->if_ierrors++;
1631		return;
1632	}
1633
1634	/* unmap the received buffer */
1635	old_map = rx->info[idx].map;
1636	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
1637	bus_dmamap_unload(rx->dmat, old_map);
1638
1639	/* swap the bus_dmamap_t's */
1640	rx->info[idx].map = rx->extra_map;
1641	rx->extra_map = old_map;
1642
1643	/* mcp implicitly skips 1st 2 bytes so that packet is properly
1644	 * aligned */
1645	m->m_data += MXGEFW_PAD;
1646
1647	/* if the checksum is valid, mark it in the mbuf header */
1648	if (sc->csum_flag)
1649		mxge_rx_csum(m, csum);
1650
1651	/* pass the frame up the stack */
1652	m->m_pkthdr.rcvif = ifp;
1653	m->m_len = m->m_pkthdr.len = len;
1654	ifp->if_ipackets++;
1655	(*ifp->if_input)(ifp, m);
1656}
1657
1658static inline void
1659mxge_clean_rx_done(mxge_softc_t *sc)
1660{
1661	mxge_rx_done_t *rx_done = &sc->rx_done;
1662	int limit = 0;
1663	uint16_t length;
1664	uint16_t checksum;
1665
1666
1667	while (rx_done->entry[rx_done->idx].length != 0) {
1668		length = ntohs(rx_done->entry[rx_done->idx].length);
1669		rx_done->entry[rx_done->idx].length = 0;
1670		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
1671		if (length <= MHLEN)
1672			mxge_rx_done_small(sc, length, checksum);
1673		else
1674			mxge_rx_done_big(sc, length, checksum);
1675		rx_done->cnt++;
1676		rx_done->idx = rx_done->cnt & (mxge_max_intr_slots - 1);
1677
1678		/* limit potential for livelock */
1679		if (__predict_false(++limit > 2 * mxge_max_intr_slots))
1680			break;
1681
1682	}
1683}
1684
1685
/*
 * Reclaim transmit descriptors the firmware has completed, up to
 * firmware packet index mcp_idx: unload DMA maps, free mbufs, and
 * restart the stack's send queue if the ring had been marked full.
 */
static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* the flag marks the last descriptor of a packet
		   (set by mxge_encap) */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
		if (__predict_false(++limit >  2 * tx->mask))
			break;
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
           its OK to send packets */

	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&sc->tx_lock);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		mxge_start_locked(sc);
		mtx_unlock(&sc->tx_lock);
	}
}
1732
/*
 * Interrupt handler: drain tx completions and rx events until the
 * firmware stops posting work, process the (rarely updated) stats
 * block, then return the irq token(s) to the NIC.
 */
static void
mxge_intr(void *arg)
{
	mxge_softc_t *sc = arg;
	mcp_irq_data_t *stats = sc->fw_stats;
	mxge_tx_buf_t *tx = &sc->tx;
	mxge_rx_done_t *rx_done = &sc->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	/* lower legacy IRQ  */
	*sc->irq_deassert = 0;
	mb();
	if (!mxge_deassert_wait)
		/* don't wait for conf. that irq is low */
		stats->valid = 0;
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(sc, (int)send_done_count);
			mxge_clean_rx_done(sc);
			send_done_count = be32toh(stats->send_done_count);
		}
		/* keep polling while the firmware says the stats block
		   is still valid (irq not yet confirmed lowered) */
	} while (*((volatile uint8_t *) &stats->valid));

	if (__predict_false(stats->stats_updated)) {
		/* propagate link state transitions to the stack */
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
		}
		if (sc->rdma_tags_available !=
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}
1795
/*
 * Transmit watchdog stub: no recovery logic is implemented yet, so
 * just report that the watchdog fired.  Uses the standard C99
 * __func__ identifier instead of the GCC-specific __FUNCTION__
 * extension; the output is identical.
 */
static void
mxge_watchdog(struct ifnet *ifp)
{
	printf("%s called\n", __func__);
}
1801
/* if_init stub: interface bring-up is actually done by mxge_open(),
 * so there is nothing to do here. */
static void
mxge_init(void *arg)
{
}
1806
1807
1808
1809static void
1810mxge_free_mbufs(mxge_softc_t *sc)
1811{
1812	int i;
1813
1814	for (i = 0; i <= sc->rx_big.mask; i++) {
1815		if (sc->rx_big.info[i].m == NULL)
1816			continue;
1817		bus_dmamap_unload(sc->rx_big.dmat,
1818				  sc->rx_big.info[i].map);
1819		m_freem(sc->rx_big.info[i].m);
1820		sc->rx_big.info[i].m = NULL;
1821	}
1822
1823	for (i = 0; i <= sc->rx_big.mask; i++) {
1824		if (sc->rx_big.info[i].m == NULL)
1825			continue;
1826		bus_dmamap_unload(sc->rx_big.dmat,
1827				  sc->rx_big.info[i].map);
1828		m_freem(sc->rx_big.info[i].m);
1829		sc->rx_big.info[i].m = NULL;
1830	}
1831
1832	for (i = 0; i <= sc->tx.mask; i++) {
1833		if (sc->tx.info[i].m == NULL)
1834			continue;
1835		bus_dmamap_unload(sc->tx.dmat,
1836				  sc->tx.info[i].map);
1837		m_freem(sc->tx.info[i].m);
1838		sc->tx.info[i].m = NULL;
1839	}
1840}
1841
1842static void
1843mxge_free_rings(mxge_softc_t *sc)
1844{
1845	int i;
1846
1847	if (sc->tx.req_bytes != NULL) {
1848		free(sc->tx.req_bytes, M_DEVBUF);
1849	}
1850	if (sc->rx_small.shadow != NULL)
1851		free(sc->rx_small.shadow, M_DEVBUF);
1852	if (sc->rx_big.shadow != NULL)
1853		free(sc->rx_big.shadow, M_DEVBUF);
1854	if (sc->tx.info != NULL) {
1855		for (i = 0; i <= sc->tx.mask; i++) {
1856			if (sc->tx.info[i].map != NULL)
1857				bus_dmamap_destroy(sc->tx.dmat,
1858						   sc->tx.info[i].map);
1859		}
1860		free(sc->tx.info, M_DEVBUF);
1861	}
1862	if (sc->rx_small.info != NULL) {
1863		for (i = 0; i <= sc->rx_small.mask; i++) {
1864			if (sc->rx_small.info[i].map != NULL)
1865				bus_dmamap_destroy(sc->rx_small.dmat,
1866						   sc->rx_small.info[i].map);
1867		}
1868		free(sc->rx_small.info, M_DEVBUF);
1869	}
1870	if (sc->rx_big.info != NULL) {
1871		for (i = 0; i <= sc->rx_big.mask; i++) {
1872			if (sc->rx_big.info[i].map != NULL)
1873				bus_dmamap_destroy(sc->rx_big.dmat,
1874						   sc->rx_big.info[i].map);
1875		}
1876		free(sc->rx_big.info, M_DEVBUF);
1877	}
1878	if (sc->rx_big.extra_map != NULL)
1879		bus_dmamap_destroy(sc->rx_big.dmat,
1880				   sc->rx_big.extra_map);
1881	if (sc->rx_small.extra_map != NULL)
1882		bus_dmamap_destroy(sc->rx_small.dmat,
1883				   sc->rx_small.extra_map);
1884	if (sc->tx.dmat != NULL)
1885		bus_dma_tag_destroy(sc->tx.dmat);
1886	if (sc->rx_small.dmat != NULL)
1887		bus_dma_tag_destroy(sc->rx_small.dmat);
1888	if (sc->rx_big.dmat != NULL)
1889		bus_dma_tag_destroy(sc->rx_big.dmat);
1890}
1891
/*
 * Query the firmware for ring sizes, then allocate everything the
 * rings need: the tx request copy block, rx shadow rings, per-slot
 * info arrays, DMA tags, and per-slot DMA maps.  On any failure the
 * partial allocations are released via mxge_free_rings().  Returns 0
 * or an errno.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	/* ring sizes are in bytes; entry counts are powers of two */
	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	IFQ_SET_READY(&sc->ifp->if_snd);

	sc->tx.mask = tx_ring_entries - 1;
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;

	err = ENOMEM;

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (MXGE_MAX_SEND_DESC + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_nothing;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MXGE_MAX_ETHER_MTU,	/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 4096,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	/* extra map used while swapping a freshly filled buffer in */
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
			goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
			goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}
2068
/*
 * Bring the interface up: reset the NIC, allocate rings, hook the
 * interrupt, locate the lanai rings in NIC SRAM, stock the receive
 * rings, program buffer sizes and the stats DMA address, and start
 * the firmware.  Unwinds partial setup on failure; returns 0 or an
 * errno.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err;
	bus_dmamap_t map;


	err = mxge_reset(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	/* pick the smallest cluster size that can hold a full frame */
	if (MCLBYTES >=
	    sc->ifp->if_mtu + ETHER_HDR_LEN + MXGEFW_PAD)
		sc->big_bytes = MCLBYTES;
	else
		sc->big_bytes = MJUMPAGESIZE;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		return err;
	}

	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
			     mxge_intr, sc, &sc->ih);
	if (err != 0) {
		goto abort_with_rings;
	}

	/* get the lanai pointers to the send and receive rings */

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
				 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		err = EIO;
		goto abort_with_irq;
	}

	/* when write combining is available, point each ring at its
	   dedicated fifo window in NIC SRAM */
	if (sc->wc) {
		sc->tx.wc_fifo = sc->sram + 0x200000;
		sc->rx_small.wc_fifo = sc->sram + 0x300000;
		sc->rx_big.wc_fifo = sc->sram + 0x340000;
	} else {
		sc->tx.wc_fifo = 0;
		sc->rx_small.wc_fifo = 0;
		sc->rx_big.wc_fifo = 0;
	}


	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	for (i = 0; i <= sc->rx_big.mask; i++) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}

	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = sc->big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);
abort_with_irq:
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
abort_with_rings:
	mxge_free_rings(sc);
	return err;
}
2193
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	/* Stop the interface: mark it down in the stack first so no new
	   work is queued, then ask the firmware to bring the link down
	   and wait for it to acknowledge via the "down" interrupt. */
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	/* Snapshot down_cnt before issuing the command; the interrupt
	   handler increments it when the firmware confirms the down. */
	old_down_cnt = sc->down_cnt;
	mb();	/* order the down_cnt read before the command is sent */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		(void)tsleep(&sc->down_cnt, PWAIT, "down mxge", hz);
	}
	/* If down_cnt still hasn't moved, the firmware never confirmed;
	   log it and tear down anyway (best effort). */
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}
	/* Release the interrupt handler and all ring/mbuf resources
	   allocated by mxge_open(). */
	if (sc->ih != NULL)
		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	mxge_free_mbufs(sc);
	mxge_free_rings(sc);
	return 0;
}
2220
2221
static int
mxge_media_change(struct ifnet *ifp)
{
	/* Media selection is not supported; only the fixed default
	   (IFM_ETHER|IFM_AUTO, added in mxge_attach) is available. */
	return EINVAL;
}
2227
2228static int
2229mxge_change_mtu(mxge_softc_t *sc, int mtu)
2230{
2231	struct ifnet *ifp = sc->ifp;
2232	int real_mtu, old_mtu;
2233	int err = 0;
2234
2235
2236	real_mtu = mtu + ETHER_HDR_LEN;
2237	if ((real_mtu > MXGE_MAX_ETHER_MTU) ||
2238	    real_mtu < 60)
2239		return EINVAL;
2240	sx_xlock(&sc->driver_lock);
2241	old_mtu = ifp->if_mtu;
2242	ifp->if_mtu = mtu;
2243	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2244		mxge_close(sc);
2245		err = mxge_open(sc);
2246		if (err != 0) {
2247			ifp->if_mtu = old_mtu;
2248			mxge_close(sc);
2249			(void) mxge_open(sc);
2250		}
2251	}
2252	sx_xunlock(&sc->driver_lock);
2253	return err;
2254}
2255
2256static void
2257mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2258{
2259	mxge_softc_t *sc = ifp->if_softc;
2260
2261
2262	if (sc == NULL)
2263		return;
2264	ifmr->ifm_status = IFM_AVALID;
2265	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
2266	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
2267	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
2268}
2269
2270static int
2271mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2272{
2273	mxge_softc_t *sc = ifp->if_softc;
2274	struct ifreq *ifr = (struct ifreq *)data;
2275	int err, mask;
2276
2277	err = 0;
2278	switch (command) {
2279	case SIOCSIFADDR:
2280	case SIOCGIFADDR:
2281		err = ether_ioctl(ifp, command, data);
2282		break;
2283
2284	case SIOCSIFMTU:
2285		err = mxge_change_mtu(sc, ifr->ifr_mtu);
2286		break;
2287
2288	case SIOCSIFFLAGS:
2289		sx_xlock(&sc->driver_lock);
2290		if (ifp->if_flags & IFF_UP) {
2291			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
2292				err = mxge_open(sc);
2293		} else {
2294			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2295				mxge_close(sc);
2296		}
2297		sx_xunlock(&sc->driver_lock);
2298		break;
2299
2300	case SIOCADDMULTI:
2301	case SIOCDELMULTI:
2302		err = 0;
2303		break;
2304
2305	case SIOCSIFCAP:
2306		sx_xlock(&sc->driver_lock);
2307		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2308		if (mask & IFCAP_TXCSUM) {
2309			if (IFCAP_TXCSUM & ifp->if_capenable) {
2310				ifp->if_capenable &= ~IFCAP_TXCSUM;
2311				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
2312			} else {
2313				ifp->if_capenable |= IFCAP_TXCSUM;
2314				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
2315			}
2316		} else if (mask & IFCAP_RXCSUM) {
2317			if (IFCAP_RXCSUM & ifp->if_capenable) {
2318				ifp->if_capenable &= ~IFCAP_RXCSUM;
2319				sc->csum_flag = 0;
2320			} else {
2321				ifp->if_capenable |= IFCAP_RXCSUM;
2322				sc->csum_flag = 1;
2323			}
2324		}
2325		sx_xunlock(&sc->driver_lock);
2326		break;
2327
2328	case SIOCGIFMEDIA:
2329		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
2330				    &sc->media, command);
2331                break;
2332
2333	default:
2334		err = ENOTTY;
2335        }
2336	return err;
2337}
2338
2339static void
2340mxge_fetch_tunables(mxge_softc_t *sc)
2341{
2342
2343	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
2344			  &mxge_flow_control);
2345	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
2346			  &mxge_intr_coal_delay);
2347	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
2348			  &mxge_nvidia_ecrc_enable);
2349	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
2350			  &mxge_deassert_wait);
2351	TUNABLE_INT_FETCH("hw.mxge.verbose",
2352			  &mxge_verbose);
2353
2354	if (bootverbose)
2355		mxge_verbose = 1;
2356	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
2357		mxge_intr_coal_delay = 30;
2358	sc->pause = mxge_flow_control;
2359}
2360
2361static int
2362mxge_attach(device_t dev)
2363{
2364	mxge_softc_t *sc = device_get_softc(dev);
2365	struct ifnet *ifp;
2366	size_t bytes;
2367	int rid, err;
2368	uint16_t cmd;
2369
2370	sc->dev = dev;
2371	mxge_fetch_tunables(sc);
2372
2373	err = bus_dma_tag_create(NULL,			/* parent */
2374				 1,			/* alignment */
2375				 4096,			/* boundary */
2376				 BUS_SPACE_MAXADDR,	/* low */
2377				 BUS_SPACE_MAXADDR,	/* high */
2378				 NULL, NULL,		/* filter */
2379				 MXGE_MAX_ETHER_MTU,	/* maxsize */
2380				 MXGE_MAX_SEND_DESC, 	/* num segs */
2381				 4096,			/* maxsegsize */
2382				 0,			/* flags */
2383				 NULL, NULL,		/* lock */
2384				 &sc->parent_dmat);	/* tag */
2385
2386	if (err != 0) {
2387		device_printf(sc->dev, "Err %d allocating parent dmat\n",
2388			      err);
2389		goto abort_with_nothing;
2390	}
2391
2392	ifp = sc->ifp = if_alloc(IFT_ETHER);
2393	if (ifp == NULL) {
2394		device_printf(dev, "can not if_alloc()\n");
2395		err = ENOSPC;
2396		goto abort_with_parent_dmat;
2397	}
2398	mtx_init(&sc->cmd_lock, NULL,
2399		 MTX_NETWORK_LOCK, MTX_DEF);
2400	mtx_init(&sc->tx_lock, device_get_nameunit(dev),
2401		 MTX_NETWORK_LOCK, MTX_DEF);
2402	sx_init(&sc->driver_lock, device_get_nameunit(dev));
2403
2404	/* Enable DMA and Memory space access */
2405	pci_enable_busmaster(dev);
2406	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2407	cmd |= PCIM_CMD_MEMEN;
2408	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2409
2410	/* Map the board into the kernel */
2411	rid = PCIR_BARS;
2412	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
2413					 ~0, 1, RF_ACTIVE);
2414	if (sc->mem_res == NULL) {
2415		device_printf(dev, "could not map memory\n");
2416		err = ENXIO;
2417		goto abort_with_lock;
2418	}
2419	sc->sram = rman_get_virtual(sc->mem_res);
2420	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
2421	if (sc->sram_size > rman_get_size(sc->mem_res)) {
2422		device_printf(dev, "impossible memory region size %ld\n",
2423			      rman_get_size(sc->mem_res));
2424		err = ENXIO;
2425		goto abort_with_mem_res;
2426	}
2427
2428	/* make NULL terminated copy of the EEPROM strings section of
2429	   lanai SRAM */
2430	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
2431	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
2432				rman_get_bushandle(sc->mem_res),
2433				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
2434				sc->eeprom_strings,
2435				MXGE_EEPROM_STRINGS_SIZE - 2);
2436	err = mxge_parse_strings(sc);
2437	if (err != 0)
2438		goto abort_with_mem_res;
2439
2440	/* Enable write combining for efficient use of PCIe bus */
2441	mxge_enable_wc(sc);
2442
2443	/* Allocate the out of band dma memory */
2444	err = mxge_dma_alloc(sc, &sc->cmd_dma,
2445			     sizeof (mxge_cmd_t), 64);
2446	if (err != 0)
2447		goto abort_with_mem_res;
2448	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
2449	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
2450	if (err != 0)
2451		goto abort_with_cmd_dma;
2452
2453	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
2454			     sizeof (*sc->fw_stats), 64);
2455	if (err != 0)
2456		goto abort_with_zeropad_dma;
2457	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;
2458
2459
2460	/* allocate interrupt queues */
2461	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);
2462	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
2463	if (err != 0)
2464		goto abort_with_fw_stats;
2465	sc->rx_done.entry = sc->rx_done.dma.addr;
2466	bzero(sc->rx_done.entry, bytes);
2467	/* Add our ithread  */
2468	rid = 0;
2469	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
2470					 1, RF_SHAREABLE | RF_ACTIVE);
2471	if (sc->irq_res == NULL) {
2472		device_printf(dev, "could not alloc interrupt\n");
2473		goto abort_with_rx_done;
2474	}
2475
2476	/* load the firmware */
2477	mxge_select_firmware(sc);
2478
2479	err = mxge_load_firmware(sc);
2480	if (err != 0)
2481		goto abort_with_irq_res;
2482	sc->intr_coal_delay = mxge_intr_coal_delay;
2483	err = mxge_reset(sc);
2484	if (err != 0)
2485		goto abort_with_irq_res;
2486
2487	/* hook into the network stack */
2488	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2489	ifp->if_baudrate = 100000000;
2490	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM;
2491	ifp->if_hwassist = CSUM_TCP | CSUM_UDP;
2492	ifp->if_capenable = ifp->if_capabilities;
2493	sc->csum_flag = 1;
2494        ifp->if_init = mxge_init;
2495        ifp->if_softc = sc;
2496        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2497        ifp->if_ioctl = mxge_ioctl;
2498        ifp->if_start = mxge_start;
2499	ifp->if_watchdog = mxge_watchdog;
2500	ether_ifattach(ifp, sc->mac_addr);
2501	/* ether_ifattach sets mtu to 1500 */
2502	ifp->if_mtu = MXGE_MAX_ETHER_MTU - ETHER_HDR_LEN;
2503
2504	/* Initialise the ifmedia structure */
2505	ifmedia_init(&sc->media, 0, mxge_media_change,
2506		     mxge_media_status);
2507	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
2508	mxge_add_sysctls(sc);
2509	return 0;
2510
2511abort_with_irq_res:
2512	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
2513abort_with_rx_done:
2514	sc->rx_done.entry = NULL;
2515	mxge_dma_free(&sc->rx_done.dma);
2516abort_with_fw_stats:
2517	mxge_dma_free(&sc->fw_stats_dma);
2518abort_with_zeropad_dma:
2519	mxge_dma_free(&sc->zeropad_dma);
2520abort_with_cmd_dma:
2521	mxge_dma_free(&sc->cmd_dma);
2522abort_with_mem_res:
2523	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
2524abort_with_lock:
2525	pci_disable_busmaster(dev);
2526	mtx_destroy(&sc->cmd_lock);
2527	mtx_destroy(&sc->tx_lock);
2528	sx_destroy(&sc->driver_lock);
2529	if_free(ifp);
2530abort_with_parent_dmat:
2531	bus_dma_tag_destroy(sc->parent_dmat);
2532
2533abort_with_nothing:
2534	return err;
2535}
2536
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	/* Stop the interface (under the driver lock) if it is running;
	   mxge_close() tears down the interrupt handler and frees the
	   rings/mbufs. */
	sx_xlock(&sc->driver_lock);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	sx_xunlock(&sc->driver_lock);
	ether_ifdetach(sc->ifp);
	/* Release everything mxge_attach() acquired, in reverse order:
	   IRQ, interrupt-queue DMA, stats/zeropad/cmd DMA, the PCI
	   memory BAR, busmastering, locks, the ifnet, and finally the
	   parent DMA tag. */
	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
	sc->rx_done.entry = NULL;
	mxge_dma_free(&sc->rx_done.dma);
	mxge_dma_free(&sc->fw_stats_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_lock);
	mtx_destroy(&sc->tx_lock);
	sx_destroy(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
2562
static int
mxge_shutdown(device_t dev)
{
	/* No special shutdown handling is needed; report success. */
	return 0;
}
2568
2569/*
2570  This file uses Myri10GE driver indentation.
2571
2572  Local Variables:
2573  c-file-style:"linux"
2574  tab-width:8
2575  End:
2576*/
2577