cxgb_main.c revision 170789
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 170789 2007-06-15 20:02:02Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#else
#include <dev/cxgb/cxgb_include.h>
#endif

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_teardown_msix(adapter_t *);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_down(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
#ifdef notyet
static int offload_close(struct toedev *tdev);
#endif


static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	cxgb_extension_open,
	.d_close =	cxgb_extension_close,
	.d_ioctl =	cxgb_extension_ioctl,
	.d_name =	"cxgb",
};

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

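/* One MSI-X vector per SGE queue set, plus one for slow-path events. */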
#define SGE_MSIX_COUNT (SGE_QSETS + 1)

extern int collapse_mbufs;
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * By default the driver uses a single queue-set per port.  Set
 * singleq = 0 to allow multiple queue-sets per port (bounded by the
 * number of CPUs and by SGE_QSETS) when MSI-X is in use.
 */
static int singleq = 1;
TUNABLE_INT("hw.cxgb.singleq", &singleq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
    "use a single queue-set per port");

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

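/*
 * The low MAX_NPORTS bits of open_device_map track open ports;
 * PORT_MASK selects just those bits (OFFLOAD_DEVMAP_BIT sits above them).
 */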
#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used. */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];
	int nports;

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	nports = ai->nports0 + ai->nports1;
	if (nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
upgrade_fw(adapter_t *sc)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR,
	    FW_VERSION_MINOR, FW_VERSION_MICRO);

	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

static int
cxgb_controller_attach(device_t dev)
{
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, reg, msi_needed, error = 0;
	uint32_t vers;
	int port_qsets = 1;

	sc = device_get_softc(dev);
	sc->dev = dev;
	sc->msi_count = 0;

	/* Find the PCIe link width and set the max read request to 4KB. */
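	/*
	 * In the PCIe capability, the link status register at offset 0x12
	 * reports the negotiated link width in bits 9:4, and bits 14:12 of
	 * the device control register at offset 0x8 encode the maximum read
	 * request size as 128 << n bytes, so the value 5 selects 4096 bytes.
	 */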
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		uint16_t lnk, pectl;
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
	}

	pci_enable_busmaster(dev);
	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0.
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);
	mtx_init(&sc->elmer_lock, "cxgb elmer", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		printf("prep adapter failed\n");
		error = ENODEV;
		goto out;
	}
	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED

	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_needed = sc->msi_count = SGE_MSIX_COUNT;

		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
		    (sc->msi_count != msi_needed)) {
			device_printf(dev, "msix allocation failed - msi_count = %d"
			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
			    msi_needed, error);
			sc->msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			sc->cxgb_intr = t3_intr_msix;
		}
	}

	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
		sc->msi_count = 1;
		if (pci_alloc_msi(dev, &sc->msi_count)) {
			device_printf(dev, "alloc msi failed - will try INTx\n");
			sc->msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			sc->cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (sc->msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->irq_rid = 0;
		sc->cxgb_intr = t3b_intr;
	}


	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);


	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);

	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if ((sc->flags & USING_MSIX) && !singleq)
		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; i++) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
		sc->port[i].nqsets = port_qsets;
		sc->port[i].first_qset = i*port_qsets;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	/*
	 * XXX need to poll for link status
	 */
	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_adapter(sc);

	t3_led_ready(sc);

	cxgb_offload_init();
	if (is_offload(sc)) {
		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_adapter_ofld(sc);
	}
	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
	    G_FW_VERSION_MICRO(vers));

	t3_add_sysctls(sc);
out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	cxgb_down(sc);

#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	} else {
		device_printf(sc->dev, "no msi message to release\n");
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	/*
	 * XXX need to drain the ifq by hand until
	 * it is taught about mbuf iovecs
	 */
	callout_drain(&sc->cxgb_tick_ch);

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	bus_generic_detach(sc->dev);
#ifdef notyet
	if (is_offload(sc)) {
		cxgb_adapter_unofld(sc);
		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
			offload_close(&sc->tdev);
	}
#endif
	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);

	return;
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = SGE_TXQ_PER_SET;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}

static void
cxgb_teardown_msix(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
			sc->msix_intr_tag[i] = NULL;
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
			sc->msix_irq_res[i] = NULL;
		}
	}
}

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}

	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}
	for (i = k = 0; i < (sc)->params.nports; i++) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; j++, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

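			/*
			 * SYS_RES_IRQ rid 1 carries the slow-path vector
			 * allocated above, so queue set k uses rid k + 2.
			 */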
			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}

	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}


static int
cxgb_makedev(struct port_info *pi)
{

	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}


#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif


static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int err, media_flags;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
			    0, NULL);
		media_flags = 0;
	} else {
		printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}
	if (media_flags) {
		ifmedia_add(&p->media, media_flags, 0, NULL);
		ifmedia_set(&p->media, media_flags);
	} else {
		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
	}


	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
#ifdef TASKQUEUE_CURRENT
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	t3_sge_init_port(p);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);

	PORT_LOCK(p);
	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
		cxgb_stop_locked(p);
	PORT_UNLOCK(p);

	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	if (p->port_cdev != NULL)
		destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

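	/* Walk the capability list looking for a matching capability ID. */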
	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];
	struct cmac *mac = &adapter->port[port_id].mac;

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status) {
		t3_mac_enable(mac, MAC_DIRECTION_RX);
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	} else {
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
		pi->phy.ops->power_down(&pi->phy, 1);
		t3_mac_disable(mac, MAC_DIRECTION_RX);
		t3_link_start(&pi->phy, mac, &pi->link_config);
	}
}


/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
	ADAPTER_UNLOCK(sc);
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated before this gets called, as
	 * this is called early in attach by t3_prep_adapter, so save the
	 * address in the port structure instead.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
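	/* A 0xff entry terminates the CPU list passed to t3_config_rss(). */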
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver within a critical section.
 */
static inline int
offload_tx(struct toedev *tdev, struct mbuf *m)
{
	int ret;

	critical_enter();
	ret = t3_offload_tx(tdev, m);
	critical_exit();
	return (ret);
}

static int
write_smt_entry(struct adapter *adapter, int idx)
{
	struct port_info *pi = &adapter->port[idx];
	struct cpl_smt_write_req *req;
	struct mbuf *m;

	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
		return (ENOMEM);

	req = mtod(m, struct cpl_smt_write_req *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
	req->iff = idx;
	memset(req->src_mac1, 0, sizeof(req->src_mac1));
	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

	m_set_priority(m, 1);

	offload_tx(&adapter->tdev, m);

	return (0);
}

static int
init_smt(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i)
		write_smt_entry(adapter, i);
	return (0);
}

static void
init_port_mtus(adapter_t *adapter)
{
	unsigned int mtus = adapter->port[0].ifp->if_mtu;

	if (adapter->port[1].ifp)
		mtus |= adapter->port[1].ifp->if_mtu << 16;
	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
			      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m) {
		req = mtod(m, struct mngt_pktsched_wr *);
		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
		req->sched = sched;
		req->idx = qidx;
		req->min = lo;
		req->max = hi;
		req->binding = port;
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		t3_mgmt_tx(adap, m);
	}
}

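/*
 * Bind each queue set to its port's packet scheduler by issuing a
 * FW_MNGTOPCODE_PKTSCHED_SET management request per (port, queue) pair.
 */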
static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}

/**
 *	cxgb_up - enable the adapter
 *	@adap: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		if ((sc->flags & FW_UPTODATE) == 0)
			err = upgrade_fw(sc);

		if (err)
			goto out;

		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

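		/*
		 * Program the host page size for TCP DDP; V_HPZ0 takes
		 * log2(PAGE_SIZE) relative to 4KB pages (0 on 4KB-page
		 * machines).
		 */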
		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		setup_rss(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			err = EINVAL;
			goto out;
		}
		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			sc->cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(sc->dev, "Cannot set up interrupt\n");
			err = EINVAL;
			goto irq_err;
		}
	} else {
		cxgb_setup_msix(sc, sc->msi_count);
	}

	t3_sge_start(sc);
	t3_intr_enable(sc);

	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
		bind_qsets(sc);
	sc->flags |= QUEUES_BOUND;
out:
	return (err);
irq_err:
	CH_ERR(sc, "request_irq failed, err %d\n", err);
	goto out;
}


/*
 * Release resources when all the ports and offloading have been stopped.
 */
static void
cxgb_down(struct adapter *sc)
{
	int i;

	t3_sge_stop(sc);
	ADAPTER_LOCK(sc);
	t3_intr_disable(sc);
	ADAPTER_UNLOCK(sc);

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
		sc->intr_tag = NULL;
	}
	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
		sc->irq_res = NULL;
	}

	if (sc->flags & USING_MSIX)
		cxgb_teardown_msix(sc);

	callout_drain(&sc->sge_timer_ch);
	if (sc->tq != NULL)
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
	for (i = 0; i < sc->params.nports; i++)
		if (sc->port[i].tq != NULL)
			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
}

static int
offload_open(struct port_info *pi)
{
	struct adapter *adapter = pi->adapter;
	struct toedev *tdev = TOEDEV(pi->ifp);
	int adap_up = adapter->open_device_map & PORT_MASK;
	int err = 0;

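	/*
	 * Atomically set OFFLOAD_DEVMAP_BIT; if it was already set, the
	 * offload device is open and there is nothing more to do.
	 */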
	if (atomic_cmpset_int(&adapter->open_device_map,
		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
		return (0);

	ADAPTER_LOCK(pi->adapter);
	if (!adap_up)
		err = cxgb_up(adapter);
	ADAPTER_UNLOCK(pi->adapter);
	if (err < 0)
		return (err);

	t3_tp_set_offload_mode(adapter, 1);
	tdev->lldev = adapter->port[0].ifp;
	err = cxgb_offload_activate(adapter);
	if (err)
		goto out;

	init_port_mtus(adapter);
	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
		     adapter->params.b_wnd,
		     adapter->params.rev == 0 ?
		       adapter->port[0].ifp->if_mtu : 0xffff);
	init_smt(adapter);

	/* Call back all registered clients */
	cxgb_add_clients(tdev);

out:
	/* restore them in case the offload module has changed them */
	if (err) {
		t3_tp_set_offload_mode(adapter, 0);
		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_set_dummy_ops(tdev);
	}
	return (err);
}
#ifdef notyet
static int
offload_close(struct toedev *tdev)
{
	struct adapter *adapter = tdev2adap(tdev);

	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
		return (0);

	/* Call back all registered clients */
	cxgb_remove_clients(tdev);
	tdev->lldev = NULL;
	cxgb_set_dummy_ops(tdev);
	t3_tp_set_offload_mode(adapter, 0);
	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);

	ADAPTER_LOCK(adapter);
	if (!adapter->open_device_map)
		cxgb_down(adapter);
	ADAPTER_UNLOCK(adapter);

	cxgb_offload_deactivate(adapter);
	return (0);
}
#endif

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int err;

	mtx_assert(&p->lock, MA_OWNED);
	ifp = p->ifp;

	ADAPTER_LOCK(p->adapter);
	if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) {
		ADAPTER_UNLOCK(p->adapter);
		cxgb_stop_locked(p);
		return;
	}
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);

	setbit(&p->adapter->open_device_map, p->port);
	ADAPTER_UNLOCK(p->adapter);

	if (is_offload(sc) && !ofld_disable) {
		err = offload_open(p);
		if (err)
			log(LOG_WARNING,
			    "Could not initialize offload capabilities\n");
	}
	cxgb_link_start(p);
	t3_link_changed(sc, p->port);
	ifp->if_baudrate = p->link_config.speed * 1000000;

	t3_port_intr_enable(sc, p->port);

	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	mtx_assert(&p->lock, MA_OWNED);

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop_locked(struct port_info *p)
{
	struct ifnet *ifp;

	mtx_assert(&p->lock, MA_OWNED);
	mtx_assert(&p->adapter->lock, MA_NOTOWNED);

	ifp = p->ifp;

	t3_port_intr_disable(p->adapter, p->port);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	p->phy.ops->power_down(&p->phy, 1);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);

	ADAPTER_LOCK(p->adapter);
	clrbit(&p->adapter->open_device_map, p->port);
	/*
	 * XXX cancel check_task
	 */
	if (p->adapter->open_device_map == 0)
		cxgb_down(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
}

static int
cxgb_set_mtu(struct port_info *p, int mtu)
{
	struct ifnet *ifp = p->ifp;
	int error = 0;

	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
		error = EINVAL;
	else if (ifp->if_mtu != mtu) {
		PORT_LOCK(p);
		ifp->if_mtu = mtu;
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			callout_stop(&p->adapter->cxgb_tick_ch);
			cxgb_stop_locked(p);
			cxgb_init_locked(p);
		}
		PORT_UNLOCK(p);
	}
	return (error);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	/*
	 * XXX need to check that we aren't in the middle of an unload
	 */
	switch (command) {
	case SIOCSIFMTU:
		error = cxgb_set_mtu(p, ifr->ifr_mtu);
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		PORT_LOCK(p);
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				cxgb_init_locked(p);
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		PORT_UNLOCK(p);
		break;
	case SIOCSIFFLAGS:
		PORT_LOCK(p);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);
			} else
				cxgb_init_locked(p);
			p->if_flags = ifp->if_flags;
		} else {
			callout_stop(&p->adapter->cxgb_tick_ch);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop_locked(p);
			} else {
				adapter_t *sc = p->adapter;
				callout_reset(&sc->cxgb_tick_ch,
				    sc->params.stats_update_period * hz,
				    cxgb_tick, sc);
			}
		}
		PORT_UNLOCK(p);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m0, *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
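	/*
	 * Dequeue and hand packets to the hardware until either txmax
	 * descriptors have been consumed by this call or the ring is
	 * within TX_MAX_DESC entries of filling up.
	 */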
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		/*
		 * Convert chain to M_IOVEC
		 */
		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
		m0 = m;
#ifdef INVARIANTS
		/*
		 * Clean up after net stack sloppiness
		 * before calling m_sanity
		 */
		m0 = m->m_next;
		while (m0) {
			m0->m_flags &= ~M_PKTHDR;
			m0 = m0->m_next;
		}
		m_sanity(m, 0);	/* m0 is NULL here; check the whole chain */
		m0 = m;
#endif
		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
				m = m0;
				m_collapse(m, TX_MAX_SEGS, &m0);
			} else
				break;
		}
		m = m0;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (err == ENOMEM) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL)
		err = ENOBUFS;
	else if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) &&
	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		err = ENOSPC;
	}
	return (err);
}

static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	do {
		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
			taskqueue_enqueue(pi->adapter->tq,
			    &pi->timer_reclaim_task);

		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
	} while (error == 0);
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
		taskqueue_enqueue(pi->adapter->tq,
		    &pi->timer_reclaim_task);

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}


static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (p->link_config.speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;
	case 100:
		ifmr->ifm_active |= IFM_100_TX;
		break;
	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	}

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	adapter_t *sc = data;

	if (cxgb_debug)
		device_printf(sc->dev, "cxgb_async_intr\n");

	t3_slow_intr_handler(sc);
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
	ADAPTER_UNLOCK(sc);
}

static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
		p->ifp->if_baudrate = p->link_config.speed * 1000000;
	}
}

static void
check_t3b2_mac(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i) {
		struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;
		int status;

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

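		/*
		 * A watchdog status of 1 means the MAC was toggled; 2 means
		 * it needed a reset and must be reprogrammed below.
		 */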
		status = 0;
		PORT_LOCK(p);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
			status = t3b2_mac_watchdog_task(&p->mac);
		if (status == 1)
			p->mac.stats.num_toggled++;
		else if (status == 2) {
			struct cmac *mac = &p->mac;

			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
			    + ETHER_VLAN_ENCAP_LEN);
			t3_mac_set_address(mac, 0, p->hw_addr);
			cxgb_set_rxmode(p);
			t3_link_start(&p->phy, mac, &p->link_config);
			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
			t3_port_intr_enable(adapter, p->port);
			p->mac.stats.num_resets++;
		}
		PORT_UNLOCK(p);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	/*
	 * The tick callout was initialized with CALLOUT_RETURNUNLOCKED on
	 * the adapter lock, so it runs with that lock held and must drop
	 * it here: the adapter lock can currently only be acquired after
	 * the port lock, and check_t3b2_mac() takes port locks.
	 */
	ADAPTER_UNLOCK(sc);

	if (p->rev == T3_REV_B2)
		check_t3b2_mac(sc);
}

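/*
 * A negative value means "parameter not supplied" and is treated as in
 * range, so callers can pass -1 for fields they do not wish to change.
 */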
static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

static int
cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
{
	return (0);
}

static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
{
	return (0);
}

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
1982	case CHELSIO_SET_QSET_PARAMS: {
1983		struct qset_params *q;
1984		struct ch_qset_params *t = (struct ch_qset_params *)data;
1985
1986		if (t->qset_idx >= SGE_QSETS)
1987			return -EINVAL;
1988		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
1989		    !in_range(t->cong_thres, 0, 255) ||
1990		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
1991			      MAX_TXQ_ENTRIES) ||
1992		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
1993			      MAX_TXQ_ENTRIES) ||
1994		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
1995			      MAX_CTRL_TXQ_ENTRIES) ||
1996		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
1997		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
1998			      MAX_RX_JUMBO_BUFFERS) ||
1999		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2000		       return -EINVAL;
2001		if ((sc->flags & FULL_INIT_DONE) &&
2002		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2003		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2004		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2005		     t->polling >= 0 || t->cong_thres >= 0))
2006			return -EBUSY;
2007
2008		q = &sc->params.sge.qset[t->qset_idx];
2009
2010		if (t->rspq_size >= 0)
2011			q->rspq_size = t->rspq_size;
2012		if (t->fl_size[0] >= 0)
2013			q->fl_size = t->fl_size[0];
2014		if (t->fl_size[1] >= 0)
2015			q->jumbo_size = t->fl_size[1];
2016		if (t->txq_size[0] >= 0)
2017			q->txq_size[0] = t->txq_size[0];
2018		if (t->txq_size[1] >= 0)
2019			q->txq_size[1] = t->txq_size[1];
2020		if (t->txq_size[2] >= 0)
2021			q->txq_size[2] = t->txq_size[2];
2022		if (t->cong_thres >= 0)
2023			q->cong_thres = t->cong_thres;
2024		if (t->intr_lat >= 0) {
2025			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2026
2027			q->coalesce_nsecs = t->intr_lat*1000;
2028			t3_update_qset_coalesce(qs, q);
2029		}
2030		break;
2031	}
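	/*
	 * Sketch of a hypothetical CHELSIO_SET_QSET_PARAMS caller,
	 * assuming the signed -1 "no change" sentinel semantics the
	 * checks above imply.  Only the interrupt latency (given in
	 * microseconds, stored as nanoseconds) is retuned, which is the
	 * one knob still allowed after FULL_INIT_DONE:
	 *
	 *	struct ch_qset_params qp;
	 *	memset(&qp, -1, sizeof(qp));	// every field = no change
	 *	qp.qset_idx = 0;
	 *	qp.intr_lat = 5;		// 5 us -> coalesce_nsecs 5000
	 *	ioctl(fd, CHELSIO_SET_QSET_PARAMS, &qp);
	 */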
2032	case CHELSIO_GET_QSET_PARAMS: {
2033		struct qset_params *q;
2034		struct ch_qset_params *t = (struct ch_qset_params *)data;
2035
2036		if (t->qset_idx >= SGE_QSETS)
2037			return (EINVAL);
2038
2039		q = &sc->params.sge.qset[t->qset_idx];
2040		t->rspq_size   = q->rspq_size;
2041		t->txq_size[0] = q->txq_size[0];
2042		t->txq_size[1] = q->txq_size[1];
2043		t->txq_size[2] = q->txq_size[2];
2044		t->fl_size[0]  = q->fl_size;
2045		t->fl_size[1]  = q->jumbo_size;
2046		t->polling     = q->polling;
2047		t->intr_lat    = q->coalesce_nsecs / 1000;
2048		t->cong_thres  = q->cong_thres;
2049		break;
2050	}
2051	case CHELSIO_SET_QSET_NUM: {
2052		struct ch_reg *edata = (struct ch_reg *)data;
2053		unsigned int port_idx = pi->port;
2054
2055		if (sc->flags & FULL_INIT_DONE)
2056			return (EBUSY);
2057		if (edata->val < 1 ||
2058		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2059			return (EINVAL);
2060		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2061			return (EINVAL);
2062		sc->port[port_idx].nqsets = edata->val;
2063		sc->port[0].first_qset = 0;
2064		/*
2065		 * XXX Hard-coded to two ports, matching the Linux driver.
2066		 */
2067		sc->port[1].first_qset = sc->port[0].nqsets;
2068		break;
2069	}
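	/*
	 * Worked example of the layout above, given the hard-coded two
	 * ports: if port 0 owns 2 qsets and port 1 owns 3, port 0 uses
	 * qsets 0-1 (first_qset 0) and port 1 uses qsets 2-4
	 * (first_qset 2).
	 */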
2070	case CHELSIO_GET_QSET_NUM: {
2071		struct ch_reg *edata = (struct ch_reg *)data;
2072		edata->val = pi->nqsets;
2073		break;
2074	}
2075#ifdef notyet
2076	case CHELSIO_LOAD_FW:
2077	case CHELSIO_GET_PM:
2078	case CHELSIO_SET_PM:
2079		return (EOPNOTSUPP);
2080		break;
2081#endif
2082	case CHELSIO_SETMTUTAB: {
2083		struct ch_mtus *m = (struct ch_mtus *)data;
2084		int i;
2085
2086		if (!is_offload(sc))
2087			return (EOPNOTSUPP);
2088		if (offload_running(sc))
2089			return (EBUSY);
2090		if (m->nmtus != NMTUS)
2091			return (EINVAL);
2092		if (m->mtus[0] < 81)         /* accommodate SACK */
2093			return (EINVAL);
2094
2095		/*
2096		 * MTUs must be in ascending order
2097		 */
2098		for (i = 1; i < NMTUS; ++i)
2099			if (m->mtus[i] < m->mtus[i - 1])
2100				return (EINVAL);
2101
2102		memcpy(sc->params.mtus, m->mtus,
2103		       sizeof(sc->params.mtus));
2104		break;
2105	}
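	/*
	 * A hypothetical MTU table for the ioctl above; the values are
	 * illustrative, not the driver defaults, and uint16_t is assumed
	 * for the mtus element type.  NMTUS (16 in this driver) entries,
	 * ascending, first entry at least 81 bytes:
	 *
	 *	static const uint16_t tab[NMTUS] = {
	 *		88, 256, 512, 576, 808, 1024, 1280, 1492,
	 *		1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600
	 *	};
	 *	struct ch_mtus m = { .nmtus = NMTUS };
	 *	memcpy(m.mtus, tab, sizeof(m.mtus));
	 *	ioctl(fd, CHELSIO_SETMTUTAB, &m);
	 */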
2106	case CHELSIO_GETMTUTAB: {
2107		struct ch_mtus *m = (struct ch_mtus *)data;
2108
2109		if (!is_offload(sc))
2110			return (EOPNOTSUPP);
2111
2112		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2113		m->nmtus = NMTUS;
2114		break;
2115	}
2116	case CHELSIO_DEVUP:
2117		if (!is_offload(sc))
2118			return (EOPNOTSUPP);
2119		return offload_open(pi);
2120		break;
2121	case CHELSIO_GET_MEM: {
2122		struct ch_mem_range *t = (struct ch_mem_range *)data;
2123		struct mc7 *mem;
2124		uint8_t *useraddr;
2125		u64 buf[32];
2126
2127		if (!is_offload(sc))
2128			return (EOPNOTSUPP);
2129		if (!(sc->flags & FULL_INIT_DONE))
2130			return (EIO);         /* need the memory controllers */
2131		if ((t->addr & 0x7) || (t->len & 0x7))
2132			return (EINVAL);
2133		if (t->mem_id == MEM_CM)
2134			mem = &sc->cm;
2135		else if (t->mem_id == MEM_PMRX)
2136			mem = &sc->pmrx;
2137		else if (t->mem_id == MEM_PMTX)
2138			mem = &sc->pmtx;
2139		else
2140			return (EINVAL);
2141
2142		/*
2143		 * Version scheme:
2144		 * bits 0..9: chip version
2145		 * bits 10..15: chip revision
2146		 */
2147		t->version = 3 | (sc->params.rev << 10);
2148
2149		/*
2150		 * Read 256 bytes at a time as len can be large and we don't
2151		 * want to use huge intermediate buffers.
2152		 */
2153		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2154		while (t->len) {
2155			unsigned int chunk = min(t->len, sizeof(buf));
2156
2157			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2158			if (error)
2159				return (-error);
2160			if (copyout(buf, useraddr, chunk))
2161				return (EFAULT);
2162			useraddr += chunk;
2163			t->addr += chunk;
2164			t->len -= chunk;
2165		}
2166		break;
2167	}
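	/*
	 * The /8 arithmetic above reflects the MC7 backdoor interface:
	 * t3_mc7_bd_read takes a 64-bit-word index and word count, so
	 * each 256-byte chunk (the 32-entry u64 buffer) is 32 words.
	 */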
2168	case CHELSIO_READ_TCAM_WORD: {
2169		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2170
2171		if (!is_offload(sc))
2172			return (EOPNOTSUPP);
2173		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2174		break;
2175	}
2176	case CHELSIO_SET_TRACE_FILTER: {
2177		struct ch_trace *t = (struct ch_trace *)data;
2178		const struct trace_params *tp;
2179
2180		tp = (const struct trace_params *)&t->sip;
2181		if (t->config_tx)
2182			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2183					       t->trace_tx);
2184		if (t->config_rx)
2185			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2186					       t->trace_rx);
2187		break;
2188	}
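	/*
	 * The cast above relies on the sip..invert_match members of
	 * struct ch_trace being laid out exactly like struct
	 * trace_params; the 0 and 1 literals select the two hardware
	 * trace filters, used here for TX and RX respectively.
	 */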
2189	case CHELSIO_SET_PKTSCHED: {
2190		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2191		if (sc->open_device_map == 0)
2192			return (EAGAIN);
2193		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2194		    p->binding);
2195		break;
2196	}
2197	case CHELSIO_IFCONF_GETREGS: {
2198		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2199		int reglen = cxgb_get_regs_len();
2200		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2201		if (buf == NULL)
2202			return (ENOMEM);
2203		if (regs->len > reglen)
2204			regs->len = reglen;
2205		else if (regs->len < reglen) {
2206			error = E2BIG;
2207			goto done;
2208		}
2209		cxgb_get_regs(sc, regs, buf);
2210		error = copyout(buf, regs->data, reglen);
2211
2212		done:
2213		free(buf, M_DEVBUF);
2214
2215		break;
2216	}
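	/*
	 * Hypothetical userland sketch for the register dump above; the
	 * caller must offer at least cxgb_get_regs_len() bytes (larger
	 * lengths are clamped, smaller ones fail with E2BIG).  Field
	 * types follow cxgb_ioctl.h; the buffer size is an assumption:
	 *
	 *	uint8_t dump[3 * 1024];		// T3_REGMAP_SIZE, assumed
	 *	struct ifconf_regs r = { .len = sizeof(dump), .data = dump };
	 *	ioctl(fd, CHELSIO_IFCONF_GETREGS, &r);
	 */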
2217	case CHELSIO_SET_HW_SCHED: {
2218		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2219		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2220
2221		if ((sc->flags & FULL_INIT_DONE) == 0)
2222			return (EAGAIN);       /* need TP to be initialized */
2223		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2224		    !in_range(t->channel, 0, 1) ||
2225		    !in_range(t->kbps, 0, 10000000) ||
2226		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2227		    !in_range(t->flow_ipg, 0,
2228			      dack_ticks_to_usec(sc, 0x7ff)))
2229			return (EINVAL);
2230
2231		if (t->kbps >= 0) {
2232			error = t3_config_sched(sc, t->kbps, t->sched);
2233			if (error < 0)
2234				return (-error);
2235		}
2236		if (t->class_ipg >= 0)
2237			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2238		if (t->flow_ipg >= 0) {
2239			t->flow_ipg *= 1000;     /* us -> ns */
2240			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2241		}
2242		if (t->mode >= 0) {
2243			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2244
2245			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2246					 bit, t->mode ? bit : 0);
2247		}
2248		if (t->channel >= 0)
2249			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2250					 1 << t->sched, t->channel << t->sched);
2251		break;
2252	}
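	/*
	 * Hypothetical sketch for the case above: cap hardware TX
	 * scheduler 0 at 100 Mb/s and leave every other knob at the -1
	 * "no change" sentinel that the in_range() checks admit:
	 *
	 *	struct ch_hw_sched hs;
	 *	memset(&hs, -1, sizeof(hs));	// all fields = no change
	 *	hs.sched = 0;
	 *	hs.kbps = 100000;		// 100 Mb/s rate cap
	 *	ioctl(fd, CHELSIO_SET_HW_SCHED, &hs);
	 */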
2253	default:
2254		return (EOPNOTSUPP);
2255		break;
2256	}
2257
2258	return (error);
2259}
2260
2261static __inline void
2262reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2263    unsigned int end)
2264{
2265	uint32_t *p = (uint32_t *)(buf + start);
2266
2267	for ( ; start <= end; start += sizeof(uint32_t))
2268		*p++ = t3_read_reg(ap, start);
2269}
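/*
 * reg_block_dump preserves register offsets: the 32-bit word read from
 * register address 'start' lands at byte offset 'start' within buf,
 * which is why the byte offset is added before the uint32_t cast.
 */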
2270
2271#define T3_REGMAP_SIZE (3 * 1024)
2272static int
2273cxgb_get_regs_len(void)
2274{
2275	return (T3_REGMAP_SIZE);
2276}
2277#undef T3_REGMAP_SIZE
2278
2279static void
2280cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2281{
2282
2283	/*
2284	 * Version scheme:
2285	 * bits 0..9: chip version
2286	 * bits 10..15: chip revision
2287	 * bit 31: set for PCIe cards
2288	 */
2289	regs->version = 3 | (sc->params.rev << 10) |
2289	    ((uint32_t)is_pcie(sc) << 31);
2290
2291	/*
2292	 * We skip the MAC statistics registers because they are clear-on-read.
2293	 * Also reading multi-register stats would need to synchronize with the
2294	 * periodic mac stats accumulation.  Hard to justify the complexity.
2295	 */
2296	memset(buf, 0, REGDUMP_SIZE);
2297	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2298	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2299	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2300	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2301	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2302	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2303		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2304	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2305		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2306}
2307