cxgb_main.c revision 170654
1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
122. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 170654 2007-06-13 05:36:00Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/rman.h>
44#include <sys/ioccom.h>
45#include <sys/mbuf.h>
46#include <sys/linker.h>
47#include <sys/firmware.h>
48#include <sys/socket.h>
49#include <sys/sockio.h>
50#include <sys/smp.h>
51#include <sys/sysctl.h>
52#include <sys/queue.h>
53#include <sys/taskqueue.h>
54
55#include <net/bpf.h>
56#include <net/ethernet.h>
57#include <net/if.h>
58#include <net/if_arp.h>
59#include <net/if_dl.h>
60#include <net/if_media.h>
61#include <net/if_types.h>
62
63#include <netinet/in_systm.h>
64#include <netinet/in.h>
65#include <netinet/if_ether.h>
66#include <netinet/ip.h>
68#include <netinet/tcp.h>
69#include <netinet/udp.h>
70
71#include <dev/pci/pcireg.h>
72#include <dev/pci/pcivar.h>
73#include <dev/pci/pci_private.h>
74
75#ifdef CONFIG_DEFINED
76#include <cxgb_include.h>
77#else
78#include <dev/cxgb/cxgb_include.h>
79#endif
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_msix(adapter_t *, int);
86static void cxgb_teardown_msix(adapter_t *);
87static void cxgb_init(void *);
88static void cxgb_init_locked(struct port_info *);
89static void cxgb_stop_locked(struct port_info *);
90static void cxgb_set_rxmode(struct port_info *);
91static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92static void cxgb_start(struct ifnet *);
93static void cxgb_start_proc(void *, int ncount);
94static int cxgb_media_change(struct ifnet *);
95static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96static int setup_sge_qsets(adapter_t *);
97static void cxgb_async_intr(void *);
98static void cxgb_ext_intr_handler(void *, int);
99static void cxgb_down(struct adapter *sc);
100static void cxgb_tick(void *);
101static void setup_rss(adapter_t *sc);
102
103/* Attachment glue for the PCI controller end of the device.  Each port of
104 * the device is attached separately, as defined later.
105 */
106static int cxgb_controller_probe(device_t);
107static int cxgb_controller_attach(device_t);
108static int cxgb_controller_detach(device_t);
109static void cxgb_free(struct adapter *);
110static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111    unsigned int end);
112static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
113static int cxgb_get_regs_len(void);
114static int offload_open(struct port_info *pi);
115static int offload_close(struct toedev *tdev);
116
117
118
119static device_method_t cxgb_controller_methods[] = {
120	DEVMETHOD(device_probe,		cxgb_controller_probe),
121	DEVMETHOD(device_attach,	cxgb_controller_attach),
122	DEVMETHOD(device_detach,	cxgb_controller_detach),
123
124	/* bus interface */
125	DEVMETHOD(bus_print_child,	bus_generic_print_child),
126	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
127
128	{ 0, 0 }
129};
130
131static driver_t cxgb_controller_driver = {
132	"cxgbc",
133	cxgb_controller_methods,
134	sizeof(struct adapter)
135};
136
137static devclass_t	cxgb_controller_devclass;
138DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139
140/*
141 * Attachment glue for the ports.  Attachment is done directly to the
142 * controller device.
143 */
144static int cxgb_port_probe(device_t);
145static int cxgb_port_attach(device_t);
146static int cxgb_port_detach(device_t);
147
148static device_method_t cxgb_port_methods[] = {
149	DEVMETHOD(device_probe,		cxgb_port_probe),
150	DEVMETHOD(device_attach,	cxgb_port_attach),
151	DEVMETHOD(device_detach,	cxgb_port_detach),
152	{ 0, 0 }
153};
154
155static driver_t cxgb_port_driver = {
156	"cxgb",
157	cxgb_port_methods,
158	0
159};
160
161static d_ioctl_t cxgb_extension_ioctl;
162static d_open_t cxgb_extension_open;
163static d_close_t cxgb_extension_close;
164
165static struct cdevsw cxgb_cdevsw = {
166       .d_version =    D_VERSION,
167       .d_flags =      0,
168       .d_open =       cxgb_extension_open,
169       .d_close =      cxgb_extension_close,
170       .d_ioctl =      cxgb_extension_ioctl,
171       .d_name =       "cxgb",
172};
173
174static devclass_t	cxgb_port_devclass;
175DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176
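/* One MSI-X vector per SGE queue set, plus one for slow-path (link/error) events. */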
177#define SGE_MSIX_COUNT (SGE_QSETS + 1)
178
179extern int collapse_mbufs;
180/*
181 * The driver uses the best interrupt scheme available on a platform in the
182 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
183 * of these schemes the driver may consider as follows:
184 *
185 * msi = 2: choose from among all three options
186 * msi = 1: only consider MSI and pin interrupts
187 * msi = 0: force pin interrupts
188 */
189static int msi_allowed = 2;
190
191TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
192SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
193SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
194    "MSI-X, MSI, INTx selector");
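/*
 * Example: to restrict the driver to MSI or INTx, set the loader tunable in
 * /boot/loader.conf:
 *
 *   hw.cxgb.msi_allowed="1"
 *
 * The other hw.cxgb.* tunables below (ofld_disable, singleq) are set the
 * same way.
 */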
195
196/*
197 * The driver enables offload as a default.
198 * To disable it, use ofld_disable = 1.
199 */
200static int ofld_disable = 0;
201TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
202SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
203    "disable ULP offload");
204
205/*
206 * The driver uses an auto-queue algorithm by default.
207 * To disable it and force a single queue-set per port, use singleq = 1.
208 */
209static int singleq = 1;
210TUNABLE_INT("hw.cxgb.singleq", &singleq);
211SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
212    "use a single queue-set per port");
213
214enum {
215	MAX_TXQ_ENTRIES      = 16384,
216	MAX_CTRL_TXQ_ENTRIES = 1024,
217	MAX_RSPQ_ENTRIES     = 16384,
218	MAX_RX_BUFFERS       = 16384,
219	MAX_RX_JUMBO_BUFFERS = 16384,
220	MIN_TXQ_ENTRIES      = 4,
221	MIN_CTRL_TXQ_ENTRIES = 4,
222	MIN_RSPQ_ENTRIES     = 32,
223	MIN_FL_ENTRIES       = 32
224};
225
226#define PORT_MASK ((1 << MAX_NPORTS) - 1)
227
228/* Table for probing the cards.  The desc field isn't actually used */
229struct cxgb_ident {
230	uint16_t	vendor;
231	uint16_t	device;
232	int		index;
233	char		*desc;
234} cxgb_identifiers[] = {
235	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
236	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
237	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
238	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
239	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
240	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
241	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
242	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
243	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
244	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
245	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
246	{0, 0, 0, NULL}
247};
248
249static struct cxgb_ident *
250cxgb_get_ident(device_t dev)
251{
252	struct cxgb_ident *id;
253
254	for (id = cxgb_identifiers; id->desc != NULL; id++) {
255		if ((id->vendor == pci_get_vendor(dev)) &&
256		    (id->device == pci_get_device(dev))) {
257			return (id);
258		}
259	}
260	return (NULL);
261}
262
263static const struct adapter_info *
264cxgb_get_adapter_info(device_t dev)
265{
266	struct cxgb_ident *id;
267	const struct adapter_info *ai;
268
269	id = cxgb_get_ident(dev);
270	if (id == NULL)
271		return (NULL);
272
273	ai = t3_get_adapter_info(id->index);
274
275	return (ai);
276}
277
278static int
279cxgb_controller_probe(device_t dev)
280{
281	const struct adapter_info *ai;
282	char *ports, buf[80];
283	int nports;
284
285	ai = cxgb_get_adapter_info(dev);
286	if (ai == NULL)
287		return (ENXIO);
288
289	nports = ai->nports0 + ai->nports1;
290	if (nports == 1)
291		ports = "port";
292	else
293		ports = "ports";
294
295	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
296	device_set_desc_copy(dev, buf);
297	return (BUS_PROBE_DEFAULT);
298}
299
300static int
301upgrade_fw(adapter_t *sc)
302{
303	char buf[32];
304#ifdef FIRMWARE_LATEST
305	const struct firmware *fw;
306#else
307	struct firmware *fw;
308#endif
309	int status;
310
311	snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR,
312	    FW_VERSION_MINOR, FW_VERSION_MICRO);
313
314	fw = firmware_get(buf);
315
316	if (fw == NULL) {
317		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
318		return (ENOENT);
319	}
320
321	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
322
323	firmware_put(fw, FIRMWARE_UNLOAD);
324
325	return (status);
326}
327
328static int
329cxgb_controller_attach(device_t dev)
330{
331	device_t child;
332	const struct adapter_info *ai;
333	struct adapter *sc;
334	int i, reg, msi_needed, error = 0;
335	uint32_t vers;
336	int port_qsets = 1;
337
338	sc = device_get_softc(dev);
339	sc->dev = dev;
340	sc->msi_count = 0;
341
342	/* Find the PCIe link width and set max read request to 4KB. */
343	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
344		uint16_t lnk, pectl;
345		lnk = pci_read_config(dev, reg + 0x12, 2);
346		sc->link_width = (lnk >> 4) & 0x3f;
347
348		pectl = pci_read_config(dev, reg + 0x8, 2);
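		/* Bits 14:12 of the PCIe device control register encode the maximum
		 * read request size; the value 5 selects 4096 bytes. */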
349		pectl = (pectl & ~0x7000) | (5 << 12);
350		pci_write_config(dev, reg + 0x8, pectl, 2);
351	}
352	if (sc->link_width != 0 && sc->link_width <= 4) {
353		device_printf(sc->dev,
354		    "PCIe x%d Link, expect reduced performance\n",
355		    sc->link_width);
356	}
357
358	pci_enable_busmaster(dev);
359	/*
360	 * Allocate the registers and make them available to the driver.
361	 * The registers that we care about for NIC mode are in BAR 0
362	 */
363	sc->regs_rid = PCIR_BAR(0);
364	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
365	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
366		device_printf(dev, "Cannot allocate BAR\n");
367		return (ENXIO);
368	}
369
370	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
371	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
372	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);
373	mtx_init(&sc->elmer_lock, "cxgb elmer", NULL, MTX_DEF);
374
375	sc->bt = rman_get_bustag(sc->regs_res);
376	sc->bh = rman_get_bushandle(sc->regs_res);
377	sc->mmio_len = rman_get_size(sc->regs_res);
378
379	ai = cxgb_get_adapter_info(dev);
380	if (t3_prep_adapter(sc, ai, 1) < 0) {
381		printf("prep adapter failed\n");
382		error = ENODEV;
383		goto out;
384	}
385	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
386	 * enough messages for the queue sets.  If that fails, try falling
387	 * back to MSI.  If that fails, then try falling back to the legacy
388	 * interrupt pin model.
389	 */
390#ifdef MSI_SUPPORTED
391
392	sc->msix_regs_rid = 0x20;
393	if ((msi_allowed >= 2) &&
394	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
395	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
396
397		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
398
399		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
400		    (sc->msi_count != msi_needed)) {
401			device_printf(dev, "msix allocation failed - msi_count = %d"
402			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
403			    msi_needed, error);
404			sc->msi_count = 0;
405			pci_release_msi(dev);
406			bus_release_resource(dev, SYS_RES_MEMORY,
407			    sc->msix_regs_rid, sc->msix_regs_res);
408			sc->msix_regs_res = NULL;
409		} else {
410			sc->flags |= USING_MSIX;
411			sc->cxgb_intr = t3_intr_msix;
412		}
413	}
414
415	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
416		sc->msi_count = 1;
417		if (pci_alloc_msi(dev, &sc->msi_count)) {
418			device_printf(dev, "alloc msi failed - will try INTx\n");
419			sc->msi_count = 0;
420			pci_release_msi(dev);
421		} else {
422			sc->flags |= USING_MSI;
423			sc->irq_rid = 1;
424			sc->cxgb_intr = t3_intr_msi;
425		}
426	}
427#endif
428	if (sc->msi_count == 0) {
429		device_printf(dev, "using line interrupts\n");
430		sc->irq_rid = 0;
431		sc->cxgb_intr = t3b_intr;
432	}
433
434
435	/* Create a private taskqueue thread for handling driver events */
436#ifdef TASKQUEUE_CURRENT
437	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
438	    taskqueue_thread_enqueue, &sc->tq);
439#else
440	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
441	    taskqueue_thread_enqueue, &sc->tq);
442#endif
443	if (sc->tq == NULL) {
444		device_printf(dev, "failed to allocate controller task queue\n");
445		goto out;
446	}
447
448	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
449	    device_get_nameunit(dev));
450	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
451
452
453	/* Create a periodic callout for checking adapter status */
454	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);
455
456	if (t3_check_fw_version(sc) != 0) {
457		/*
458		 * Warn user that a firmware update will be attempted in init.
459		 */
460		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
461		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
462		sc->flags &= ~FW_UPTODATE;
463	} else {
464		sc->flags |= FW_UPTODATE;
465	}
466
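	/*
	 * With MSI-X and multiqueue enabled, split the queue sets evenly among
	 * the ports, capped at the number of CPUs.
	 */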
467	if ((sc->flags & USING_MSIX) && !singleq)
468		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
469
470	/*
471	 * Create a child device for each MAC.  The ethernet attachment
472	 * will be done in these children.
473	 */
474	for (i = 0; i < (sc)->params.nports; i++) {
475		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
476			device_printf(dev, "failed to add child port\n");
477			error = EINVAL;
478			goto out;
479		}
480		sc->portdev[i] = child;
481		sc->port[i].adapter = sc;
482		sc->port[i].nqsets = port_qsets;
483		sc->port[i].first_qset = i*port_qsets;
484		sc->port[i].port = i;
485		device_set_softc(child, &sc->port[i]);
486	}
487	if ((error = bus_generic_attach(dev)) != 0)
488		goto out;
489
490	/*
491	 * XXX need to poll for link status
492	 */
493	sc->params.stats_update_period = 1;
494
495	/* initialize sge private state */
496	t3_sge_init_adapter(sc);
497
498	t3_led_ready(sc);
499
500	cxgb_offload_init();
501	if (is_offload(sc)) {
502		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
503		cxgb_adapter_ofld(sc);
504	}
505	error = t3_get_fw_version(sc, &vers);
506	if (error)
507		goto out;
508
509	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
510	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
511	    G_FW_VERSION_MICRO(vers));
512
513	t3_add_sysctls(sc);
514out:
515	if (error)
516		cxgb_free(sc);
517
518	return (error);
519}
520
521static int
522cxgb_controller_detach(device_t dev)
523{
524	struct adapter *sc;
525
526	sc = device_get_softc(dev);
527
528	cxgb_free(sc);
529
530	return (0);
531}
532
533static void
534cxgb_free(struct adapter *sc)
535{
536	int i;
537
538	cxgb_down(sc);
539
540#ifdef MSI_SUPPORTED
541	if (sc->flags & (USING_MSI | USING_MSIX)) {
542		device_printf(sc->dev, "releasing msi message(s)\n");
543		pci_release_msi(sc->dev);
544	} else {
545		device_printf(sc->dev, "no msi message to release\n");
546	}
547#endif
548	if (sc->msix_regs_res != NULL) {
549		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
550		    sc->msix_regs_res);
551	}
552
553	/*
554	 * XXX need to drain the ifq by hand until
555	 * it is taught about mbuf iovecs
556	 */
557	callout_drain(&sc->cxgb_tick_ch);
558
559	t3_sge_deinit_sw(sc);
560
561	if (sc->tq != NULL) {
562		taskqueue_drain(sc->tq, &sc->ext_intr_task);
563		taskqueue_free(sc->tq);
564	}
565
566	for (i = 0; i < (sc)->params.nports; ++i) {
567		if (sc->portdev[i] != NULL)
568			device_delete_child(sc->dev, sc->portdev[i]);
569	}
570
571	bus_generic_detach(sc->dev);
572#ifdef notyet
573	if (is_offload(sc)) {
574		cxgb_adapter_unofld(sc);
575		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
576			offload_close(&sc->tdev);
577	}
578#endif
579	t3_free_sge_resources(sc);
580	t3_sge_free(sc);
581
582	if (sc->regs_res != NULL)
583		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
584		    sc->regs_res);
585
586	mtx_destroy(&sc->mdio_lock);
587	mtx_destroy(&sc->sge.reg_lock);
588	mtx_destroy(&sc->lock);
589
590	return;
591}
592
593/**
594 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
595 *	@sc: the controller softc
596 *
597 *	Determines how many sets of SGE queues to use and initializes them.
598 *	We support multiple queue sets per port if we have MSI-X, otherwise
599 *	just one queue set per port.
600 */
601static int
602setup_sge_qsets(adapter_t *sc)
603{
604	int i, j, err, irq_idx, qset_idx;
605	u_int ntxq = SGE_TXQ_PER_SET;
606
607	if ((err = t3_sge_alloc(sc)) != 0) {
608		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
609		return (err);
610	}
611
612	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
613		irq_idx = -1;
614	else
615		irq_idx = 0;
616
617	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
618		struct port_info *pi = &sc->port[i];
619
620		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
621			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
622			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
623			    &sc->params.sge.qset[qset_idx], ntxq, pi);
624			if (err) {
625				t3_free_sge_resources(sc);
626				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
627				return (err);
628			}
629		}
630	}
631
632	return (0);
633}
634
635static void
636cxgb_teardown_msix(adapter_t *sc)
637{
638	int i, nqsets;
639
640	for (nqsets = i = 0; i < (sc)->params.nports; i++)
641		nqsets += sc->port[i].nqsets;
642
643	for (i = 0; i < nqsets; i++) {
644		if (sc->msix_intr_tag[i] != NULL) {
645			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
646			    sc->msix_intr_tag[i]);
647			sc->msix_intr_tag[i] = NULL;
648		}
649		if (sc->msix_irq_res[i] != NULL) {
650			bus_release_resource(sc->dev, SYS_RES_IRQ,
651			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
652			sc->msix_irq_res[i] = NULL;
653		}
654	}
655}
656
657static int
658cxgb_setup_msix(adapter_t *sc, int msix_count)
659{
660	int i, j, k, nqsets, rid;
661
662	/* The first message indicates link changes and error conditions */
663	sc->irq_rid = 1;
664	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
665	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
666		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
667		return (EINVAL);
668	}
669
670	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
671#ifdef INTR_FILTERS
672			NULL,
673#endif
674		cxgb_async_intr, sc, &sc->intr_tag)) {
675		device_printf(sc->dev, "Cannot set up interrupt\n");
676		return (EINVAL);
677	}
678	for (i = k = 0; i < (sc)->params.nports; i++) {
679		nqsets = sc->port[i].nqsets;
680		for (j = 0; j < nqsets; j++, k++) {
681			struct sge_qset *qs = &sc->sge.qs[k];
682
683			rid = k + 2;
684			if (cxgb_debug)
685				printf("rid=%d ", rid);
686			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
687			    sc->dev, SYS_RES_IRQ, &rid,
688			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
689				device_printf(sc->dev, "Cannot allocate "
690				    "interrupt for message %d\n", rid);
691				return (EINVAL);
692			}
693			sc->msix_irq_rid[k] = rid;
694			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
695			    INTR_MPSAFE|INTR_TYPE_NET,
696#ifdef INTR_FILTERS
697			NULL,
698#endif
699				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
700				device_printf(sc->dev, "Cannot set up "
701				    "interrupt for message %d\n", rid);
702				return (EINVAL);
703			}
704		}
705	}
706
707
708	return (0);
709}
710
711static int
712cxgb_port_probe(device_t dev)
713{
714	struct port_info *p;
715	char buf[80];
716
717	p = device_get_softc(dev);
718
719	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
720	device_set_desc_copy(dev, buf);
721	return (0);
722}
723
724
725static int
726cxgb_makedev(struct port_info *pi)
727{
728
729	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
730	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
731
732	if (pi->port_cdev == NULL)
733		return (ENOMEM);
734
735	pi->port_cdev->si_drv1 = (void *)pi;
736
737	return (0);
738}
739
740
741#ifdef TSO_SUPPORTED
742#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
743/* Don't enable TSO6 yet */
744#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
745#else
746#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
747/* Don't enable TSO6 yet */
748#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
749#define IFCAP_TSO4 0x0
750#define CSUM_TSO   0x0
751#endif
752
753
754static int
755cxgb_port_attach(device_t dev)
756{
757	struct port_info *p;
758	struct ifnet *ifp;
759	int err, media_flags;
760	char buf[64];
761
762	p = device_get_softc(dev);
763
764	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
765	mtx_init(&p->lock, buf, 0, MTX_DEF);
766
767	/* Allocate an ifnet object and set it up */
768	ifp = p->ifp = if_alloc(IFT_ETHER);
769	if (ifp == NULL) {
770		device_printf(dev, "Cannot allocate ifnet\n");
771		return (ENOMEM);
772	}
773
774	/*
775	 * Note that there is currently no watchdog timer.
776	 */
777	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
778	ifp->if_init = cxgb_init;
779	ifp->if_softc = p;
780	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
781	ifp->if_ioctl = cxgb_ioctl;
782	ifp->if_start = cxgb_start;
783	ifp->if_timer = 0;	/* Disable ifnet watchdog */
784	ifp->if_watchdog = NULL;
785
786	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
787	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
788	IFQ_SET_READY(&ifp->if_snd);
789
790	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
791	ifp->if_capabilities |= CXGB_CAP;
792	ifp->if_capenable |= CXGB_CAP_ENABLE;
793	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
794
795	ether_ifattach(ifp, p->hw_addr);
796#ifdef DEFAULT_JUMBO
797	ifp->if_mtu = 9000;
798#endif
799	if ((err = cxgb_makedev(p)) != 0) {
800		printf("makedev failed %d\n", err);
801		return (err);
802	}
803	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
804	    cxgb_media_status);
805
806	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
807		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
808	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
809		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
810	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
811		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
812	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
813		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
814		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
815			    0, NULL);
816		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
817			    0, NULL);
818		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
819			    0, NULL);
820		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
821			    0, NULL);
822		media_flags = 0;
823	} else {
824	        printf("unsupported media type %s\n", p->port_type->desc);
825		return (ENXIO);
826	}
827	if (media_flags) {
828		ifmedia_add(&p->media, media_flags, 0, NULL);
829		ifmedia_set(&p->media, media_flags);
830	} else {
831		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
832		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
833	}
834
835
836	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
837#ifdef TASKQUEUE_CURRENT
838	/* Create a taskqueue for handling TX without starvation */
839	p->tq = taskqueue_create(buf, M_NOWAIT,
840	    taskqueue_thread_enqueue, &p->tq);
841#else
842	/* Create a taskqueue for handling TX without starvation */
843	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
844	    taskqueue_thread_enqueue, &p->tq);
845#endif
846
847	if (p->tq == NULL) {
848		device_printf(dev, "failed to allocate port task queue\n");
849		return (ENOMEM);
850	}
851	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
852	    device_get_nameunit(dev));
853	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
854
855	t3_sge_init_port(p);
856
857	return (0);
858}
859
860static int
861cxgb_port_detach(device_t dev)
862{
863	struct port_info *p;
864
865	p = device_get_softc(dev);
866
867	PORT_LOCK(p);
868	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
869		cxgb_stop_locked(p);
870	PORT_UNLOCK(p);
871
872	mtx_destroy(&p->lock);
873	if (p->tq != NULL) {
874		taskqueue_drain(p->tq, &p->start_task);
875		taskqueue_free(p->tq);
876		p->tq = NULL;
877	}
878
879	ether_ifdetach(p->ifp);
880	if_free(p->ifp);
881
882	if (p->port_cdev != NULL)
883		destroy_dev(p->port_cdev);
884
885	return (0);
886}
887
888void
889t3_fatal_err(struct adapter *sc)
890{
891	u_int fw_status[4];
892
893	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
894	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
895		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
896		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
897}
898
899int
900t3_os_find_pci_capability(adapter_t *sc, int cap)
901{
902	device_t dev;
903	struct pci_devinfo *dinfo;
904	pcicfgregs *cfg;
905	uint32_t status;
906	uint8_t ptr;
907
908	dev = sc->dev;
909	dinfo = device_get_ivars(dev);
910	cfg = &dinfo->cfg;
911
912	status = pci_read_config(dev, PCIR_STATUS, 2);
913	if (!(status & PCIM_STATUS_CAPPRESENT))
914		return (0);
915
916	switch (cfg->hdrtype & PCIM_HDRTYPE) {
917	case 0:
918	case 1:
919		ptr = PCIR_CAP_PTR;
920		break;
921	case 2:
922		ptr = PCIR_CAP_PTR_2;
923		break;
924	default:
925		return (0);
926		break;
927	}
928	ptr = pci_read_config(dev, ptr, 1);
929
930	while (ptr != 0) {
931		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
932			return (ptr);
933		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
934	}
935
936	return (0);
937}
938
939int
940t3_os_pci_save_state(struct adapter *sc)
941{
942	device_t dev;
943	struct pci_devinfo *dinfo;
944
945	dev = sc->dev;
946	dinfo = device_get_ivars(dev);
947
948	pci_cfg_save(dev, dinfo, 0);
949	return (0);
950}
951
952int
953t3_os_pci_restore_state(struct adapter *sc)
954{
955	device_t dev;
956	struct pci_devinfo *dinfo;
957
958	dev = sc->dev;
959	dinfo = device_get_ivars(dev);
960
961	pci_cfg_restore(dev, dinfo);
962	return (0);
963}
964
965/**
966 *	t3_os_link_changed - handle link status changes
967 *	@adapter: the adapter associated with the link change
968 *	@port_id: the port index whose link status has changed
969 *	@link_stat: the new status of the link
970 *	@speed: the new speed setting
971 *	@duplex: the new duplex setting
972 *	@fc: the new flow-control setting
973 *
974 *	This is the OS-dependent handler for link status changes.  The OS
975 *	neutral handler takes care of most of the processing for these events,
976 *	then calls this handler for any OS-specific processing.
977 */
978void
979t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
980     int duplex, int fc)
981{
982	struct port_info *pi = &adapter->port[port_id];
983	struct cmac *mac = &adapter->port[port_id].mac;
984
985	if ((pi->ifp->if_flags & IFF_UP) == 0)
986		return;
987
988	if (link_status) {
989		t3_mac_enable(mac, MAC_DIRECTION_RX);
990		if_link_state_change(pi->ifp, LINK_STATE_UP);
991	} else {
992		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
993		pi->phy.ops->power_down(&pi->phy, 1);
994		t3_mac_disable(mac, MAC_DIRECTION_RX);
995		t3_link_start(&pi->phy, mac, &pi->link_config);
996	}
997}
998
999
1000/*
1001 * Interrupt-context handler for external (PHY) interrupts.
1002 */
1003void
1004t3_os_ext_intr_handler(adapter_t *sc)
1005{
1006	if (cxgb_debug)
1007		printf("t3_os_ext_intr_handler\n");
1008	/*
1009	 * Schedule a task to handle external interrupts as they may be slow
1010	 * and we use a mutex to protect MDIO registers.  We disable PHY
1011	 * interrupts in the meantime and let the task reenable them when
1012	 * it's done.
1013	 */
1014	ADAPTER_LOCK(sc);
1015	if (sc->slow_intr_mask) {
1016		sc->slow_intr_mask &= ~F_T3DBG;
1017		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1018		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1019	}
1020	ADAPTER_UNLOCK(sc);
1021}
1022
1023void
1024t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1025{
1026
1027	/*
1028	 * The ifnet might not be allocated before this gets called,
1029	 * as this is called early on in attach by t3_prep_adapter, so
1030	 * save the address off in the port structure.
1031	 */
1032	if (cxgb_debug)
1033		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1034	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1035}
1036
1037/**
1038 *	link_start - enable a port
1039 *	@p: the port to enable
1040 *
1041 *	Performs the MAC and PHY actions needed to enable a port.
1042 */
1043static void
1044cxgb_link_start(struct port_info *p)
1045{
1046	struct ifnet *ifp;
1047	struct t3_rx_mode rm;
1048	struct cmac *mac = &p->mac;
1049
1050	ifp = p->ifp;
1051
1052	t3_init_rx_mode(&rm, p);
1053	t3_mac_reset(mac);
1054	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1055	t3_mac_set_address(mac, 0, p->hw_addr);
1056	t3_mac_set_rx_mode(mac, &rm);
1057	t3_link_start(&p->phy, mac, &p->link_config);
1058	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1059}
1060
1061/**
1062 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1063 *	@adap: the adapter
1064 *
1065 *	Sets up RSS to distribute packets to multiple receive queues.  We
1066 *	configure the RSS CPU lookup table to distribute to the number of HW
1067 *	receive queues, and the response queue lookup table to narrow that
1068 *	down to the response queues actually configured for each port.
1069 *	We always configure the RSS mapping for two ports since the mapping
1070 *	table has plenty of entries.
1071 */
1072static void
1073setup_rss(adapter_t *adap)
1074{
1075	int i;
1076	u_int nq0 = adap->port[0].nqsets;
1077	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
1078	uint8_t cpus[SGE_QSETS + 1];
1079	uint16_t rspq_map[RSS_TABLE_SIZE];
1080
1081	for (i = 0; i < SGE_QSETS; ++i)
1082		cpus[i] = i;
1083	cpus[SGE_QSETS] = 0xff;
1084
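	/*
	 * The first half of the response queue map steers to port 0's queue sets
	 * (0 .. nq0-1), the second half to port 1's (nq0 .. nq0+nq1-1).
	 */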
1085	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1086		rspq_map[i] = i % nq0;
1087		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
1088	}
1089
1090	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1091	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
1092	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
1093}
1094
1095/*
1096 * Sends an mbuf to an offload queue driver
1097 * after dealing with any active network taps.
1098 */
1099static inline int
1100offload_tx(struct toedev *tdev, struct mbuf *m)
1101{
1102	int ret;
1103
1104	critical_enter();
1105	ret = t3_offload_tx(tdev, m);
1106	critical_exit();
1107	return (ret);
1108}
1109
1110static int
1111write_smt_entry(struct adapter *adapter, int idx)
1112{
1113	struct port_info *pi = &adapter->port[idx];
1114	struct cpl_smt_write_req *req;
1115	struct mbuf *m;
1116
1117	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1118		return (ENOMEM);
1119
1120	req = mtod(m, struct cpl_smt_write_req *);
1121	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1122	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1123	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1124	req->iff = idx;
1125	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1126	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1127
1128	m_set_priority(m, 1);
1129
1130	offload_tx(&adapter->tdev, m);
1131
1132	return (0);
1133}
1134
1135static int
1136init_smt(struct adapter *adapter)
1137{
1138	int i;
1139
1140	for_each_port(adapter, i)
1141		write_smt_entry(adapter, i);
1142	return 0;
1143}
1144
1145static void
1146init_port_mtus(adapter_t *adapter)
1147{
1148	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1149
1150	if (adapter->port[1].ifp)
1151		mtus |= adapter->port[1].ifp->if_mtu << 16;
1152	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1153}
1154
1155static void
1156send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1157			      int hi, int port)
1158{
1159	struct mbuf *m;
1160	struct mngt_pktsched_wr *req;
1161
1162	m = m_gethdr(M_NOWAIT, MT_DATA);
1163	if (m) {
1164		req = mtod(m, struct mngt_pktsched_wr *);
1165		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1166		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1167		req->sched = sched;
1168		req->idx = qidx;
1169		req->min = lo;
1170		req->max = hi;
1171		req->binding = port;
1172		m->m_len = m->m_pkthdr.len = sizeof(*req);
1173		t3_mgmt_tx(adap, m);
1174	}
1175}
1176
1177static void
1178bind_qsets(adapter_t *sc)
1179{
1180	int i, j;
1181
1182	for (i = 0; i < (sc)->params.nports; ++i) {
1183		const struct port_info *pi = adap2pinfo(sc, i);
1184
1185		for (j = 0; j < pi->nqsets; ++j)
1186			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1187					  -1, i);
1188	}
1189}
1190
1191/**
1192 *	cxgb_up - enable the adapter
1193 *	@adap: adapter being enabled
1194 *
1195 *	Called when the first port is enabled, this function performs the
1196 *	actions necessary to make an adapter operational, such as completing
1197 *	the initialization of HW modules, and enabling interrupts.
1198 *
1199 */
1200static int
1201cxgb_up(struct adapter *sc)
1202{
1203	int err = 0;
1204
1205	if ((sc->flags & FULL_INIT_DONE) == 0) {
1206
1207		if ((sc->flags & FW_UPTODATE) == 0)
1208			err = upgrade_fw(sc);
1209
1210		if (err)
1211			goto out;
1212
1213		err = t3_init_hw(sc, 0);
1214		if (err)
1215			goto out;
1216
1217		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1218
1219		err = setup_sge_qsets(sc);
1220		if (err)
1221			goto out;
1222
1223		setup_rss(sc);
1224		sc->flags |= FULL_INIT_DONE;
1225	}
1226
1227	t3_intr_clear(sc);
1228
1229	/* If it's MSI or INTx, allocate a single interrupt for everything */
1230	if ((sc->flags & USING_MSIX) == 0) {
1231		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1232		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1233			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
1234			err = EINVAL;
1235			goto out;
1236		}
1237		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1238
1239		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1240#ifdef INTR_FILTERS
1241			NULL,
1242#endif
1243			sc->cxgb_intr, sc, &sc->intr_tag)) {
1244			device_printf(sc->dev, "Cannot set up interrupt\n");
1245			err = EINVAL;
1246			goto irq_err;
1247		}
1248	} else {
1249		cxgb_setup_msix(sc, sc->msi_count);
1250	}
1251
1252	t3_sge_start(sc);
1253	t3_intr_enable(sc);
1254
1255	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
1256		bind_qsets(sc);
1257	sc->flags |= QUEUES_BOUND;
1258out:
1259	return (err);
1260irq_err:
1261	CH_ERR(sc, "request_irq failed, err %d\n", err);
1262	goto out;
1263}
1264
1265
1266/*
1267 * Release resources when all the ports and offloading have been stopped.
1268 */
1269static void
1270cxgb_down(struct adapter *sc)
1271{
1272	int i;
1273
1274	t3_sge_stop(sc);
1275	ADAPTER_LOCK(sc);
1276	t3_intr_disable(sc);
1277	ADAPTER_UNLOCK(sc);
1278
1279
1280	if (sc->intr_tag != NULL) {
1281		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1282		sc->intr_tag = NULL;
1283	}
1284	if (sc->irq_res != NULL) {
1285		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1286		    sc->irq_rid, sc->irq_res);
1287		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1288		    sc->irq_res);
1289		sc->irq_res = NULL;
1290	}
1291
1292	if (sc->flags & USING_MSIX)
1293		cxgb_teardown_msix(sc);
1294
1295	callout_drain(&sc->sge_timer_ch);
1296	if (sc->tq != NULL)
1297		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1298	for (i = 0; i < sc->params.nports; i++)
1299		if (sc->port[i].tq != NULL)
1300			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
1301
1302}
1303
1304static int
1305offload_open(struct port_info *pi)
1306{
1307	struct adapter *adapter = pi->adapter;
1308	struct toedev *tdev = TOEDEV(pi->ifp);
1309	int adap_up = adapter->open_device_map & PORT_MASK;
1310	int err = 0;
1311
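	/*
	 * Atomically set the offload bit in open_device_map; if it was already
	 * set, the offload device is already open and there is nothing to do.
	 */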
1312	if (atomic_cmpset_int(&adapter->open_device_map,
1313		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
1314		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
1315		return (0);
1316
1317	ADAPTER_LOCK(pi->adapter);
1318	if (!adap_up)
1319		err = cxgb_up(adapter);
1320	ADAPTER_UNLOCK(pi->adapter);
1321	if (err < 0)
1322		return (err);
1323
1324	t3_tp_set_offload_mode(adapter, 1);
1325	tdev->lldev = adapter->port[0].ifp;
1326	err = cxgb_offload_activate(adapter);
1327	if (err)
1328		goto out;
1329
1330	init_port_mtus(adapter);
1331	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1332		     adapter->params.b_wnd,
1333		     adapter->params.rev == 0 ?
1334		       adapter->port[0].ifp->if_mtu : 0xffff);
1335	init_smt(adapter);
1336
1337	/* Call back all registered clients */
1338	cxgb_add_clients(tdev);
1339
1340out:
1341	/* restore them in case the offload module has changed them */
1342	if (err) {
1343		t3_tp_set_offload_mode(adapter, 0);
1344		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1345		cxgb_set_dummy_ops(tdev);
1346	}
1347	return (err);
1348}
1349
1350static int
1351offload_close(struct toedev *tdev)
1352{
1353	struct adapter *adapter = tdev2adap(tdev);
1354
1355	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1356		return (0);
1357
1358	/* Call back all registered clients */
1359	cxgb_remove_clients(tdev);
1360	tdev->lldev = NULL;
1361	cxgb_set_dummy_ops(tdev);
1362	t3_tp_set_offload_mode(adapter, 0);
1363	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1364
1365	ADAPTER_LOCK(adapter);
1366	if (!adapter->open_device_map)
1367		cxgb_down(adapter);
1368	ADAPTER_UNLOCK(adapter);
1369
1370	cxgb_offload_deactivate(adapter);
1371	return (0);
1372}
1373
1374static void
1375cxgb_init(void *arg)
1376{
1377	struct port_info *p = arg;
1378
1379	PORT_LOCK(p);
1380	cxgb_init_locked(p);
1381	PORT_UNLOCK(p);
1382}
1383
1384static void
1385cxgb_init_locked(struct port_info *p)
1386{
1387	struct ifnet *ifp;
1388	adapter_t *sc = p->adapter;
1389	int err;
1390
1391	mtx_assert(&p->lock, MA_OWNED);
1392	ifp = p->ifp;
1393
1394	ADAPTER_LOCK(p->adapter);
1395	if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) {
1396		ADAPTER_UNLOCK(p->adapter);
1397		cxgb_stop_locked(p);
1398		return;
1399	}
1400	if (p->adapter->open_device_map == 0)
1401		t3_intr_clear(sc);
1402
1403	setbit(&p->adapter->open_device_map, p->port);
1404	ADAPTER_UNLOCK(p->adapter);
1405
1406	if (is_offload(sc) && !ofld_disable) {
1407		err = offload_open(p);
1408		if (err)
1409			log(LOG_WARNING,
1410			    "Could not initialize offload capabilities\n");
1411	}
1412	cxgb_link_start(p);
1413	t3_link_changed(sc, p->port);
1414	ifp->if_baudrate = p->link_config.speed * 1000000;
1415
1416	t3_port_intr_enable(sc, p->port);
1417
1418	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1419	    cxgb_tick, sc);
1420
1421	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1422	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1423}
1424
1425static void
1426cxgb_set_rxmode(struct port_info *p)
1427{
1428	struct t3_rx_mode rm;
1429	struct cmac *mac = &p->mac;
1430
1431	mtx_assert(&p->lock, MA_OWNED);
1432
1433	t3_init_rx_mode(&rm, p);
1434	t3_mac_set_rx_mode(mac, &rm);
1435}
1436
1437static void
1438cxgb_stop_locked(struct port_info *p)
1439{
1440	struct ifnet *ifp;
1441
1442	mtx_assert(&p->lock, MA_OWNED);
1443	mtx_assert(&p->adapter->lock, MA_NOTOWNED);
1444
1445	ifp = p->ifp;
1446
1447	t3_port_intr_disable(p->adapter, p->port);
1448	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1449	p->phy.ops->power_down(&p->phy, 1);
1450	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1451
1452	ADAPTER_LOCK(p->adapter);
1453	clrbit(&p->adapter->open_device_map, p->port);
1454	/*
1455	 * XXX cancel check_task
1456	 */
1457	if (p->adapter->open_device_map == 0)
1458		cxgb_down(p->adapter);
1459	ADAPTER_UNLOCK(p->adapter);
1460}
1461
1462static int
1463cxgb_set_mtu(struct port_info *p, int mtu)
1464{
1465	struct ifnet *ifp = p->ifp;
1466	int error = 0;
1467
1468	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
1469		error = EINVAL;
1470	else if (ifp->if_mtu != mtu) {
1471		PORT_LOCK(p);
1472		ifp->if_mtu = mtu;
1473		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1474			callout_stop(&p->adapter->cxgb_tick_ch);
1475			cxgb_stop_locked(p);
1476			cxgb_init_locked(p);
1477		}
1478		PORT_UNLOCK(p);
1479	}
1480	return (error);
1481}
1482
1483static int
1484cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1485{
1486	struct port_info *p = ifp->if_softc;
1487	struct ifaddr *ifa = (struct ifaddr *)data;
1488	struct ifreq *ifr = (struct ifreq *)data;
1489	int flags, error = 0;
1490	uint32_t mask;
1491
1492	/*
1493	 * XXX need to check that we aren't in the middle of an unload
1494	 */
1495	switch (command) {
1496	case SIOCSIFMTU:
1497		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1498		break;
1499	case SIOCSIFADDR:
1500	case SIOCGIFADDR:
1501		PORT_LOCK(p);
1502		if (ifa->ifa_addr->sa_family == AF_INET) {
1503			ifp->if_flags |= IFF_UP;
1504			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1505				cxgb_init_locked(p);
1506			arp_ifinit(ifp, ifa);
1507		} else
1508			error = ether_ioctl(ifp, command, data);
1509		PORT_UNLOCK(p);
1510		break;
1511	case SIOCSIFFLAGS:
1512		PORT_LOCK(p);
1513		if (ifp->if_flags & IFF_UP) {
1514			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1515				flags = p->if_flags;
1516				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1517				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1518					cxgb_set_rxmode(p);
1519
1520			} else
1521				cxgb_init_locked(p);
1522			p->if_flags = ifp->if_flags;
1523		} else {
1524			callout_stop(&p->adapter->cxgb_tick_ch);
1525			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1526				cxgb_stop_locked(p);
1527			} else {
1528				adapter_t *sc = p->adapter;
1529				callout_reset(&sc->cxgb_tick_ch,
1530				    sc->params.stats_update_period * hz,
1531				    cxgb_tick, sc);
1532			}
1533		}
1534		PORT_UNLOCK(p);
1535		break;
1536	case SIOCSIFMEDIA:
1537	case SIOCGIFMEDIA:
1538		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1539		break;
1540	case SIOCSIFCAP:
1541		PORT_LOCK(p);
1542		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1543		if (mask & IFCAP_TXCSUM) {
1544			if (IFCAP_TXCSUM & ifp->if_capenable) {
1545				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1546				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1547				    | CSUM_TSO);
1548			} else {
1549				ifp->if_capenable |= IFCAP_TXCSUM;
1550				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1551			}
1552		} else if (mask & IFCAP_RXCSUM) {
1553			if (IFCAP_RXCSUM & ifp->if_capenable) {
1554				ifp->if_capenable &= ~IFCAP_RXCSUM;
1555			} else {
1556				ifp->if_capenable |= IFCAP_RXCSUM;
1557			}
1558		}
1559		if (mask & IFCAP_TSO4) {
1560			if (IFCAP_TSO4 & ifp->if_capenable) {
1561				ifp->if_capenable &= ~IFCAP_TSO4;
1562				ifp->if_hwassist &= ~CSUM_TSO;
1563			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1564				ifp->if_capenable |= IFCAP_TSO4;
1565				ifp->if_hwassist |= CSUM_TSO;
1566			} else {
1567				if (cxgb_debug)
1568					printf("cxgb requires tx checksum offload"
1569					    " be enabled to use TSO\n");
1570				error = EINVAL;
1571			}
1572		}
1573		PORT_UNLOCK(p);
1574		break;
1575	default:
1576		error = ether_ioctl(ifp, command, data);
1577		break;
1578	}
1579	return (error);
1580}
1581
1582static int
1583cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
1584{
1585	struct sge_qset *qs;
1586	struct sge_txq *txq;
1587	struct port_info *p = ifp->if_softc;
1588	struct mbuf *m0, *m = NULL;
1589	int err, in_use_init;
1590
1591	if (!p->link_config.link_ok)
1592		return (ENXIO);
1593
1594	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1595		return (ENOBUFS);
1596
1597	qs = &p->adapter->sge.qs[p->first_qset];
1598	txq = &qs->txq[TXQ_ETH];
1599	err = 0;
1600
1601	mtx_lock(&txq->lock);
1602	in_use_init = txq->in_use;
1603	while ((txq->in_use - in_use_init < txmax) &&
1604	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1605		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1606		if (m == NULL)
1607			break;
1608		/*
1609		 * Convert chain to M_IOVEC
1610		 */
1611		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
1612		m0 = m;
1613#ifdef INVARIANTS
1614		/*
1615		 * Clean up after net stack sloppiness
1616		 * before calling m_sanity
1617		 */
1618		m0 = m->m_next;
1619		while (m0) {
1620			m0->m_flags &= ~M_PKTHDR;
1621			m0 = m0->m_next;
1622		}
1623		m_sanity(m0, 0);
1624		m0 = m;
1625#endif
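		/*
		 * If the chain has too many segments to be collapsed in place,
		 * fall back to m_defrag() and try collapsing again.
		 */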
1626		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
1627		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
1628			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
1629				m = m0;
1630				m_collapse(m, TX_MAX_SEGS, &m0);
1631			} else
1632				break;
1633		}
1634		m = m0;
1635		if ((err = t3_encap(p, &m)) != 0)
1636			break;
1637		BPF_MTAP(ifp, m);
1638	}
1639	mtx_unlock(&txq->lock);
1640
1641	if (__predict_false(err)) {
1642		if (err == ENOMEM) {
1643			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1644			IFQ_LOCK(&ifp->if_snd);
1645			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1646			IFQ_UNLOCK(&ifp->if_snd);
1647		}
1648	}
1649	if (err == 0 && m == NULL)
1650		err = ENOBUFS;
1651	else if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC) &&
1652	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1653		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1654		err = ENOSPC;
1655	}
1656	return (err);
1657}
1658
1659static void
1660cxgb_start_proc(void *arg, int ncount)
1661{
1662	struct ifnet *ifp = arg;
1663	struct port_info *pi = ifp->if_softc;
1664	struct sge_qset *qs;
1665	struct sge_txq *txq;
1666	int error;
1667
1668	qs = &pi->adapter->sge.qs[pi->first_qset];
1669	txq = &qs->txq[TXQ_ETH];
1670
1671	do {
1672		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
1673			taskqueue_enqueue(pi->adapter->tq,
1674			    &pi->timer_reclaim_task);
1675
1676		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1677	} while (error == 0);
1678}
1679
1680static void
1681cxgb_start(struct ifnet *ifp)
1682{
1683	struct port_info *pi = ifp->if_softc;
1684	struct sge_qset *qs;
1685	struct sge_txq *txq;
1686	int err;
1687
1688	qs = &pi->adapter->sge.qs[pi->first_qset];
1689	txq = &qs->txq[TXQ_ETH];
1690
1691	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
1692		taskqueue_enqueue(pi->adapter->tq,
1693		    &pi->timer_reclaim_task);
1694
1695	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1696
1697	if (err == 0)
1698		taskqueue_enqueue(pi->tq, &pi->start_task);
1699}
1700
1701
1702static int
1703cxgb_media_change(struct ifnet *ifp)
1704{
1705	if_printf(ifp, "media change not supported\n");
1706	return (ENXIO);
1707}
1708
1709static void
1710cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1711{
1712	struct port_info *p = ifp->if_softc;
1713
1714	ifmr->ifm_status = IFM_AVALID;
1715	ifmr->ifm_active = IFM_ETHER;
1716
1717	if (!p->link_config.link_ok)
1718		return;
1719
1720	ifmr->ifm_status |= IFM_ACTIVE;
1721
1722	switch (p->link_config.speed) {
1723	case 10:
1724		ifmr->ifm_active |= IFM_10_T;
1725		break;
1726	case 100:
1727		ifmr->ifm_active |= IFM_100_TX;
1728			break;
1729	case 1000:
1730		ifmr->ifm_active |= IFM_1000_T;
1731		break;
1732	}
1733
1734	if (p->link_config.duplex)
1735		ifmr->ifm_active |= IFM_FDX;
1736	else
1737		ifmr->ifm_active |= IFM_HDX;
1738}
1739
1740static void
1741cxgb_async_intr(void *data)
1742{
1743	adapter_t *sc = data;
1744
1745	if (cxgb_debug)
1746		device_printf(sc->dev, "cxgb_async_intr\n");
1747
1748	t3_slow_intr_handler(sc);
1749
1750}
1751
1752static void
1753cxgb_ext_intr_handler(void *arg, int count)
1754{
1755	adapter_t *sc = (adapter_t *)arg;
1756
1757	if (cxgb_debug)
1758		printf("cxgb_ext_intr_handler\n");
1759
1760	t3_phy_intr_handler(sc);
1761
1762	/* Now reenable external interrupts */
1763	ADAPTER_LOCK(sc);
1764	if (sc->slow_intr_mask) {
1765		sc->slow_intr_mask |= F_T3DBG;
1766		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
1767		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1768	}
1769	ADAPTER_UNLOCK(sc);
1770}
1771
1772static void
1773check_link_status(adapter_t *sc)
1774{
1775	int i;
1776
1777	for (i = 0; i < (sc)->params.nports; ++i) {
1778		struct port_info *p = &sc->port[i];
1779
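		/* PHYs that cannot raise link interrupts have their link state polled here. */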
1780		if (!(p->port_type->caps & SUPPORTED_IRQ))
1781			t3_link_changed(sc, i);
1782		p->ifp->if_baudrate = p->link_config.speed * 1000000;
1783	}
1784}
1785
1786static void
1787check_t3b2_mac(struct adapter *adapter)
1788{
1789	int i;
1790
1791	for_each_port(adapter, i) {
1792		struct port_info *p = &adapter->port[i];
1793		struct ifnet *ifp = p->ifp;
1794		int status;
1795
1796		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1797			continue;
1798
1799		status = 0;
1800		PORT_LOCK(p);
1801		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
1802			status = t3b2_mac_watchdog_task(&p->mac);
1803		if (status == 1)
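		/*
		 * A return of 1 is counted as a MAC toggle; 2 means the MAC lost
		 * its state, so reprogram it and restart the link below.
		 */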
1804			p->mac.stats.num_toggled++;
1805		else if (status == 2) {
1806			struct cmac *mac = &p->mac;
1807
1808			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
1809			    + ETHER_VLAN_ENCAP_LEN);
1810			t3_mac_set_address(mac, 0, p->hw_addr);
1811			cxgb_set_rxmode(p);
1812			t3_link_start(&p->phy, mac, &p->link_config);
1813			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1814			t3_port_intr_enable(adapter, p->port);
1815			p->mac.stats.num_resets++;
1816		}
1817		PORT_UNLOCK(p);
1818	}
1819}
1820
1821static void
1822cxgb_tick(void *arg)
1823{
1824	adapter_t *sc = (adapter_t *)arg;
1825	const struct adapter_params *p = &sc->params;
1826
1827	if (p->linkpoll_period)
1828		check_link_status(sc);
1829	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1830	    cxgb_tick, sc);
1831
1832	/*
1833	 * The adapter lock can currently only be acquired after the port lock;
1834	 * the callout is CALLOUT_RETURNUNLOCKED, so drop the adapter lock here.
1835	 */
1836	ADAPTER_UNLOCK(sc);
1837
1838	if (p->rev == T3_REV_B2)
1839		check_t3b2_mac(sc);
1840}
1841
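/*
 * A negative value means "leave this parameter unchanged" and is always
 * accepted; see the CHELSIO_SET_QSET_PARAMS handler below.
 */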
1842static int
1843in_range(int val, int lo, int hi)
1844{
1845	return val < 0 || (val <= hi && val >= lo);
1846}
1847
1848static int
1849cxgb_extension_open(struct cdev *dev, int flags, int fmp, d_thread_t *td)
1850{
1851       return (0);
1852}
1853
1854static int
1855cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
1856{
1857       return (0);
1858}
1859
1860static int
1861cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
1862    int fflag, struct thread *td)
1863{
1864	int mmd, error = 0;
1865	struct port_info *pi = dev->si_drv1;
1866	adapter_t *sc = pi->adapter;
1867
1868#ifdef PRIV_SUPPORTED
1869	if (priv_check(td, PRIV_DRIVER)) {
1870		if (cxgb_debug)
1871			printf("user does not have access to privileged ioctls\n");
1872		return (EPERM);
1873	}
1874#else
1875	if (suser(td)) {
1876		if (cxgb_debug)
1877			printf("user does not have access to privileged ioctls\n");
1878		return (EPERM);
1879	}
1880#endif
1881
1882	switch (cmd) {
1883	case SIOCGMIIREG: {
1884		uint32_t val;
1885		struct cphy *phy = &pi->phy;
1886		struct mii_data *mid = (struct mii_data *)data;
1887
1888		if (!phy->mdio_read)
1889			return (EOPNOTSUPP);
1890		if (is_10G(sc)) {
1891			mmd = mid->phy_id >> 8;
1892			if (!mmd)
1893				mmd = MDIO_DEV_PCS;
1894			else if (mmd > MDIO_DEV_XGXS)
1895				return (EINVAL);
1896
1897			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
1898					     mid->reg_num, &val);
1899		} else
1900		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
1901					     mid->reg_num & 0x1f, &val);
1902		if (error == 0)
1903			mid->val_out = val;
1904		break;
1905	}
1906	case SIOCSMIIREG: {
1907		struct cphy *phy = &pi->phy;
1908		struct mii_data *mid = (struct mii_data *)data;
1909
1910		if (!phy->mdio_write)
1911			return (EOPNOTSUPP);
1912		if (is_10G(sc)) {
1913			mmd = mid->phy_id >> 8;
1914			if (!mmd)
1915				mmd = MDIO_DEV_PCS;
1916			else if (mmd > MDIO_DEV_XGXS)
1917				return (EINVAL);
1918
1919			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
1920					      mmd, mid->reg_num, mid->val_in);
1921		} else
1922			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
1923					      mid->reg_num & 0x1f,
1924					      mid->val_in);
1925		break;
1926	}
1927	case CHELSIO_SETREG: {
1928		struct ch_reg *edata = (struct ch_reg *)data;
1929		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
1930			return (EFAULT);
1931		t3_write_reg(sc, edata->addr, edata->val);
1932		break;
1933	}
1934	case CHELSIO_GETREG: {
1935		struct ch_reg *edata = (struct ch_reg *)data;
1936		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
1937			return (EFAULT);
1938		edata->val = t3_read_reg(sc, edata->addr);
1939		break;
1940	}
1941	case CHELSIO_GET_SGE_CONTEXT: {
1942		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
1943		mtx_lock(&sc->sge.reg_lock);
1944		switch (ecntxt->cntxt_type) {
1945		case CNTXT_TYPE_EGRESS:
1946			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
1947			    ecntxt->data);
1948			break;
1949		case CNTXT_TYPE_FL:
1950			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
1951			    ecntxt->data);
1952			break;
1953		case CNTXT_TYPE_RSP:
1954			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
1955			    ecntxt->data);
1956			break;
1957		case CNTXT_TYPE_CQ:
1958			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
1959			    ecntxt->data);
1960			break;
1961		default:
1962			error = EINVAL;
1963			break;
1964		}
1965		mtx_unlock(&sc->sge.reg_lock);
1966		break;
1967	}
1968	case CHELSIO_GET_SGE_DESC: {
1969		struct ch_desc *edesc = (struct ch_desc *)data;
1970		int ret;
1971		if (edesc->queue_num >= SGE_QSETS * 6)
1972			return (EINVAL);
1973		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
1974		    edesc->queue_num % 6, edesc->idx, edesc->data);
1975		if (ret < 0)
1976			return (EINVAL);
1977		edesc->size = ret;
1978		break;
1979	}
1980	case CHELSIO_SET_QSET_PARAMS: {
1981		struct qset_params *q;
1982		struct ch_qset_params *t = (struct ch_qset_params *)data;
1983
1984		if (t->qset_idx >= SGE_QSETS)
1985			return (EINVAL);
1986		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
1987		    !in_range(t->cong_thres, 0, 255) ||
1988		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
1989			      MAX_TXQ_ENTRIES) ||
1990		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
1991			      MAX_TXQ_ENTRIES) ||
1992		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
1993			      MAX_CTRL_TXQ_ENTRIES) ||
1994		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
1995		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
1996			      MAX_RX_JUMBO_BUFFERS) ||
1997		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
1998		       return (EINVAL);
1999		if ((sc->flags & FULL_INIT_DONE) &&
2000		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2001		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2002		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2003		     t->polling >= 0 || t->cong_thres >= 0))
2004			return (EBUSY);
2005
2006		q = &sc->params.sge.qset[t->qset_idx];
2007
2008		if (t->rspq_size >= 0)
2009			q->rspq_size = t->rspq_size;
2010		if (t->fl_size[0] >= 0)
2011			q->fl_size = t->fl_size[0];
2012		if (t->fl_size[1] >= 0)
2013			q->jumbo_size = t->fl_size[1];
2014		if (t->txq_size[0] >= 0)
2015			q->txq_size[0] = t->txq_size[0];
2016		if (t->txq_size[1] >= 0)
2017			q->txq_size[1] = t->txq_size[1];
2018		if (t->txq_size[2] >= 0)
2019			q->txq_size[2] = t->txq_size[2];
2020		if (t->cong_thres >= 0)
2021			q->cong_thres = t->cong_thres;
2022		if (t->intr_lat >= 0) {
2023			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2024
2025			q->coalesce_nsecs = t->intr_lat*1000;
2026			t3_update_qset_coalesce(qs, q);
2027		}
2028		break;
2029	}
2030	case CHELSIO_GET_QSET_PARAMS: {
2031		struct qset_params *q;
2032		struct ch_qset_params *t = (struct ch_qset_params *)data;
2033
2034		if (t->qset_idx >= SGE_QSETS)
2035			return (EINVAL);
2036
2037		q = &sc->params.sge.qset[t->qset_idx];
2038		t->rspq_size   = q->rspq_size;
2039		t->txq_size[0] = q->txq_size[0];
2040		t->txq_size[1] = q->txq_size[1];
2041		t->txq_size[2] = q->txq_size[2];
2042		t->fl_size[0]  = q->fl_size;
2043		t->fl_size[1]  = q->jumbo_size;
2044		t->polling     = q->polling;
2045		t->intr_lat    = q->coalesce_nsecs / 1000;
2046		t->cong_thres  = q->cong_thres;
2047		break;
2048	}
2049	case CHELSIO_SET_QSET_NUM: {
2050		struct ch_reg *edata = (struct ch_reg *)data;
2051		unsigned int port_idx = pi->port;
2052
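		/*
		 * The two ports share the SGE_QSETS queue sets; more than one
		 * qset per port requires MSI-X, and port 1's qsets are laid
		 * out immediately after port 0's.
		 */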
2053		if (sc->flags & FULL_INIT_DONE)
2054			return (EBUSY);
2055		if (edata->val < 1 ||
2056		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2057			return (EINVAL);
2058		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2059			return (EINVAL);
2060		sc->port[port_idx].nqsets = edata->val;
2061		sc->port[0].first_qset = 0;
2062		/*
2063		 * XXX hardcoded to two ports, matching the Linux driver
2064		 */
2065		sc->port[1].first_qset = sc->port[0].nqsets;
2066		break;
2067	}
2068	case CHELSIO_GET_QSET_NUM: {
2069		struct ch_reg *edata = (struct ch_reg *)data;
2070		edata->val = pi->nqsets;
2071		break;
2072	}
2073#ifdef notyet
2074	case CHELSIO_LOAD_FW:
2075	case CHELSIO_GET_PM:
2076	case CHELSIO_SET_PM:
2077		return (EOPNOTSUPP);
2078		break;
2079#endif
2080	case CHELSIO_SETMTUTAB: {
2081		struct ch_mtus *m = (struct ch_mtus *)data;
2082		int i;
2083
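		/*
		 * The MTU table is shared with the offload engine and may only
		 * be replaced while offload is not running.
		 */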
2084		if (!is_offload(sc))
2085			return (EOPNOTSUPP);
2086		if (offload_running(sc))
2087			return (EBUSY);
2088		if (m->nmtus != NMTUS)
2089			return (EINVAL);
2090		if (m->mtus[0] < 81)         /* accommodate SACK */
2091			return (EINVAL);
2092
2093		/*
2094		 * MTUs must be in ascending order
2095		 */
2096		for (i = 1; i < NMTUS; ++i)
2097			if (m->mtus[i] < m->mtus[i - 1])
2098				return (EINVAL);
2099
2100		memcpy(sc->params.mtus, m->mtus,
2101		       sizeof(sc->params.mtus));
2102		break;
2103	}
2104	case CHELSIO_GETMTUTAB: {
2105		struct ch_mtus *m = (struct ch_mtus *)data;
2106
2107		if (!is_offload(sc))
2108			return (EOPNOTSUPP);
2109
2110		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2111		m->nmtus = NMTUS;
2112		break;
2113	}
2114	case CHELSIO_DEVUP:
2115		if (!is_offload(sc))
2116			return (EOPNOTSUPP);
2117		return offload_open(pi);
2118		break;
2119	case CHELSIO_GET_MEM: {
2120		struct ch_mem_range *t = (struct ch_mem_range *)data;
2121		struct mc7 *mem;
2122		uint8_t *useraddr;
2123		u64 buf[32];
2124
2125		if (!is_offload(sc))
2126			return (EOPNOTSUPP);
2127		if (!(sc->flags & FULL_INIT_DONE))
2128			return (EIO);         /* need the memory controllers */
2129		if ((t->addr & 0x7) || (t->len & 0x7))
2130			return (EINVAL);
2131		if (t->mem_id == MEM_CM)
2132			mem = &sc->cm;
2133		else if (t->mem_id == MEM_PMRX)
2134			mem = &sc->pmrx;
2135		else if (t->mem_id == MEM_PMTX)
2136			mem = &sc->pmtx;
2137		else
2138			return (EINVAL);
2139
2140		/*
2141		 * Version scheme:
2142		 * bits 0..9: chip version
2143		 * bits 10..15: chip revision
2144		 */
2145		t->version = 3 | (sc->params.rev << 10);
2146
2147		/*
2148		 * Read 256 bytes at a time as len can be large and we don't
2149		 * want to use huge intermediate buffers.
2150		 */
2151		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2152		while (t->len) {
2153			unsigned int chunk = min(t->len, sizeof(buf));
2154
2155			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2156			if (error)
2157				return (-error);
2158			if (copyout(buf, useraddr, chunk))
2159				return (EFAULT);
2160			useraddr += chunk;
2161			t->addr += chunk;
2162			t->len -= chunk;
2163		}
2164		break;
2165	}
2166	case CHELSIO_READ_TCAM_WORD: {
2167		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2168
2169		if (!is_offload(sc))
2170			return (EOPNOTSUPP);
2171		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2172		break;
2173	}
2174	case CHELSIO_SET_TRACE_FILTER: {
2175		struct ch_trace *t = (struct ch_trace *)data;
2176		const struct trace_params *tp;
2177
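		/*
		 * The filter fields of ch_trace, starting at sip, are expected
		 * to mirror the layout of struct trace_params, so the trace
		 * parameters can be overlaid directly.
		 */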
2178		tp = (const struct trace_params *)&t->sip;
2179		if (t->config_tx)
2180			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2181					       t->trace_tx);
2182		if (t->config_rx)
2183			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2184					       t->trace_rx);
2185		break;
2186	}
2187	case CHELSIO_SET_PKTSCHED: {
2188		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
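		/* Requires the adapter to be up (at least one device open). */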
2189		if (sc->open_device_map == 0)
2190			return (EAGAIN);
2191		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2192		    p->binding);
2193		break;
2194	}
2195	case CHELSIO_IFCONF_GETREGS: {
2196		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2197		int reglen = cxgb_get_regs_len();
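		/*
		 * Snapshot the registers into a temporary kernel buffer and
		 * copy the full register map out to the caller's buffer.
		 */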
2198		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2199		if (buf == NULL)
2200			return (ENOMEM);
2201		if (regs->len > reglen)
2202			regs->len = reglen;
2203		else if (regs->len < reglen) {
2204			error = E2BIG;
2205			goto done;
2206		}
2207		cxgb_get_regs(sc, regs, buf);
2208		error = copyout(buf, regs->data, reglen);
2209
2210		done:
2211		free(buf, M_DEVBUF);
2212
2213		break;
2214	}
2215	case CHELSIO_SET_HW_SCHED: {
2216		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2217		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2218
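		/*
		 * Configure one of the hardware TX schedulers; negative fields
		 * leave the corresponding setting unchanged.
		 */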
2219		if ((sc->flags & FULL_INIT_DONE) == 0)
2220			return (EAGAIN);       /* need TP to be initialized */
2221		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2222		    !in_range(t->channel, 0, 1) ||
2223		    !in_range(t->kbps, 0, 10000000) ||
2224		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2225		    !in_range(t->flow_ipg, 0,
2226			      dack_ticks_to_usec(sc, 0x7ff)))
2227			return (EINVAL);
2228
2229		if (t->kbps >= 0) {
2230			error = t3_config_sched(sc, t->kbps, t->sched);
2231			if (error < 0)
2232				return (-error);
2233		}
2234		if (t->class_ipg >= 0)
2235			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2236		if (t->flow_ipg >= 0) {
2237			t->flow_ipg *= 1000;     /* us -> ns */
2238			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2239		}
2240		if (t->mode >= 0) {
2241			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2242
2243			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2244					 bit, t->mode ? bit : 0);
2245		}
2246		if (t->channel >= 0)
2247			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2248					 1 << t->sched, t->channel << t->sched);
2249		break;
2250	}
2251	default:
2252		return (EOPNOTSUPP);
2253		break;
2254	}
2255
2256	return (error);
2257}
2258
2259static __inline void
2260reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2261    unsigned int end)
2262{
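	/*
	 * start and end are byte offsets into the register map; the dump
	 * buffer mirrors that layout, one 32-bit word per register.
	 */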
2263	uint32_t *p = (uint32_t *)(buf + start);
2264
2265	for ( ; start <= end; start += sizeof(uint32_t))
2266		*p++ = t3_read_reg(ap, start);
2267}
2268
2269#define T3_REGMAP_SIZE (3 * 1024)
2270static int
2271cxgb_get_regs_len(void)
2272{
2273	return T3_REGMAP_SIZE;
2274}
2275#undef T3_REGMAP_SIZE
2276
2277static void
2278cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2279{
2280
2281	/*
2282	 * Version scheme:
2283	 * bits 0..9: chip version
2284	 * bits 10..15: chip revision
2285	 * bit 31: set for PCIe cards
2286	 */
2287	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2288
2289	/*
2290	 * We skip the MAC statistics registers because they are clear-on-read.
2291	 * Also reading multi-register stats would need to synchronize with the
2292	 * periodic mac stats accumulation.  Hard to justify the complexity.
2293	 */
2294	memset(buf, 0, REGDUMP_SIZE);
2295	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2296	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2297	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2298	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2299	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2300	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2301		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2302	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2303		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2304}
2305