cxgb_main.c revision 170007
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 170007 2007-05-27 04:39:07Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/common/cxgb_common.h>
#include <dev/cxgb/cxgb_ioctl.h>
#include <dev/cxgb/cxgb_offload.h>
#include <dev/cxgb/common/cxgb_regs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>

#include <dev/cxgb/sys/mvec.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_down(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static int offload_close(struct toedev *tdev);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

#define SGE_MSIX_COUNT (SGE_QSETS + 1)

extern int collapse_mbufs;
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;
TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload by default.
 * To disable it, set ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver defaults to a single queue-set per port (singleq = 1).  Set
 * singleq = 0 to let the driver size the number of queue-sets automatically
 * (one per CPU, up to the hardware limit) when MSI-X is available.
 */
static int singleq = 1;
TUNABLE_INT("hw.cxgb.singleq", &singleq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
    "use a single queue-set per port");
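
/*
 * All three knobs above are CTLFLAG_RDTUN, so they are read once from the
 * kernel environment at module load time, e.g. via /boot/loader.conf:
 *
 *	hw.cxgb.msi_allowed="1"		# never use MSI-X
 *	hw.cxgb.ofld_disable="1"	# run as a plain NIC, no ULP offload
 *	hw.cxgb.singleq="0"		# allow multiple queue-sets per port
 */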

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	if (ai->nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
upgrade_fw(adapter_t *sc)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR,
	    FW_VERSION_MINOR, FW_VERSION_MICRO);

	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}
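
/*
 * Note: firmware_get(9) resolves the name against images registered by
 * firmware(9) modules and will try to load a module of that name if none
 * is registered yet.  With a firmware version of, say, 4.1.2 the lookup
 * above would be for an image named "t3fw412" (illustrative numbers).
 */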

static int
cxgb_controller_attach(device_t dev)
{
	driver_intr_t *cxgb_intr = NULL;
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, reg, msi_needed, error = 0;
	uint32_t vers;
	int port_qsets = 1;

	sc = device_get_softc(dev);
	sc->dev = dev;
	sc->msi_count = 0;

	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		uint16_t lnk, pectl;
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
	}

	pci_enable_busmaster(dev);

	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		error = ENODEV;
		goto out;
	}

	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED

	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_needed = sc->msi_count = SGE_MSIX_COUNT;

		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
		    (sc->msi_count != msi_needed)) {
			device_printf(dev, "msix allocation failed - msi_count = %d"
			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
			    msi_needed, error);
			sc->msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			cxgb_intr = t3_intr_msix;
		}
	}

	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
		sc->msi_count = 1;
		if (pci_alloc_msi(dev, &sc->msi_count)) {
			device_printf(dev, "alloc msi failed - will try INTx\n");
			sc->msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (sc->msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->irq_rid = 0;
		cxgb_intr = t3b_intr;
	}

	/* Record the chosen handler; cxgb_up() installs it for MSI/INTx */
	sc->cxgb_intr = cxgb_intr;

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);

	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if ((sc->flags & USING_MSIX) && !singleq)
		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; i++) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
		sc->port[i].nqsets = port_qsets;
		sc->port[i].first_qset = i*port_qsets;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	/*
	 * XXX need to poll for link status
	 */
	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_sw(sc);

	t3_led_ready(sc);

	cxgb_offload_init();
	if (is_offload(sc)) {
		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_adapter_ofld(sc);
	}
	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
	    G_FW_VERSION_MICRO(vers));

	t3_add_sysctls(sc);
out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	cxgb_down(sc);

#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	} else {
		device_printf(sc->dev, "no msi message to release\n");
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	/*
	 * XXX need to drain the ifq by hand until
	 * it is taught about mbuf iovecs
	 */
	callout_drain(&sc->cxgb_tick_ch);

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	bus_generic_detach(sc->dev);

	if (is_offload(sc)) {
		cxgb_adapter_unofld(sc);
		if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT))
			offload_close(&sc->tdev);
	}
	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);

	return;
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = SGE_TXQ_PER_SET;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}

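/*
 * MSI-X vector layout used below (rids as passed to bus_alloc_resource):
 *	rid 1		slow path: link changes and error interrupts
 *	rid 2..N+1	one vector per SGE queue set (N = total qsets)
 */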
static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}

	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}
	for (i = 0, k = 0; i < (sc)->params.nports; ++i) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; ++j, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}

	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{
	struct cdevsw *cxgb_cdevsw;

	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	cxgb_cdevsw->d_version = D_VERSION;
	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;

	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    pi->ifp->if_xname);

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif
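
/*
 * When TSO_SUPPORTED is not defined, the IFCAP_TSO4/CSUM_TSO fallbacks
 * above define those flags to 0 so the capability code below compiles
 * unchanged without further #ifdefs.
 */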

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int media_flags;
	int err;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	ifp->if_baudrate = 100000000;

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
		media_flags = IFM_ETHER | IFM_10G_CX4;
	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
		media_flags = IFM_ETHER | IFM_10G_SR;
	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
		media_flags = IFM_ETHER | IFM_10G_LR;
	else {
		printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}

	ifmedia_add(&p->media, media_flags, 0, NULL);
	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&p->media, media_flags);

	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
#ifdef TASKQUEUE_CURRENT
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);

	PORT_LOCK(p);
	cxgb_stop_locked(p);
	PORT_UNLOCK(p);

	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];
	struct cmac *mac = &adapter->port[port_id].mac;

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status) {
		t3_mac_enable(mac, MAC_DIRECTION_RX);
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	} else {
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
		pi->phy.ops->power_down(&pi->phy, 1);
		t3_mac_disable(mac, MAC_DIRECTION_RX);
		t3_link_start(&pi->phy, mac, &pi->link_config);
	}
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
	ADAPTER_UNLOCK(sc);
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated yet when this is called, as it
	 * happens early in attach via t3_prep_adapter, so just save the
	 * address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}
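
	/*
	 * Worked example: with nq0 = nq1 = 2 the first half of rspq_map
	 * alternates 0,1,0,1,... and the second half alternates 2,3,2,3,...,
	 * so port 1's hash buckets land on queue sets 2 and 3.
	 */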

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver
 * after dealing with any active network taps.
 */
static inline int
offload_tx(struct toedev *tdev, struct mbuf *m)
{
	int ret;

	critical_enter();
	ret = t3_offload_tx(tdev, m);
	critical_exit();
	return (ret);
}

static int
write_smt_entry(struct adapter *adapter, int idx)
{
	struct port_info *pi = &adapter->port[idx];
	struct cpl_smt_write_req *req;
	struct mbuf *m;

	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
		return (ENOMEM);

	req = mtod(m, struct cpl_smt_write_req *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
	req->iff = idx;
	memset(req->src_mac1, 0, sizeof(req->src_mac1));
	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

	m_set_priority(m, 1);

	offload_tx(&adapter->tdev, m);

	return (0);
}

static int
init_smt(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i)
		write_smt_entry(adapter, i);
	return (0);
}

static void
init_port_mtus(adapter_t *adapter)
{
	unsigned int mtus = adapter->port[0].ifp->if_mtu;

	if (adapter->port[1].ifp)
		mtus |= adapter->port[1].ifp->if_mtu << 16;
	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
		      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m) {
		req = mtod(m, struct mngt_pktsched_wr *);
		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
		req->sched = sched;
		req->idx = qidx;
		req->min = lo;
		req->max = hi;
		req->binding = port;
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		t3_mgmt_tx(adap, m);
	}
}

static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}

/**
 *	cxgb_up - enable the adapter
 *	@adap: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		if ((sc->flags & FW_UPTODATE) == 0)
			err = upgrade_fw(sc);

		if (err)
			goto out;

		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		setup_rss(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			err = EINVAL;
			goto out;
		}
		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			sc->cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(sc->dev, "Cannot set up interrupt\n");
			err = EINVAL;
			goto irq_err;
		}
	} else {
		cxgb_setup_msix(sc, sc->msi_count);
	}

	t3_sge_start(sc);
	t3_intr_enable(sc);

	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
		bind_qsets(sc);
	sc->flags |= QUEUES_BOUND;
out:
	return (err);
irq_err:
	CH_ERR(sc, "request_irq failed, err %d\n", err);
	goto out;
}

/*
 * Release resources when all the ports and offloading have been stopped.
 */
static void
cxgb_down(struct adapter *sc)
{
	int i;

	t3_sge_stop(sc);
	t3_intr_disable(sc);

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
			sc->msix_intr_tag[i] = NULL;
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
			sc->msix_irq_res[i] = NULL;
		}
	}

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
		sc->intr_tag = NULL;
	}
	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
		sc->irq_res = NULL;
	}

	callout_drain(&sc->sge_timer_ch);
	taskqueue_drain(sc->tq, &sc->slow_intr_task);
	taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
}

static int
offload_open(struct port_info *pi)
{
	struct adapter *adapter = pi->adapter;
	struct toedev *tdev = TOEDEV(pi->ifp);
	int adap_up = adapter->open_device_map & PORT_MASK;
	int err = 0;

	if (atomic_cmpset_int(&adapter->open_device_map,
		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
		return (0);

	ADAPTER_LOCK(pi->adapter);
	if (!adap_up)
		err = cxgb_up(adapter);
	ADAPTER_UNLOCK(pi->adapter);
	if (err < 0)
		return (err);

	t3_tp_set_offload_mode(adapter, 1);
	tdev->lldev = adapter->port[0].ifp;
	err = cxgb_offload_activate(adapter);
	if (err)
		goto out;

	init_port_mtus(adapter);
	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
		     adapter->params.b_wnd,
		     adapter->params.rev == 0 ?
		       adapter->port[0].ifp->if_mtu : 0xffff);
	init_smt(adapter);

	/* Call back all registered clients */
	cxgb_add_clients(tdev);

out:
	/* restore them in case the offload module has changed them */
	if (err) {
		t3_tp_set_offload_mode(adapter, 0);
		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_set_dummy_ops(tdev);
	}
	return (err);
}

static int
offload_close(struct toedev *tdev)
{
	struct adapter *adapter = tdev2adap(tdev);

	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
		return (0);

	/* Call back all registered clients */
	cxgb_remove_clients(tdev);
	tdev->lldev = NULL;
	cxgb_set_dummy_ops(tdev);
	t3_tp_set_offload_mode(adapter, 0);
	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);

	ADAPTER_LOCK(adapter);
	if (!adapter->open_device_map)
		cxgb_down(adapter);
	ADAPTER_UNLOCK(adapter);

	cxgb_offload_deactivate(adapter);
	return (0);
}

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int err;

	mtx_assert(&p->lock, MA_OWNED);
	ifp = p->ifp;

	ADAPTER_LOCK(p->adapter);
	if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) {
		ADAPTER_UNLOCK(p->adapter);
		cxgb_stop_locked(p);
		return;
	}
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);

	setbit(&p->adapter->open_device_map, p->port);

	ADAPTER_UNLOCK(p->adapter);
	if (is_offload(sc) && !ofld_disable) {
		err = offload_open(p);
		if (err)
			log(LOG_WARNING,
			    "Could not initialize offload capabilities\n");
	}
	cxgb_link_start(p);
	t3_port_intr_enable(sc, p->port);

	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	PORT_LOCK(p);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	PORT_UNLOCK(p);
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	mtx_assert(&p->lock, MA_OWNED);

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop_locked(struct port_info *p)
{
	struct ifnet *ifp;

	mtx_assert(&p->lock, MA_OWNED);
	mtx_assert(&p->adapter->lock, MA_NOTOWNED);

	ifp = p->ifp;

	t3_port_intr_disable(p->adapter, p->port);
	PORT_LOCK(p);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	PORT_UNLOCK(p);
	p->phy.ops->power_down(&p->phy, 1);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);

	ADAPTER_LOCK(p->adapter);
	clrbit(&p->adapter->open_device_map, p->port);
	/*
	 * XXX cancel check_task
	 */
	if (p->adapter->open_device_map == 0)
		cxgb_down(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	/*
	 * XXX need to check that we aren't in the middle of an unload
	 */
	switch (command) {
	case SIOCSIFMTU:
		if ((ifr->ifr_mtu < ETHERMIN) ||
		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
			error = EINVAL;
		else if (ifp->if_mtu != ifr->ifr_mtu) {
			PORT_LOCK(p);
			ifp->if_mtu = ifr->ifr_mtu;
			t3_mac_set_mtu(&p->mac, ifp->if_mtu + ETHER_HDR_LEN);
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				cxgb_init(p);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			PORT_LOCK(p);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);
			} else
				cxgb_init_locked(p);
			p->if_flags = ifp->if_flags;
			PORT_UNLOCK(p);
		} else {
			callout_drain(&p->adapter->cxgb_tick_ch);
			PORT_LOCK(p);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop_locked(p);
			} else {
				adapter_t *sc = p->adapter;
				callout_reset(&sc->cxgb_tick_ch,
				    sc->params.stats_update_period * hz,
				    cxgb_tick, sc);
			}
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m0, *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		/*
		 * Convert chain to M_IOVEC
		 */
		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
		m0 = m;
#ifdef INVARIANTS
		/*
		 * Clean up after net stack sloppiness
		 * before calling m_sanity
		 */
		m0 = m->m_next;
		while (m0) {
			m0->m_flags &= ~M_PKTHDR;
			m0 = m0->m_next;
		}
		m_sanity(m, 0);
		m0 = m;
#endif
		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
				m = m0;
				m_collapse(m, TX_MAX_SEGS, &m0);
			} else
				break;
		}
		m = m0;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (cxgb_debug)
			printf("would set OFLAGS\n");
		if (err == ENOMEM) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL)
		err = ENOBUFS;

	if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) &&
	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		err = ENOSPC;
	}
	return (err);
}

static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error = 0;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	while (error == 0) {
		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
			taskqueue_enqueue(pi->adapter->tq,
			    &pi->adapter->timer_reclaim_task);

		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
	}
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
		taskqueue_enqueue(pi->adapter->tq,
		    &pi->adapter->timer_reclaim_task);

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}

static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	adapter_t *sc = data;

	if (cxgb_debug)
		device_printf(sc->dev, "cxgb_async_intr\n");

	t3_slow_intr_handler(sc);
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
	ADAPTER_UNLOCK(sc);
}

static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
	}
}

static void
check_t3b2_mac(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i) {
		struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;
		int status;

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		status = 0;
		PORT_LOCK(p);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
			status = t3b2_mac_watchdog_task(&p->mac);
		if (status == 1)
			p->mac.stats.num_toggled++;
		else if (status == 2) {
			struct cmac *mac = &p->mac;

			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN);
			t3_mac_set_address(mac, 0, p->hw_addr);
			cxgb_set_rxmode(p);
			t3_link_start(&p->phy, mac, &p->link_config);
			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
			t3_port_intr_enable(adapter, p->port);
			p->mac.stats.num_resets++;
		}
		PORT_UNLOCK(p);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	/*
	 * The adapter lock can currently only be acquired after the port
	 * lock, so release it before check_t3b2_mac() takes the port locks.
	 * (The callout is CALLOUT_RETURNUNLOCKED, so we must drop the lock
	 * ourselves in any case.)
	 */
	ADAPTER_UNLOCK(sc);
	if (p->rev == T3_REV_B2)
		check_t3b2_mac(sc);
}

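/*
 * Range-check an optional ioctl parameter: negative values mean "not
 * supplied" and always pass, e.g. in_range(-1, 0, 255) is true while
 * in_range(300, 0, 255) is false.
 */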
static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

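	/*
	 * For the MII ioctls on 10G parts, the caller encodes a clause-45
	 * MMD identifier in bits 8 and up of phy_id and the PHY address in
	 * the low 5 bits; e.g. phy_id = (MDIO_DEV_PCS << 8) | 0 addresses
	 * the PCS MMD at PHY address 0 (illustrative encoding, inferred
	 * from the handlers below).
	 */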
	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_SET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);
		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
		    !in_range(t->cong_thres, 0, 255) ||
		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
			      MAX_CTRL_TXQ_ENTRIES) ||
		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
			      MAX_RX_JUMBO_BUFFERS) ||
		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
			return (EINVAL);
		if ((sc->flags & FULL_INIT_DONE) &&
		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
		     t->polling >= 0 || t->cong_thres >= 0))
			return (EBUSY);

		q = &sc->params.sge.qset[t->qset_idx];

		if (t->rspq_size >= 0)
			q->rspq_size = t->rspq_size;
		if (t->fl_size[0] >= 0)
			q->fl_size = t->fl_size[0];
		if (t->fl_size[1] >= 0)
			q->jumbo_size = t->fl_size[1];
		if (t->txq_size[0] >= 0)
			q->txq_size[0] = t->txq_size[0];
		if (t->txq_size[1] >= 0)
			q->txq_size[1] = t->txq_size[1];
		if (t->txq_size[2] >= 0)
			q->txq_size[2] = t->txq_size[2];
		if (t->cong_thres >= 0)
			q->cong_thres = t->cong_thres;
		if (t->intr_lat >= 0) {
			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];

			q->coalesce_nsecs = t->intr_lat*1000;
			t3_update_qset_coalesce(qs, q);
		}
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);

		q = &(sc)->params.sge.qset[t->qset_idx];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->intr_lat    = q->coalesce_nsecs / 1000;
		t->cong_thres  = q->cong_thres;
		break;
	}
	case CHELSIO_SET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		unsigned int port_idx = pi->port;

		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);
		if (edata->val < 1 ||
		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
			return (EINVAL);
		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
			return (EINVAL);
		sc->port[port_idx].nqsets = edata->val;
		sc->port[0].first_qset = 0;
		/*
		 * XXX hardcode ourselves to 2 ports just like LEEENUX
		 */
		sc->port[1].first_qset = sc->port[0].nqsets;
		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
#ifdef notyet
	case CHELSIO_LOAD_FW:
	case CHELSIO_GET_PM:
	case CHELSIO_SET_PM:
		return (EOPNOTSUPP);
		break;
#endif
	case CHELSIO_SETMTUTAB: {
		struct ch_mtus *m = (struct ch_mtus *)data;
		int i;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (offload_running(sc))
			return (EBUSY);
		if (m->nmtus != NMTUS)
			return (EINVAL);
		if (m->mtus[0] < 81)         /* accommodate SACK */
			return (EINVAL);

		/*
		 * MTUs must be in ascending order
		 */
		for (i = 1; i < NMTUS; ++i)
			if (m->mtus[i] < m->mtus[i - 1])
				return (EINVAL);

		memcpy(sc->params.mtus, m->mtus,
		       sizeof(sc->params.mtus));
		break;
	}
	case CHELSIO_GETMTUTAB: {
		struct ch_mtus *m = (struct ch_mtus *)data;

		if (!is_offload(sc))
			return (EOPNOTSUPP);

		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
		m->nmtus = NMTUS;
		break;
	}
	case CHELSIO_DEVUP:
		if (!is_offload(sc))
			return (EOPNOTSUPP);
		return offload_open(pi);
		break;
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((t->addr & 0x7) || (t->len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);
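		/* e.g. a rev-2 (T3B) chip yields 3 | (2 << 10) == 0x803 */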

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
		while (t->len) {
			unsigned int chunk = min(t->len, sizeof(buf));

			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			t->addr += chunk;
			t->len -= chunk;
		}
		break;
	}
	case CHELSIO_READ_TCAM_WORD: {
		struct ch_tcam_word *t = (struct ch_tcam_word *)data;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
		break;
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ifconf_regs *regs = (struct ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);

		if (buf == NULL)
			return (ENOMEM);
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen) {
			error = E2BIG;
			goto done;
		}
		cxgb_get_regs(sc, regs, buf);
		error = copyout(buf, regs->data, reglen);
done:
		free(buf, M_DEVBUF);
		break;
	}
	case CHELSIO_SET_HW_SCHED: {
		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
		unsigned int ticks_per_usec = core_ticks_per_usec(sc);

		if ((sc->flags & FULL_INIT_DONE) == 0)
			return (EAGAIN);       /* need TP to be initialized */
		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
		    !in_range(t->channel, 0, 1) ||
		    !in_range(t->kbps, 0, 10000000) ||
		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
		    !in_range(t->flow_ipg, 0,
			      dack_ticks_to_usec(sc, 0x7ff)))
			return (EINVAL);

		if (t->kbps >= 0) {
			error = t3_config_sched(sc, t->kbps, t->sched);
			if (error < 0)
				return (-error);
		}
		if (t->class_ipg >= 0)
			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
		if (t->flow_ipg >= 0) {
			t->flow_ipg *= 1000;     /* us -> ns */
			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
		}
		if (t->mode >= 0) {
			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);

			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
					 bit, t->mode ? bit : 0);
		}
		if (t->channel >= 0)
			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
					 1 << t->sched, t->channel << t->sched);
		break;
	}
	default:
		return (EOPNOTSUPP);
		break;
	}

	return (error);
}

static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return (T3_REGMAP_SIZE);
}
#undef T3_REGMAP_SIZE

static void
cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
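	/* e.g. a rev-2 PCIe card: 3 | (2 << 10) | (1 << 31) == 0x80000803 */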

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, REGDUMP_SIZE);
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}
2238