cxgb_main.c revision 170081
1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
122. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 170081 2007-05-29 03:13:53Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/rman.h>
44#include <sys/ioccom.h>
45#include <sys/mbuf.h>
46#include <sys/linker.h>
47#include <sys/firmware.h>
48#include <sys/socket.h>
49#include <sys/sockio.h>
50#include <sys/smp.h>
51#include <sys/sysctl.h>
52#include <sys/queue.h>
53#include <sys/taskqueue.h>
54
55#include <net/bpf.h>
56#include <net/ethernet.h>
57#include <net/if.h>
58#include <net/if_arp.h>
59#include <net/if_dl.h>
60#include <net/if_media.h>
61#include <net/if_types.h>
62
63#include <netinet/in_systm.h>
64#include <netinet/in.h>
65#include <netinet/if_ether.h>
 66#include <netinet/ip.h>
68#include <netinet/tcp.h>
69#include <netinet/udp.h>
70
71#include <dev/pci/pcireg.h>
72#include <dev/pci/pcivar.h>
73#include <dev/pci/pci_private.h>
74
75#ifdef CONFIG_DEFINED
76#include <cxgb_include.h>
77#else
78#include <dev/cxgb/cxgb_include.h>
79#endif
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_msix(adapter_t *, int);
86static void cxgb_init(void *);
87static void cxgb_init_locked(struct port_info *);
88static void cxgb_stop_locked(struct port_info *);
89static void cxgb_set_rxmode(struct port_info *);
90static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
91static void cxgb_start(struct ifnet *);
92static void cxgb_start_proc(void *, int ncount);
93static int cxgb_media_change(struct ifnet *);
94static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
95static int setup_sge_qsets(adapter_t *);
96static void cxgb_async_intr(void *);
97static void cxgb_ext_intr_handler(void *, int);
98static void cxgb_down(struct adapter *sc);
99static void cxgb_tick(void *);
100static void setup_rss(adapter_t *sc);
101
102/* Attachment glue for the PCI controller end of the device.  Each port of
103 * the device is attached separately, as defined later.
104 */
105static int cxgb_controller_probe(device_t);
106static int cxgb_controller_attach(device_t);
107static int cxgb_controller_detach(device_t);
108static void cxgb_free(struct adapter *);
109static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
110    unsigned int end);
111static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
112static int cxgb_get_regs_len(void);
113static int offload_open(struct port_info *pi);
114static int offload_close(struct toedev *tdev);
115
116
117
118static device_method_t cxgb_controller_methods[] = {
119	DEVMETHOD(device_probe,		cxgb_controller_probe),
120	DEVMETHOD(device_attach,	cxgb_controller_attach),
121	DEVMETHOD(device_detach,	cxgb_controller_detach),
122
123	/* bus interface */
124	DEVMETHOD(bus_print_child,	bus_generic_print_child),
125	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
126
127	{ 0, 0 }
128};
129
130static driver_t cxgb_controller_driver = {
131	"cxgbc",
132	cxgb_controller_methods,
133	sizeof(struct adapter)
134};
135
136static devclass_t	cxgb_controller_devclass;
137DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
138
139/*
140 * Attachment glue for the ports.  Attachment is done directly to the
141 * controller device.
142 */
143static int cxgb_port_probe(device_t);
144static int cxgb_port_attach(device_t);
145static int cxgb_port_detach(device_t);
146
147static device_method_t cxgb_port_methods[] = {
148	DEVMETHOD(device_probe,		cxgb_port_probe),
149	DEVMETHOD(device_attach,	cxgb_port_attach),
150	DEVMETHOD(device_detach,	cxgb_port_detach),
151	{ 0, 0 }
152};
153
154static driver_t cxgb_port_driver = {
155	"cxgb",
156	cxgb_port_methods,
157	0
158};
159
160static d_ioctl_t cxgb_extension_ioctl;
161
162static devclass_t	cxgb_port_devclass;
163DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
164
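/*
 * One MSI-X vector is reserved for async/error events (rid 1 in
 * cxgb_setup_msix()) plus one per queue-set, hence SGE_QSETS + 1.
 */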
165#define SGE_MSIX_COUNT (SGE_QSETS + 1)
166
167extern int collapse_mbufs;
168/*
169 * The driver uses the best interrupt scheme available on a platform in the
170 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
171 * of these schemes the driver may consider as follows:
172 *
173 * msi = 2: choose from among all three options
 174 * msi = 1: only consider MSI and pin interrupts
175 * msi = 0: force pin interrupts
176 */
177static int msi_allowed = 2;
178TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
179SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
180SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
181    "MSI-X, MSI, INTx selector");
182
183/*
184 * The driver enables offload as a default.
185 * To disable it, use ofld_disable = 1.
186 */
187static int ofld_disable = 0;
188TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
189SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
190    "disable ULP offload");
191
192/*
 193 * By default the driver uses a single queue-set per port (singleq = 1).
 194 * Set singleq = 0 to allow multiple queue-sets per port (requires MSI-X).
195 */
196static int singleq = 1;
197TUNABLE_INT("hw.cxgb.singleq", &singleq);
198SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
199    "use a single queue-set per port");
200
201enum {
202	MAX_TXQ_ENTRIES      = 16384,
203	MAX_CTRL_TXQ_ENTRIES = 1024,
204	MAX_RSPQ_ENTRIES     = 16384,
205	MAX_RX_BUFFERS       = 16384,
206	MAX_RX_JUMBO_BUFFERS = 16384,
207	MIN_TXQ_ENTRIES      = 4,
208	MIN_CTRL_TXQ_ENTRIES = 4,
209	MIN_RSPQ_ENTRIES     = 32,
210	MIN_FL_ENTRIES       = 32
211};
212
213#define PORT_MASK ((1 << MAX_NPORTS) - 1)
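/*
 * PORT_MASK selects the per-port bits of open_device_map; the offload device
 * is tracked separately via OFFLOAD_DEVMAP_BIT (see offload_open()).
 */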
214
215/* Table for probing the cards.  The desc field isn't actually used */
216struct cxgb_ident {
217	uint16_t	vendor;
218	uint16_t	device;
219	int		index;
220	char		*desc;
221} cxgb_identifiers[] = {
222	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
223	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
224	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
225	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
226	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
227	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
228	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
229	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
230	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
231	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
232	{0, 0, 0, NULL}
233};
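/*
 * The index field selects the corresponding entry in the shared-code
 * adapter_info table via t3_get_adapter_info() (see cxgb_get_adapter_info()
 * below).
 */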
234
235static struct cxgb_ident *
236cxgb_get_ident(device_t dev)
237{
238	struct cxgb_ident *id;
239
240	for (id = cxgb_identifiers; id->desc != NULL; id++) {
241		if ((id->vendor == pci_get_vendor(dev)) &&
242		    (id->device == pci_get_device(dev))) {
243			return (id);
244		}
245	}
246	return (NULL);
247}
248
249static const struct adapter_info *
250cxgb_get_adapter_info(device_t dev)
251{
252	struct cxgb_ident *id;
253	const struct adapter_info *ai;
254
255	id = cxgb_get_ident(dev);
256	if (id == NULL)
257		return (NULL);
258
259	ai = t3_get_adapter_info(id->index);
260
261	return (ai);
262}
263
264static int
265cxgb_controller_probe(device_t dev)
266{
267	const struct adapter_info *ai;
268	char *ports, buf[80];
269
270	ai = cxgb_get_adapter_info(dev);
271	if (ai == NULL)
272		return (ENXIO);
273
274	if (ai->nports == 1)
275		ports = "port";
276	else
277		ports = "ports";
278
279	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
280	device_set_desc_copy(dev, buf);
281	return (BUS_PROBE_DEFAULT);
282}
283
284static int
285upgrade_fw(adapter_t *sc)
286{
287	char buf[32];
288#ifdef FIRMWARE_LATEST
289	const struct firmware *fw;
290#else
291	struct firmware *fw;
292#endif
293	int status;
294
295	snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR,
296	    FW_VERSION_MINOR, FW_VERSION_MICRO);
297
298	fw = firmware_get(buf);
299
300	if (fw == NULL) {
301		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
302		return (ENOENT);
303	}
304
305	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
306
307	firmware_put(fw, FIRMWARE_UNLOAD);
308
309	return (status);
310}
311
312static int
313cxgb_controller_attach(device_t dev)
314{
315	device_t child;
316	const struct adapter_info *ai;
317	struct adapter *sc;
318	int i, reg, msi_needed, error = 0;
319	uint32_t vers;
320	int port_qsets = 1;
321
322	sc = device_get_softc(dev);
323	sc->dev = dev;
324	sc->msi_count = 0;
325
 326	/* find the PCIe link width and set max read request to 4KB */
327	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
328		uint16_t lnk, pectl;
329		lnk = pci_read_config(dev, reg + 0x12, 2);
330		sc->link_width = (lnk >> 4) & 0x3f;
331
332		pectl = pci_read_config(dev, reg + 0x8, 2);
333		pectl = (pectl & ~0x7000) | (5 << 12);
334		pci_write_config(dev, reg + 0x8, pectl, 2);
335	}
336	if (sc->link_width != 0 && sc->link_width <= 4) {
337		device_printf(sc->dev,
338		    "PCIe x%d Link, expect reduced performance\n",
339		    sc->link_width);
340	}
341
342	pci_enable_busmaster(dev);
343
344	/*
345	 * Allocate the registers and make them available to the driver.
346	 * The registers that we care about for NIC mode are in BAR 0
347	 */
348	sc->regs_rid = PCIR_BAR(0);
349	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
350	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
351		device_printf(dev, "Cannot allocate BAR\n");
352		return (ENXIO);
353	}
354
355	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
356	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
357	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);
358
359	sc->bt = rman_get_bustag(sc->regs_res);
360	sc->bh = rman_get_bushandle(sc->regs_res);
361	sc->mmio_len = rman_get_size(sc->regs_res);
362
363	ai = cxgb_get_adapter_info(dev);
364	if (t3_prep_adapter(sc, ai, 1) < 0) {
365		error = ENODEV;
366		goto out;
367	}
368
369	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
370	 * enough messages for the queue sets.  If that fails, try falling
371	 * back to MSI.  If that fails, then try falling back to the legacy
372	 * interrupt pin model.
373	 */
374#ifdef MSI_SUPPORTED
375
376	sc->msix_regs_rid = 0x20;
377	if ((msi_allowed >= 2) &&
378	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
379	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
380
381		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
382
383		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
384		    (sc->msi_count != msi_needed)) {
385			device_printf(dev, "msix allocation failed - msi_count = %d"
386			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
387			    msi_needed, error);
388			sc->msi_count = 0;
389			pci_release_msi(dev);
390			bus_release_resource(dev, SYS_RES_MEMORY,
391			    sc->msix_regs_rid, sc->msix_regs_res);
392			sc->msix_regs_res = NULL;
393		} else {
394			sc->flags |= USING_MSIX;
395			sc->cxgb_intr = t3_intr_msix;
396		}
397	}
398
399	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
400		sc->msi_count = 1;
401		if (pci_alloc_msi(dev, &sc->msi_count)) {
402			device_printf(dev, "alloc msi failed - will try INTx\n");
403			sc->msi_count = 0;
404			pci_release_msi(dev);
405		} else {
406			sc->flags |= USING_MSI;
407			sc->irq_rid = 1;
408			sc->cxgb_intr = t3_intr_msi;
409		}
410	}
411#endif
412	if (sc->msi_count == 0) {
413		device_printf(dev, "using line interrupts\n");
414		sc->irq_rid = 0;
415		sc->cxgb_intr = t3b_intr;
416	}
417
418
419	/* Create a private taskqueue thread for handling driver events */
420#ifdef TASKQUEUE_CURRENT
421	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
422	    taskqueue_thread_enqueue, &sc->tq);
423#else
424	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
425	    taskqueue_thread_enqueue, &sc->tq);
426#endif
427	if (sc->tq == NULL) {
428		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
 429		goto out;
430	}
431
432	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
433	    device_get_nameunit(dev));
434	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
435
436
437	/* Create a periodic callout for checking adapter status */
438	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);
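	/*
	 * CALLOUT_RETURNUNLOCKED means cxgb_tick() runs with sc->lock held and
	 * must release it itself, which it does via ADAPTER_UNLOCK() before
	 * returning.
	 */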
439
440	if (t3_check_fw_version(sc) != 0) {
441		/*
442		 * Warn user that a firmware update will be attempted in init.
443		 */
444		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
445		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
446		sc->flags &= ~FW_UPTODATE;
447	} else {
448		sc->flags |= FW_UPTODATE;
449	}
450
451	if ((sc->flags & USING_MSIX) && !singleq)
452		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
453
454	/*
455	 * Create a child device for each MAC.  The ethernet attachment
456	 * will be done in these children.
457	 */
458	for (i = 0; i < (sc)->params.nports; i++) {
459		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
460			device_printf(dev, "failed to add child port\n");
461			error = EINVAL;
462			goto out;
463		}
464		sc->portdev[i] = child;
465		sc->port[i].adapter = sc;
466		sc->port[i].nqsets = port_qsets;
467		sc->port[i].first_qset = i*port_qsets;
468		sc->port[i].port = i;
469		device_set_softc(child, &sc->port[i]);
470	}
471	if ((error = bus_generic_attach(dev)) != 0)
472		goto out;
473
474	/*
475	 * XXX need to poll for link status
476	 */
477	sc->params.stats_update_period = 1;
478
479	/* initialize sge private state */
480	t3_sge_init_sw(sc);
481
482	t3_led_ready(sc);
483
484	cxgb_offload_init();
485	if (is_offload(sc)) {
486		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
487		cxgb_adapter_ofld(sc);
 488	}
489	error = t3_get_fw_version(sc, &vers);
490	if (error)
491		goto out;
492
493	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
494	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
495	    G_FW_VERSION_MICRO(vers));
496
497	t3_add_sysctls(sc);
498out:
499	if (error)
500		cxgb_free(sc);
501
502	return (error);
503}
504
505static int
506cxgb_controller_detach(device_t dev)
507{
508	struct adapter *sc;
509
510	sc = device_get_softc(dev);
511
512	cxgb_free(sc);
513
514	return (0);
515}
516
517static void
518cxgb_free(struct adapter *sc)
519{
520	int i;
521
522	cxgb_down(sc);
523
524#ifdef MSI_SUPPORTED
525	if (sc->flags & (USING_MSI | USING_MSIX)) {
526		device_printf(sc->dev, "releasing msi message(s)\n");
527		pci_release_msi(sc->dev);
528	} else {
529		device_printf(sc->dev, "no msi message to release\n");
530	}
531#endif
532	if (sc->msix_regs_res != NULL) {
533		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
534		    sc->msix_regs_res);
535	}
536
537	/*
538	 * XXX need to drain the ifq by hand until
539	 * it is taught about mbuf iovecs
540	 */
541	callout_drain(&sc->cxgb_tick_ch);
542
543	t3_sge_deinit_sw(sc);
544
545	if (sc->tq != NULL) {
546		taskqueue_drain(sc->tq, &sc->ext_intr_task);
547		taskqueue_free(sc->tq);
548	}
549
550	for (i = 0; i < (sc)->params.nports; ++i) {
551		if (sc->portdev[i] != NULL)
552			device_delete_child(sc->dev, sc->portdev[i]);
553	}
554
555	bus_generic_detach(sc->dev);
556
557	if (is_offload(sc)) {
558		cxgb_adapter_unofld(sc);
559		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
560			offload_close(&sc->tdev);
561	}
562	t3_free_sge_resources(sc);
563	t3_sge_free(sc);
564
565	if (sc->regs_res != NULL)
566		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
567		    sc->regs_res);
568
569	mtx_destroy(&sc->mdio_lock);
570	mtx_destroy(&sc->sge.reg_lock);
571	mtx_destroy(&sc->lock);
572
573	return;
574}
575
576/**
577 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
578 *	@sc: the controller softc
579 *
580 *	Determines how many sets of SGE queues to use and initializes them.
581 *	We support multiple queue sets per port if we have MSI-X, otherwise
582 *	just one queue set per port.
583 */
584static int
585setup_sge_qsets(adapter_t *sc)
586{
587	int i, j, err, irq_idx, qset_idx;
588	u_int ntxq = SGE_TXQ_PER_SET;
589
590	if ((err = t3_sge_alloc(sc)) != 0) {
591		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
592		return (err);
593	}
594
595	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
596		irq_idx = -1;
597	else
598		irq_idx = 0;
599
600	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
601		struct port_info *pi = &sc->port[i];
602
603		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
604			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
605			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
606			    &sc->params.sge.qset[qset_idx], ntxq, pi);
607			if (err) {
608				t3_free_sge_resources(sc);
609				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
610				return (err);
611			}
612		}
613	}
614
615	return (0);
616}
617
618static int
619cxgb_setup_msix(adapter_t *sc, int msix_count)
620{
621	int i, j, k, nqsets, rid;
622
623	/* The first message indicates link changes and error conditions */
624	sc->irq_rid = 1;
625	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
626	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
627		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
628		return (EINVAL);
629	}
630
631	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
632#ifdef INTR_FILTERS
633			NULL,
634#endif
635		cxgb_async_intr, sc, &sc->intr_tag)) {
636		device_printf(sc->dev, "Cannot set up interrupt\n");
637		return (EINVAL);
638	}
639	for (i = 0, k = 0; i < (sc)->params.nports; ++i) {
640		nqsets = sc->port[i].nqsets;
641		for (j = 0; j < nqsets; ++j, k++) {
642			struct sge_qset *qs = &sc->sge.qs[k];
643
644			rid = k + 2;
645			if (cxgb_debug)
646				printf("rid=%d ", rid);
647			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
648			    sc->dev, SYS_RES_IRQ, &rid,
649			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
650				device_printf(sc->dev, "Cannot allocate "
651				    "interrupt for message %d\n", rid);
652				return (EINVAL);
653			}
654			sc->msix_irq_rid[k] = rid;
 655			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
656			    INTR_MPSAFE|INTR_TYPE_NET,
657#ifdef INTR_FILTERS
658			NULL,
659#endif
660				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
661				device_printf(sc->dev, "Cannot set up "
662				    "interrupt for message %d\n", rid);
663				return (EINVAL);
664			}
665		}
666	}
667
668
669	return (0);
670}
671
672static int
673cxgb_port_probe(device_t dev)
674{
675	struct port_info *p;
676	char buf[80];
677
678	p = device_get_softc(dev);
679
680	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
681	device_set_desc_copy(dev, buf);
682	return (0);
683}
684
685
686static int
687cxgb_makedev(struct port_info *pi)
688{
689	struct cdevsw *cxgb_cdevsw;
690
691	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
692		return (ENOMEM);
693
694	cxgb_cdevsw->d_version = D_VERSION;
695	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
696	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;
697
698	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
699	    pi->ifp->if_xname);
700
701	if (pi->port_cdev == NULL)
702		return (ENOMEM);
703
704	pi->port_cdev->si_drv1 = (void *)pi;
705
706	return (0);
707}
708
709
710#ifdef TSO_SUPPORTED
711#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
712/* Don't enable TSO6 yet */
713#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
714#else
715#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
716/* Don't enable TSO6 yet */
717#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
718#define IFCAP_TSO4 0x0
719#define CSUM_TSO   0x0
720#endif
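/*
 * When TSO_SUPPORTED is not defined, IFCAP_TSO4 and CSUM_TSO are defined to 0
 * above so that the capability and if_hwassist assignments in
 * cxgb_port_attach() and cxgb_ioctl() compile into no-ops.
 */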
721
722
723static int
724cxgb_port_attach(device_t dev)
725{
726	struct port_info *p;
727	struct ifnet *ifp;
728	int media_flags;
729	int err;
730	char buf[64];
731
732	p = device_get_softc(dev);
733
734	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
735	mtx_init(&p->lock, buf, 0, MTX_DEF);
736
737	/* Allocate an ifnet object and set it up */
738	ifp = p->ifp = if_alloc(IFT_ETHER);
739	if (ifp == NULL) {
740		device_printf(dev, "Cannot allocate ifnet\n");
741		return (ENOMEM);
742	}
743
744	/*
745	 * Note that there is currently no watchdog timer.
746	 */
747	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
748	ifp->if_init = cxgb_init;
749	ifp->if_softc = p;
750	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
751	ifp->if_ioctl = cxgb_ioctl;
752	ifp->if_start = cxgb_start;
753	ifp->if_timer = 0;	/* Disable ifnet watchdog */
754	ifp->if_watchdog = NULL;
755
756	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
757	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
758	IFQ_SET_READY(&ifp->if_snd);
759
760	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
761	ifp->if_capabilities |= CXGB_CAP;
762	ifp->if_capenable |= CXGB_CAP_ENABLE;
763	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
764	ifp->if_baudrate = 100000000;
765
766	ether_ifattach(ifp, p->hw_addr);
767#ifdef DEFAULT_JUMBO
768	ifp->if_mtu = 9000;
769#endif
770	if ((err = cxgb_makedev(p)) != 0) {
771		printf("makedev failed %d\n", err);
772		return (err);
773	}
774	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
775	    cxgb_media_status);
776
777	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
778	        media_flags = IFM_ETHER | IFM_10G_CX4;
779	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
780	        media_flags = IFM_ETHER | IFM_10G_SR;
781	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
782	        media_flags = IFM_ETHER | IFM_10G_LR;
783	else {
784	        printf("unsupported media type %s\n", p->port_type->desc);
785		return (ENXIO);
786	}
787
788	ifmedia_add(&p->media, media_flags, 0, NULL);
789	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
790	ifmedia_set(&p->media, media_flags);
791
792	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
793#ifdef TASKQUEUE_CURRENT
 794	/* Create a taskqueue for handling TX without starvation */
795	p->tq = taskqueue_create(buf, M_NOWAIT,
796	    taskqueue_thread_enqueue, &p->tq);
797#else
 798	/* Create a taskqueue for handling TX without starvation */
799	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
800	    taskqueue_thread_enqueue, &p->tq);
801#endif
802
803
804	if (p->tq == NULL) {
805		device_printf(dev, "failed to allocate port task queue\n");
806		return (ENOMEM);
807	}
808	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
809	    device_get_nameunit(dev));
810	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
811
812
813	return (0);
814}
815
816static int
817cxgb_port_detach(device_t dev)
818{
819	struct port_info *p;
820
821	p = device_get_softc(dev);
822
823	PORT_LOCK(p);
824	cxgb_stop_locked(p);
825	PORT_UNLOCK(p);
826
827	mtx_destroy(&p->lock);
828	if (p->tq != NULL) {
829		taskqueue_drain(p->tq, &p->start_task);
830		taskqueue_free(p->tq);
831		p->tq = NULL;
832	}
833
834	ether_ifdetach(p->ifp);
835	if_free(p->ifp);
836
837	destroy_dev(p->port_cdev);
838
839
840	return (0);
841}
842
843void
844t3_fatal_err(struct adapter *sc)
845{
846	u_int fw_status[4];
847
 848	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
849	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
 850		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
851		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
852}
853
854int
855t3_os_find_pci_capability(adapter_t *sc, int cap)
856{
857	device_t dev;
858	struct pci_devinfo *dinfo;
859	pcicfgregs *cfg;
860	uint32_t status;
861	uint8_t ptr;
862
863	dev = sc->dev;
864	dinfo = device_get_ivars(dev);
865	cfg = &dinfo->cfg;
866
867	status = pci_read_config(dev, PCIR_STATUS, 2);
868	if (!(status & PCIM_STATUS_CAPPRESENT))
869		return (0);
870
871	switch (cfg->hdrtype & PCIM_HDRTYPE) {
872	case 0:
873	case 1:
874		ptr = PCIR_CAP_PTR;
875		break;
876	case 2:
877		ptr = PCIR_CAP_PTR_2;
878		break;
879	default:
880		return (0);
881		break;
882	}
883	ptr = pci_read_config(dev, ptr, 1);
884
885	while (ptr != 0) {
886		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
887			return (ptr);
888		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
889	}
890
891	return (0);
892}
893
894int
895t3_os_pci_save_state(struct adapter *sc)
896{
897	device_t dev;
898	struct pci_devinfo *dinfo;
899
900	dev = sc->dev;
901	dinfo = device_get_ivars(dev);
902
903	pci_cfg_save(dev, dinfo, 0);
904	return (0);
905}
906
907int
908t3_os_pci_restore_state(struct adapter *sc)
909{
910	device_t dev;
911	struct pci_devinfo *dinfo;
912
913	dev = sc->dev;
914	dinfo = device_get_ivars(dev);
915
916	pci_cfg_restore(dev, dinfo);
917	return (0);
918}
919
920/**
921 *	t3_os_link_changed - handle link status changes
922 *	@adapter: the adapter associated with the link change
 923 *	@port_id: the port index whose link status has changed
924 *	@link_stat: the new status of the link
925 *	@speed: the new speed setting
926 *	@duplex: the new duplex setting
927 *	@fc: the new flow-control setting
928 *
929 *	This is the OS-dependent handler for link status changes.  The OS
930 *	neutral handler takes care of most of the processing for these events,
931 *	then calls this handler for any OS-specific processing.
932 */
933void
934t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
935     int duplex, int fc)
936{
937	struct port_info *pi = &adapter->port[port_id];
938	struct cmac *mac = &adapter->port[port_id].mac;
939
940	if ((pi->ifp->if_flags & IFF_UP) == 0)
941		return;
942
943	if (link_status) {
944		t3_mac_enable(mac, MAC_DIRECTION_RX);
945		if_link_state_change(pi->ifp, LINK_STATE_UP);
946	} else {
947		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
948		pi->phy.ops->power_down(&pi->phy, 1);
949		t3_mac_disable(mac, MAC_DIRECTION_RX);
950		t3_link_start(&pi->phy, mac, &pi->link_config);
951	}
952}
953
954
955/*
956 * Interrupt-context handler for external (PHY) interrupts.
957 */
958void
959t3_os_ext_intr_handler(adapter_t *sc)
960{
961	if (cxgb_debug)
962		printf("t3_os_ext_intr_handler\n");
963	/*
964	 * Schedule a task to handle external interrupts as they may be slow
965	 * and we use a mutex to protect MDIO registers.  We disable PHY
966	 * interrupts in the meantime and let the task reenable them when
967	 * it's done.
968	 */
969	ADAPTER_LOCK(sc);
970	if (sc->slow_intr_mask) {
971		sc->slow_intr_mask &= ~F_T3DBG;
972		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
973		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
974	}
975	ADAPTER_UNLOCK(sc);
976}
977
978void
979t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
980{
981
982	/*
983	 * The ifnet might not be allocated before this gets called,
 984 *	as this is called early on in attach by t3_prep_adapter,
 985 *	so save the address off in the port structure.
986	 */
987	if (cxgb_debug)
988		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
989	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
990}
991
992/**
 993 *	cxgb_link_start - enable a port
994 *	@p: the port to enable
995 *
996 *	Performs the MAC and PHY actions needed to enable a port.
997 */
998static void
999cxgb_link_start(struct port_info *p)
1000{
1001	struct ifnet *ifp;
1002	struct t3_rx_mode rm;
1003	struct cmac *mac = &p->mac;
1004
1005	ifp = p->ifp;
1006
1007	t3_init_rx_mode(&rm, p);
1008	t3_mac_reset(mac);
1009	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN);
1010	t3_mac_set_address(mac, 0, p->hw_addr);
1011	t3_mac_set_rx_mode(mac, &rm);
1012	t3_link_start(&p->phy, mac, &p->link_config);
1013	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1014}
1015
1016/**
1017 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1018 *	@adap: the adapter
1019 *
1020 *	Sets up RSS to distribute packets to multiple receive queues.  We
1021 *	configure the RSS CPU lookup table to distribute to the number of HW
1022 *	receive queues, and the response queue lookup table to narrow that
1023 *	down to the response queues actually configured for each port.
1024 *	We always configure the RSS mapping for two ports since the mapping
1025 *	table has plenty of entries.
1026 */
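/*
 * For example, with two ports and two queue-sets on each (nq0 = nq1 = 2) the
 * first half of rspq_map alternates 0,1,0,1,... and the second half
 * alternates 2,3,2,3,..., so hash indices from the two ports resolve to
 * disjoint queue-set ranges.
 */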
1027static void
1028setup_rss(adapter_t *adap)
1029{
1030	int i;
1031	u_int nq0 = adap->port[0].nqsets;
1032	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
1033	uint8_t cpus[SGE_QSETS + 1];
1034	uint16_t rspq_map[RSS_TABLE_SIZE];
1035
1036	for (i = 0; i < SGE_QSETS; ++i)
1037		cpus[i] = i;
1038	cpus[SGE_QSETS] = 0xff;
1039
1040	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1041		rspq_map[i] = i % nq0;
1042		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
1043	}
1044
1045	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1046	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
1047	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
1048}
1049
1050/*
 1051 * Sends an mbuf to an offload queue driver.  The critical section prevents
 1052 * preemption while the transmit is handed off.
1053 */
1054static inline int
1055offload_tx(struct toedev *tdev, struct mbuf *m)
1056{
1057	int ret;
1058
1059	critical_enter();
1060	ret = t3_offload_tx(tdev, m);
1061	critical_exit();
1062	return ret;
1063}
1064
1065static int
1066write_smt_entry(struct adapter *adapter, int idx)
1067{
1068	struct port_info *pi = &adapter->port[idx];
1069	struct cpl_smt_write_req *req;
1070	struct mbuf *m;
1071
1072	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1073		return (ENOMEM);
1074
1075	req = mtod(m, struct cpl_smt_write_req *);
1076	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1077	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1078	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1079	req->iff = idx;
1080	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1081	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1082
1083	m_set_priority(m, 1);
1084
1085	offload_tx(&adapter->tdev, m);
1086
1087	return (0);
1088}
1089
1090static int
1091init_smt(struct adapter *adapter)
1092{
1093	int i;
1094
1095	for_each_port(adapter, i)
1096		write_smt_entry(adapter, i);
1097	return 0;
1098}
1099
1100static void
1101init_port_mtus(adapter_t *adapter)
1102{
1103	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1104
1105	if (adapter->port[1].ifp)
1106		mtus |= adapter->port[1].ifp->if_mtu << 16;
1107	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1108}
1109
1110static void
1111send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1112			      int hi, int port)
1113{
1114	struct mbuf *m;
1115	struct mngt_pktsched_wr *req;
1116
1117	m = m_gethdr(M_NOWAIT, MT_DATA);
1118	if (m) {
1119		req = mtod(m, struct mngt_pktsched_wr *);
1120		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1121		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1122		req->sched = sched;
1123		req->idx = qidx;
1124		req->min = lo;
1125		req->max = hi;
1126		req->binding = port;
1127		m->m_len = m->m_pkthdr.len = sizeof(*req);
1128		t3_mgmt_tx(adap, m);
1129	}
1130}
1131
1132static void
1133bind_qsets(adapter_t *sc)
1134{
1135	int i, j;
1136
1137	if (singleq)
1138		return;
1139
1140	for (i = 0; i < (sc)->params.nports; ++i) {
1141		const struct port_info *pi = adap2pinfo(sc, i);
1142
1143		for (j = 0; j < pi->nqsets; ++j)
1144			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1145					  -1, i);
1146	}
1147}
1148
1149/**
1150 *	cxgb_up - enable the adapter
 1151 *	@sc: adapter being enabled
1152 *
1153 *	Called when the first port is enabled, this function performs the
1154 *	actions necessary to make an adapter operational, such as completing
1155 *	the initialization of HW modules, and enabling interrupts.
1156 *
1157 */
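/*
 * cxgb_up() is reached from cxgb_init_locked() when the first port is brought
 * up and from offload_open(); cxgb_down() reverses it once open_device_map is
 * empty again.
 */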
1158static int
1159cxgb_up(struct adapter *sc)
1160{
1161	int err = 0;
1162
1163	if ((sc->flags & FULL_INIT_DONE) == 0) {
1164
1165		if ((sc->flags & FW_UPTODATE) == 0)
1166			err = upgrade_fw(sc);
1167
1168		if (err)
1169			goto out;
1170
1171		err = t3_init_hw(sc, 0);
1172		if (err)
1173			goto out;
1174
1175		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1176
1177		err = setup_sge_qsets(sc);
1178		if (err)
1179			goto out;
1180
1181		setup_rss(sc);
1182		sc->flags |= FULL_INIT_DONE;
1183	}
1184
1185	t3_intr_clear(sc);
1186
1187	/* If it's MSI or INTx, allocate a single interrupt for everything */
1188	if ((sc->flags & USING_MSIX) == 0) {
1189		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1190		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1191			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
1192			err = EINVAL;
1193			goto out;
1194		}
1195		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1196
1197		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1198#ifdef INTR_FILTERS
1199			NULL,
1200#endif
1201			sc->cxgb_intr, sc, &sc->intr_tag)) {
1202			device_printf(sc->dev, "Cannot set up interrupt\n");
1203			err = EINVAL;
1204			goto irq_err;
1205		}
1206	} else {
1207		cxgb_setup_msix(sc, sc->msi_count);
1208	}
1209
1210	t3_sge_start(sc);
1211	t3_intr_enable(sc);
1212
1213	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
1214		bind_qsets(sc);
1215	sc->flags |= QUEUES_BOUND;
1216out:
1217	return (err);
1218irq_err:
 1219	CH_ERR(sc, "interrupt setup failed, err %d\n", err);
1220	goto out;
1221}
1222
1223
1224/*
1225 * Release resources when all the ports and offloading have been stopped.
1226 */
1227static void
1228cxgb_down(struct adapter *sc)
1229{
1230	int i;
1231
1232	t3_sge_stop(sc);
1233	t3_intr_disable(sc);
1234
1235	for (i = 0; i < SGE_QSETS; i++) {
1236		if (sc->msix_intr_tag[i] != NULL) {
1237			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
1238			    sc->msix_intr_tag[i]);
1239			sc->msix_intr_tag[i] = NULL;
1240		}
1241		if (sc->msix_irq_res[i] != NULL) {
1242			bus_release_resource(sc->dev, SYS_RES_IRQ,
1243			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
1244			sc->msix_irq_res[i] = NULL;
1245		}
1246	}
1247
1248	if (sc->intr_tag != NULL) {
1249		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1250		sc->intr_tag = NULL;
1251	}
1252	if (sc->irq_res != NULL) {
1253		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1254		    sc->irq_rid, sc->irq_res);
1255		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1256		    sc->irq_res);
1257		sc->irq_res = NULL;
1258	}
1259
1260	callout_drain(&sc->sge_timer_ch);
1261	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1262	taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
1263}
1264
1265static int
1266offload_open(struct port_info *pi)
1267{
1268	struct adapter *adapter = pi->adapter;
1269	struct toedev *tdev = TOEDEV(pi->ifp);
1270	int adap_up = adapter->open_device_map & PORT_MASK;
1271	int err = 0;
1272
1273	if (atomic_cmpset_int(&adapter->open_device_map,
1274		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
1275		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
1276		return (0);
1277
1278	ADAPTER_LOCK(pi->adapter);
1279	if (!adap_up)
1280		err = cxgb_up(adapter);
1281	ADAPTER_UNLOCK(pi->adapter);
1282	if (err < 0)
1283		return (err);
1284
1285	t3_tp_set_offload_mode(adapter, 1);
1286	tdev->lldev = adapter->port[0].ifp;
1287	err = cxgb_offload_activate(adapter);
1288	if (err)
1289		goto out;
1290
1291	init_port_mtus(adapter);
1292	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1293		     adapter->params.b_wnd,
1294		     adapter->params.rev == 0 ?
1295		       adapter->port[0].ifp->if_mtu : 0xffff);
1296	init_smt(adapter);
1297
1298	/* Call back all registered clients */
1299	cxgb_add_clients(tdev);
1300
1301out:
1302	/* restore them in case the offload module has changed them */
1303	if (err) {
1304		t3_tp_set_offload_mode(adapter, 0);
1305		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1306		cxgb_set_dummy_ops(tdev);
1307	}
1308	return (err);
1309}
1310
1311static int
1312offload_close(struct toedev *tdev)
1313{
1314	struct adapter *adapter = tdev2adap(tdev);
1315
1316	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1317		return 0;
1318
1319	/* Call back all registered clients */
1320	cxgb_remove_clients(tdev);
1321	tdev->lldev = NULL;
1322	cxgb_set_dummy_ops(tdev);
1323	t3_tp_set_offload_mode(adapter, 0);
1324	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1325
1326	ADAPTER_LOCK(adapter);
1327	if (!adapter->open_device_map)
1328		cxgb_down(adapter);
1329	ADAPTER_UNLOCK(adapter);
1330
1331	cxgb_offload_deactivate(adapter);
1332	return 0;
1333}
1334
1335static void
1336cxgb_init(void *arg)
1337{
1338	struct port_info *p = arg;
1339
1340	PORT_LOCK(p);
1341	cxgb_init_locked(p);
1342	PORT_UNLOCK(p);
1343}
1344
1345static void
1346cxgb_init_locked(struct port_info *p)
1347{
1348	struct ifnet *ifp;
1349	adapter_t *sc = p->adapter;
1350	int err;
1351
1352	mtx_assert(&p->lock, MA_OWNED);
1353	ifp = p->ifp;
1354
1355	ADAPTER_LOCK(p->adapter);
1356	if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) {
1357		ADAPTER_UNLOCK(p->adapter);
1358		cxgb_stop_locked(p);
1359		return;
1360	}
1361	if (p->adapter->open_device_map == 0)
1362		t3_intr_clear(sc);
1363
1364	setbit(&p->adapter->open_device_map, p->port);
1365
1366	ADAPTER_UNLOCK(p->adapter);
1367	if (is_offload(sc) && !ofld_disable) {
1368		err = offload_open(p);
1369		if (err)
1370			log(LOG_WARNING,
1371			    "Could not initialize offload capabilities\n");
1372	}
1373	cxgb_link_start(p);
1374	t3_port_intr_enable(sc, p->port);
1375
1376	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1377	    cxgb_tick, sc);
1378
1379	PORT_LOCK(p);
1380	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1381	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1382	PORT_UNLOCK(p);
1383}
1384
1385static void
1386cxgb_set_rxmode(struct port_info *p)
1387{
1388	struct t3_rx_mode rm;
1389	struct cmac *mac = &p->mac;
1390
1391	mtx_assert(&p->lock, MA_OWNED);
1392
1393	t3_init_rx_mode(&rm, p);
1394	t3_mac_set_rx_mode(mac, &rm);
1395}
1396
1397static void
1398cxgb_stop_locked(struct port_info *p)
1399{
1400	struct ifnet *ifp;
1401
1402	mtx_assert(&p->lock, MA_OWNED);
1403	mtx_assert(&p->adapter->lock, MA_NOTOWNED);
1404
1405	ifp = p->ifp;
1406
1407	t3_port_intr_disable(p->adapter, p->port);
1408	PORT_LOCK(p);
1409	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1410	PORT_UNLOCK(p);
1411	p->phy.ops->power_down(&p->phy, 1);
1412	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1413
1414	ADAPTER_LOCK(p->adapter);
1415	clrbit(&p->adapter->open_device_map, p->port);
1416	/*
1417	 * XXX cancel check_task
1418	 */
1419	if (p->adapter->open_device_map == 0)
1420		cxgb_down(p->adapter);
1421	ADAPTER_UNLOCK(p->adapter);
1422}
1423
1424static int
1425cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1426{
1427	struct port_info *p = ifp->if_softc;
1428	struct ifaddr *ifa = (struct ifaddr *)data;
1429	struct ifreq *ifr = (struct ifreq *)data;
1430	int flags, error = 0;
1431	uint32_t mask;
1432
1433	/*
1434	 * XXX need to check that we aren't in the middle of an unload
1435	 */
1436	switch (command) {
1437	case SIOCSIFMTU:
1438		if ((ifr->ifr_mtu < ETHERMIN) ||
1439		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
1440			error = EINVAL;
1441		else if (ifp->if_mtu != ifr->ifr_mtu) {
1442			PORT_LOCK(p);
1443			ifp->if_mtu = ifr->ifr_mtu;
1444			t3_mac_set_mtu(&p->mac, ifp->if_mtu + ETHER_HDR_LEN);
1445			PORT_UNLOCK(p);
1446		}
1447		break;
1448	case SIOCSIFADDR:
1449	case SIOCGIFADDR:
1450		if (ifa->ifa_addr->sa_family == AF_INET) {
1451			ifp->if_flags |= IFF_UP;
1452			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1453				cxgb_init(p);
1454			}
1455			arp_ifinit(ifp, ifa);
1456		} else
1457			error = ether_ioctl(ifp, command, data);
1458		break;
1459	case SIOCSIFFLAGS:
1460		if (ifp->if_flags & IFF_UP) {
1461			PORT_LOCK(p);
1462			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1463				flags = p->if_flags;
1464				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1465				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1466					cxgb_set_rxmode(p);
1467
1468			} else
1469				cxgb_init_locked(p);
1470			p->if_flags = ifp->if_flags;
1471			PORT_UNLOCK(p);
1472		} else {
1473			callout_drain(&p->adapter->cxgb_tick_ch);
1474			PORT_LOCK(p);
1475			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1476				cxgb_stop_locked(p);
1477			} else {
1478				adapter_t *sc = p->adapter;
1479				callout_reset(&sc->cxgb_tick_ch,
1480				    sc->params.stats_update_period * hz,
1481				    cxgb_tick, sc);
1482			}
1483			PORT_UNLOCK(p);
1484		}
1485
1486
1487		break;
1488	case SIOCSIFMEDIA:
1489	case SIOCGIFMEDIA:
1490		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1491		break;
1492	case SIOCSIFCAP:
1493		PORT_LOCK(p);
1494		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1495		if (mask & IFCAP_TXCSUM) {
1496			if (IFCAP_TXCSUM & ifp->if_capenable) {
1497				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1498				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1499				    | CSUM_TSO);
1500			} else {
1501				ifp->if_capenable |= IFCAP_TXCSUM;
1502				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1503			}
 1504		}
		if (mask & IFCAP_RXCSUM) {
1505			if (IFCAP_RXCSUM & ifp->if_capenable) {
1506				ifp->if_capenable &= ~IFCAP_RXCSUM;
1507			} else {
1508				ifp->if_capenable |= IFCAP_RXCSUM;
1509			}
1510		}
1511		if (mask & IFCAP_TSO4) {
1512			if (IFCAP_TSO4 & ifp->if_capenable) {
1513				ifp->if_capenable &= ~IFCAP_TSO4;
1514				ifp->if_hwassist &= ~CSUM_TSO;
1515			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1516				ifp->if_capenable |= IFCAP_TSO4;
1517				ifp->if_hwassist |= CSUM_TSO;
1518			} else {
1519				if (cxgb_debug)
1520					printf("cxgb requires tx checksum offload"
1521					    " be enabled to use TSO\n");
1522				error = EINVAL;
1523			}
1524		}
1525		PORT_UNLOCK(p);
1526		break;
1527	default:
1528		error = ether_ioctl(ifp, command, data);
1529		break;
1530	}
1531
1532	return (error);
1533}
1534
1535static int
1536cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
1537{
1538	struct sge_qset *qs;
1539	struct sge_txq *txq;
1540	struct port_info *p = ifp->if_softc;
1541	struct mbuf *m0, *m = NULL;
1542	int err, in_use_init;
1543
1544	if (!p->link_config.link_ok)
1545		return (ENXIO);
1546
1547	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1548		return (ENOBUFS);
1549
1550	qs = &p->adapter->sge.qs[p->first_qset];
1551	txq = &qs->txq[TXQ_ETH];
1552	err = 0;
1553
1554	mtx_lock(&txq->lock);
1555	in_use_init = txq->in_use;
1556	while ((txq->in_use - in_use_init < txmax) &&
1557	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1558		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1559		if (m == NULL)
1560			break;
1561		/*
1562		 * Convert chain to M_IOVEC
1563		 */
1564		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
1565		m0 = m;
1566#ifdef INVARIANTS
1567		/*
1568		 * Clean up after net stack sloppiness
1569		 * before calling m_sanity
1570		 */
1571		m0 = m->m_next;
1572		while (m0) {
1573			m0->m_flags &= ~M_PKTHDR;
1574			m0 = m0->m_next;
1575		}
 1576		m_sanity(m, 0);
1577		m0 = m;
1578#endif
1579		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
1580		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
1581			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
1582				m = m0;
1583				m_collapse(m, TX_MAX_SEGS, &m0);
1584			} else
1585				break;
1586		}
1587		m = m0;
1588		if ((err = t3_encap(p, &m)) != 0)
1589			break;
1590		BPF_MTAP(ifp, m);
1591	}
1592	mtx_unlock(&txq->lock);
1593
1594	if (__predict_false(err)) {
1595		if (err == ENOMEM) {
1596			IFQ_LOCK(&ifp->if_snd);
1597			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1598			IFQ_UNLOCK(&ifp->if_snd);
1599		}
1600	}
1601	if (err == 0 && m == NULL) {
1602		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1603		return (ENOBUFS);
1604	}
1605	if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC) &&
1606	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1607		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1608		return (ENOSPC);
1609	}
1610	return (err);
1611}
1612
1613static void
1614cxgb_start_proc(void *arg, int ncount)
1615{
1616	struct ifnet *ifp = arg;
1617	struct port_info *pi = ifp->if_softc;
1618	struct sge_qset *qs;
1619	struct sge_txq *txq;
1620	int error = 0;
1621
1622	qs = &pi->adapter->sge.qs[pi->first_qset];
1623	txq = &qs->txq[TXQ_ETH];
1624
1625	while (error == 0) {
1626		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
1627			taskqueue_enqueue(pi->adapter->tq,
1628			    &pi->adapter->timer_reclaim_task);
1629
1630		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1631	}
1632}
1633
1634static void
1635cxgb_start(struct ifnet *ifp)
1636{
1637	struct port_info *pi = ifp->if_softc;
1638	struct sge_qset *qs;
1639	struct sge_txq *txq;
1640	int err;
1641
1642	qs = &pi->adapter->sge.qs[pi->first_qset];
1643	txq = &qs->txq[TXQ_ETH];
1644
1645	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
1646		taskqueue_enqueue(pi->adapter->tq,
1647		    &pi->adapter->timer_reclaim_task);
1648
1649	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1650
1651	if (err == 0)
1652		taskqueue_enqueue(pi->tq, &pi->start_task);
1653}
1654
1655
1656static int
1657cxgb_media_change(struct ifnet *ifp)
1658{
1659	if_printf(ifp, "media change not supported\n");
1660	return (ENXIO);
1661}
1662
1663static void
1664cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1665{
1666	struct port_info *p = ifp->if_softc;
1667
1668	ifmr->ifm_status = IFM_AVALID;
1669	ifmr->ifm_active = IFM_ETHER;
1670
1671	if (!p->link_config.link_ok)
1672		return;
1673
1674	ifmr->ifm_status |= IFM_ACTIVE;
1675
1676	if (p->link_config.duplex)
1677		ifmr->ifm_active |= IFM_FDX;
1678	else
1679		ifmr->ifm_active |= IFM_HDX;
1680}
1681
1682static void
1683cxgb_async_intr(void *data)
1684{
1685	adapter_t *sc = data;
1686
1687	if (cxgb_debug)
1688		device_printf(sc->dev, "cxgb_async_intr\n");
1689
1690	t3_slow_intr_handler(sc);
1691
1692}
1693
1694static void
1695cxgb_ext_intr_handler(void *arg, int count)
1696{
1697	adapter_t *sc = (adapter_t *)arg;
1698
1699	if (cxgb_debug)
1700		printf("cxgb_ext_intr_handler\n");
1701
1702	t3_phy_intr_handler(sc);
1703
1704	/* Now reenable external interrupts */
1705	ADAPTER_LOCK(sc);
1706	if (sc->slow_intr_mask) {
1707		sc->slow_intr_mask |= F_T3DBG;
1708		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
1709		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1710	}
1711	ADAPTER_UNLOCK(sc);
1712}
1713
1714static void
1715check_link_status(adapter_t *sc)
1716{
1717	int i;
1718
1719	for (i = 0; i < (sc)->params.nports; ++i) {
1720		struct port_info *p = &sc->port[i];
1721
1722		if (!(p->port_type->caps & SUPPORTED_IRQ))
1723			t3_link_changed(sc, i);
1724	}
1725}
1726
1727static void
1728check_t3b2_mac(struct adapter *adapter)
1729{
1730	int i;
1731
1732	for_each_port(adapter, i) {
1733		struct port_info *p = &adapter->port[i];
1734		struct ifnet *ifp = p->ifp;
1735		int status;
1736
1737		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1738			continue;
1739
1740		status = 0;
1741		PORT_LOCK(p);
1742		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
1743			status = t3b2_mac_watchdog_task(&p->mac);
1744		if (status == 1)
1745			p->mac.stats.num_toggled++;
1746		else if (status == 2) {
1747			struct cmac *mac = &p->mac;
1748
1749			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN);
1750			t3_mac_set_address(mac, 0, p->hw_addr);
1751			cxgb_set_rxmode(p);
1752			t3_link_start(&p->phy, mac, &p->link_config);
1753			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1754			t3_port_intr_enable(adapter, p->port);
1755			p->mac.stats.num_resets++;
1756		}
1757		PORT_UNLOCK(p);
1758	}
1759}
1760
1761static void
1762cxgb_tick(void *arg)
1763{
1764	adapter_t *sc = (adapter_t *)arg;
1765	const struct adapter_params *p = &sc->params;
1766
1767	if (p->linkpoll_period)
1768		check_link_status(sc);
1769	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1770	    cxgb_tick, sc);
1771
1772	/*
 1773	 * adapter lock can currently only be acquired after the
 1774	 * port lock
1775	 */
1776	ADAPTER_UNLOCK(sc);
1777	if (p->rev == T3_REV_B2)
1778		check_t3b2_mac(sc);
1779
1780}
1781
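/*
 * In the ioctl structures below a negative field means "not supplied", so
 * in_range() accepts negative values and the callers simply skip such fields.
 */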
1782static int
1783in_range(int val, int lo, int hi)
1784{
1785	return val < 0 || (val <= hi && val >= lo);
1786}
1787
1788static int
1789cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
1790    int fflag, struct thread *td)
1791{
1792	int mmd, error = 0;
1793	struct port_info *pi = dev->si_drv1;
1794	adapter_t *sc = pi->adapter;
1795
1796#ifdef PRIV_SUPPORTED
1797	if (priv_check(td, PRIV_DRIVER)) {
1798		if (cxgb_debug)
1799			printf("user does not have access to privileged ioctls\n");
1800		return (EPERM);
1801	}
1802#else
1803	if (suser(td)) {
1804		if (cxgb_debug)
1805			printf("user does not have access to privileged ioctls\n");
1806		return (EPERM);
1807	}
1808#endif
1809
1810	switch (cmd) {
1811	case SIOCGMIIREG: {
1812		uint32_t val;
1813		struct cphy *phy = &pi->phy;
1814		struct mii_data *mid = (struct mii_data *)data;
1815
1816		if (!phy->mdio_read)
1817			return (EOPNOTSUPP);
1818		if (is_10G(sc)) {
1819			mmd = mid->phy_id >> 8;
1820			if (!mmd)
1821				mmd = MDIO_DEV_PCS;
1822			else if (mmd > MDIO_DEV_XGXS)
 1823				return (EINVAL);
1824
1825			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
1826					     mid->reg_num, &val);
1827		} else
1828		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
1829					     mid->reg_num & 0x1f, &val);
1830		if (error == 0)
1831			mid->val_out = val;
1832		break;
1833	}
1834	case SIOCSMIIREG: {
1835		struct cphy *phy = &pi->phy;
1836		struct mii_data *mid = (struct mii_data *)data;
1837
1838		if (!phy->mdio_write)
1839			return (EOPNOTSUPP);
1840		if (is_10G(sc)) {
1841			mmd = mid->phy_id >> 8;
1842			if (!mmd)
1843				mmd = MDIO_DEV_PCS;
1844			else if (mmd > MDIO_DEV_XGXS)
1845				return (EINVAL);
1846
1847			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
1848					      mmd, mid->reg_num, mid->val_in);
1849		} else
1850			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
1851					      mid->reg_num & 0x1f,
1852					      mid->val_in);
1853		break;
1854	}
1855	case CHELSIO_SETREG: {
1856		struct ch_reg *edata = (struct ch_reg *)data;
1857		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
1858			return (EFAULT);
1859		t3_write_reg(sc, edata->addr, edata->val);
1860		break;
1861	}
1862	case CHELSIO_GETREG: {
1863		struct ch_reg *edata = (struct ch_reg *)data;
1864		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
1865			return (EFAULT);
1866		edata->val = t3_read_reg(sc, edata->addr);
1867		break;
1868	}
1869	case CHELSIO_GET_SGE_CONTEXT: {
1870		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
1871		mtx_lock(&sc->sge.reg_lock);
1872		switch (ecntxt->cntxt_type) {
1873		case CNTXT_TYPE_EGRESS:
1874			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
1875			    ecntxt->data);
1876			break;
1877		case CNTXT_TYPE_FL:
1878			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
1879			    ecntxt->data);
1880			break;
1881		case CNTXT_TYPE_RSP:
1882			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
1883			    ecntxt->data);
1884			break;
1885		case CNTXT_TYPE_CQ:
1886			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
1887			    ecntxt->data);
1888			break;
1889		default:
1890			error = EINVAL;
1891			break;
1892		}
1893		mtx_unlock(&sc->sge.reg_lock);
1894		break;
1895	}
1896	case CHELSIO_GET_SGE_DESC: {
1897		struct ch_desc *edesc = (struct ch_desc *)data;
1898		int ret;
1899		if (edesc->queue_num >= SGE_QSETS * 6)
1900			return (EINVAL);
1901		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
1902		    edesc->queue_num % 6, edesc->idx, edesc->data);
1903		if (ret < 0)
1904			return (EINVAL);
1905		edesc->size = ret;
1906		break;
1907	}
1908	case CHELSIO_SET_QSET_PARAMS: {
1909		struct qset_params *q;
1910		struct ch_qset_params *t = (struct ch_qset_params *)data;
1911
1912		if (t->qset_idx >= SGE_QSETS)
 1913			return (EINVAL);
1914		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
1915		    !in_range(t->cong_thres, 0, 255) ||
1916		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
1917			      MAX_TXQ_ENTRIES) ||
1918		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
1919			      MAX_TXQ_ENTRIES) ||
1920		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
1921			      MAX_CTRL_TXQ_ENTRIES) ||
1922		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
1923		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
1924			      MAX_RX_JUMBO_BUFFERS) ||
1925		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
 1926		       return (EINVAL);
1927		if ((sc->flags & FULL_INIT_DONE) &&
1928		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
1929		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
1930		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
1931		     t->polling >= 0 || t->cong_thres >= 0))
 1932			return (EBUSY);
1933
1934		q = &sc->params.sge.qset[t->qset_idx];
1935
1936		if (t->rspq_size >= 0)
1937			q->rspq_size = t->rspq_size;
1938		if (t->fl_size[0] >= 0)
1939			q->fl_size = t->fl_size[0];
1940		if (t->fl_size[1] >= 0)
1941			q->jumbo_size = t->fl_size[1];
1942		if (t->txq_size[0] >= 0)
1943			q->txq_size[0] = t->txq_size[0];
1944		if (t->txq_size[1] >= 0)
1945			q->txq_size[1] = t->txq_size[1];
1946		if (t->txq_size[2] >= 0)
1947			q->txq_size[2] = t->txq_size[2];
1948		if (t->cong_thres >= 0)
1949			q->cong_thres = t->cong_thres;
1950		if (t->intr_lat >= 0) {
1951			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
1952
1953			q->coalesce_nsecs = t->intr_lat*1000;
1954			t3_update_qset_coalesce(qs, q);
1955		}
1956		break;
1957	}
1958	case CHELSIO_GET_QSET_PARAMS: {
1959		struct qset_params *q;
1960		struct ch_qset_params *t = (struct ch_qset_params *)data;
1961
1962		if (t->qset_idx >= SGE_QSETS)
1963			return (EINVAL);
1964
1965		q = &(sc)->params.sge.qset[t->qset_idx];
1966		t->rspq_size   = q->rspq_size;
1967		t->txq_size[0] = q->txq_size[0];
1968		t->txq_size[1] = q->txq_size[1];
1969		t->txq_size[2] = q->txq_size[2];
1970		t->fl_size[0]  = q->fl_size;
1971		t->fl_size[1]  = q->jumbo_size;
1972		t->polling     = q->polling;
1973		t->intr_lat    = q->coalesce_nsecs / 1000;
1974		t->cong_thres  = q->cong_thres;
1975		break;
1976	}
1977	case CHELSIO_SET_QSET_NUM: {
1978		struct ch_reg *edata = (struct ch_reg *)data;
1979		unsigned int port_idx = pi->port;
1980
1981		if (sc->flags & FULL_INIT_DONE)
1982			return (EBUSY);
1983		if (edata->val < 1 ||
1984		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
1985			return (EINVAL);
1986		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
1987			return (EINVAL);
1988		sc->port[port_idx].nqsets = edata->val;
1989		sc->port[0].first_qset = 0;
1990		/*
1991		 * XXX hardcode ourselves to 2 ports just like LEEENUX
1992		 */
1993		sc->port[1].first_qset = sc->port[0].nqsets;
1994		break;
1995	}
1996	case CHELSIO_GET_QSET_NUM: {
1997		struct ch_reg *edata = (struct ch_reg *)data;
1998		edata->val = pi->nqsets;
1999		break;
2000	}
2001#ifdef notyet
2002	case CHELSIO_LOAD_FW:
2003	case CHELSIO_GET_PM:
2004	case CHELSIO_SET_PM:
2005		return (EOPNOTSUPP);
2006		break;
2007#endif
2008	case CHELSIO_SETMTUTAB: {
2009		struct ch_mtus *m = (struct ch_mtus *)data;
2010		int i;
2011
2012		if (!is_offload(sc))
2013			return (EOPNOTSUPP);
2014		if (offload_running(sc))
2015			return (EBUSY);
2016		if (m->nmtus != NMTUS)
2017			return (EINVAL);
2018		if (m->mtus[0] < 81)         /* accommodate SACK */
2019			return (EINVAL);
2020
2021		/*
2022		 * MTUs must be in ascending order
2023		 */
2024		for (i = 1; i < NMTUS; ++i)
2025			if (m->mtus[i] < m->mtus[i - 1])
2026				return (EINVAL);
2027
2028		memcpy(sc->params.mtus, m->mtus,
2029		       sizeof(sc->params.mtus));
2030		break;
2031	}
2032	case CHELSIO_GETMTUTAB: {
2033		struct ch_mtus *m = (struct ch_mtus *)data;
2034
2035		if (!is_offload(sc))
2036			return (EOPNOTSUPP);
2037
2038		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2039		m->nmtus = NMTUS;
2040		break;
2041	}
2042	case CHELSIO_DEVUP:
2043		if (!is_offload(sc))
2044			return (EOPNOTSUPP);
2045		return offload_open(pi);
2046		break;
2047	case CHELSIO_GET_MEM: {
2048		struct ch_mem_range *t = (struct ch_mem_range *)data;
2049		struct mc7 *mem;
2050		uint8_t *useraddr;
2051		u64 buf[32];
2052
2053		if (!is_offload(sc))
2054			return (EOPNOTSUPP);
2055		if (!(sc->flags & FULL_INIT_DONE))
2056			return (EIO);         /* need the memory controllers */
2057		if ((t->addr & 0x7) || (t->len & 0x7))
2058			return (EINVAL);
2059		if (t->mem_id == MEM_CM)
2060			mem = &sc->cm;
2061		else if (t->mem_id == MEM_PMRX)
2062			mem = &sc->pmrx;
2063		else if (t->mem_id == MEM_PMTX)
2064			mem = &sc->pmtx;
2065		else
2066			return (EINVAL);
2067
2068		/*
2069		 * Version scheme:
2070		 * bits 0..9: chip version
2071		 * bits 10..15: chip revision
2072		 */
2073		t->version = 3 | (sc->params.rev << 10);
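		/* For example, a revision-2 chip yields 3 | (2 << 10) = 0x803. */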
2074
2075		/*
2076		 * Read 256 bytes at a time as len can be large and we don't
2077		 * want to use huge intermediate buffers.
2078		 */
2079		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2080		while (t->len) {
2081			unsigned int chunk = min(t->len, sizeof(buf));
2082
2083			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2084			if (error)
2085				return (-error);
2086			if (copyout(buf, useraddr, chunk))
2087				return (EFAULT);
2088			useraddr += chunk;
2089			t->addr += chunk;
2090			t->len -= chunk;
2091		}
2092		break;
2093	}
2094	case CHELSIO_READ_TCAM_WORD: {
2095		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2096
2097		if (!is_offload(sc))
2098			return (EOPNOTSUPP);
2099		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2100		break;
2101	}
2102	case CHELSIO_SET_TRACE_FILTER: {
2103		struct ch_trace *t = (struct ch_trace *)data;
2104		const struct trace_params *tp;
2105
2106		tp = (const struct trace_params *)&t->sip;
2107		if (t->config_tx)
2108			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2109					       t->trace_tx);
2110		if (t->config_rx)
2111			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2112					       t->trace_rx);
2113		break;
2114	}
2115	case CHELSIO_SET_PKTSCHED: {
2116		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2117		if (sc->open_device_map == 0)
2118			return (EAGAIN);
2119		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2120		    p->binding);
2121		break;
2122	}
2123	case CHELSIO_IFCONF_GETREGS: {
2124		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2125		int reglen = cxgb_get_regs_len();
2126		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2127		if (buf == NULL) {
2128			return (ENOMEM);
 2129		}
		if (regs->len > reglen)
2130			regs->len = reglen;
2131		else if (regs->len < reglen) {
2132			error = E2BIG;
2133			goto done;
2134		}
2135		cxgb_get_regs(sc, regs, buf);
2136		error = copyout(buf, regs->data, reglen);
2137
2138		done:
2139		free(buf, M_DEVBUF);
2140
2141		break;
2142	}
2143	case CHELSIO_SET_HW_SCHED: {
2144		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2145		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2146
2147		if ((sc->flags & FULL_INIT_DONE) == 0)
2148			return (EAGAIN);       /* need TP to be initialized */
2149		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2150		    !in_range(t->channel, 0, 1) ||
2151		    !in_range(t->kbps, 0, 10000000) ||
2152		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2153		    !in_range(t->flow_ipg, 0,
2154			      dack_ticks_to_usec(sc, 0x7ff)))
2155			return (EINVAL);
2156
2157		if (t->kbps >= 0) {
2158			error = t3_config_sched(sc, t->kbps, t->sched);
2159			if (error < 0)
2160				return (-error);
2161		}
2162		if (t->class_ipg >= 0)
2163			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2164		if (t->flow_ipg >= 0) {
2165			t->flow_ipg *= 1000;     /* us -> ns */
2166			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2167		}
2168		if (t->mode >= 0) {
2169			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2170
2171			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2172					 bit, t->mode ? bit : 0);
2173		}
2174		if (t->channel >= 0)
2175			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2176					 1 << t->sched, t->channel << t->sched);
2177		break;
2178	}
2179	default:
2180		return (EOPNOTSUPP);
2181		break;
2182	}
2183
2184	return (error);
2185}
2186
2187static __inline void
2188reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2189    unsigned int end)
2190{
2191	uint32_t *p = (uint32_t *)buf + start;
2192
2193	for ( ; start <= end; start += sizeof(uint32_t))
2194		*p++ = t3_read_reg(ap, start);
2195}
2196
2197#define T3_REGMAP_SIZE (3 * 1024)
2198static int
2199cxgb_get_regs_len(void)
2200{
2201	return T3_REGMAP_SIZE;
2202}
2203#undef T3_REGMAP_SIZE
2204
2205static void
2206cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2207{
2208
2209	/*
2210	 * Version scheme:
2211	 * bits 0..9: chip version
2212	 * bits 10..15: chip revision
2213	 * bit 31: set for PCIe cards
2214	 */
2215	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2216
2217	/*
2218	 * We skip the MAC statistics registers because they are clear-on-read.
2219	 * Also reading multi-register stats would need to synchronize with the
2220	 * periodic mac stats accumulation.  Hard to justify the complexity.
2221	 */
2222	memset(buf, 0, REGDUMP_SIZE);
2223	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2224	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2225	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2226	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2227	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2228	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2229		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2230	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2231		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2232}
2233