cxgb_main.c revision 170076
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 170076 2007-05-28 22:57:27Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#ifdef CONFIG_DEFINED
#include <cxgb_include.h>
#else
#include <dev/cxgb/cxgb_include.h>
#endif

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_down(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static int offload_close(struct toedev *tdev);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

#define SGE_MSIX_COUNT (SGE_QSETS + 1)

extern int collapse_mbufs;
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;
TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
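/*
 * Example: to restrict the driver to MSI or INTx from the loader, one can
 * set hw.cxgb.msi_allowed="1" in /boot/loader.conf before the module loads;
 * CTLFLAG_RDTUN means the value can only be tuned at load time, not changed
 * on a running system.
 */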

/*
 * The driver enables offload by default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver can distribute multiple queue-sets across each port when
 * MSI-X is in use.  The default here is singleq = 1, which forces a
 * single queue-set per port; set singleq = 0 to enable the multi-queue
 * algorithm.
 */
static int singleq = 1;
TUNABLE_INT("hw.cxgb.singleq", &singleq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
    "use a single queue-set per port");

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	if (ai->nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
upgrade_fw(adapter_t *sc)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d%d", FW_VERSION_MAJOR,
	    FW_VERSION_MINOR, FW_VERSION_MICRO);
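	/*
	 * This builds a firmware(9) image name such as "t3fw414" for a
	 * hypothetical version 4.1.4 (the exact name depends on the
	 * FW_VERSION_* macros); a matching t3fw module must be available
	 * to firmware_get() below for the upgrade to succeed.
	 */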

	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

static int
cxgb_controller_attach(device_t dev)
{
	driver_intr_t *cxgb_intr = NULL;
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, reg, msi_needed, error = 0;
	uint32_t vers;
	int port_qsets = 1;

	sc = device_get_softc(dev);
	sc->dev = dev;
	sc->msi_count = 0;

	/* find the PCIe link width and set max read request to 4KB */
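	/*
	 * In the PCI Express capability, offset 0x12 is the Link Status
	 * register (negotiated link width in bits 9:4) and offset 0x8 is
	 * Device Control, whose bits 14:12 encode Max_Read_Request_Size
	 * as 128 << n bytes, so n = 5 selects 4096-byte reads.
	 */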
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		uint16_t lnk, pectl;
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
	}

	pci_enable_busmaster(dev);

	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		error = ENODEV;
		goto out;
	}

	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED

	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_needed = sc->msi_count = SGE_MSIX_COUNT;

		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
		    (sc->msi_count != msi_needed)) {
			device_printf(dev, "msix allocation failed - msi_count = %d"
			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
			    msi_needed, error);
			sc->msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			cxgb_intr = t3_intr_msix;
		}
	}

	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
		sc->msi_count = 1;
		if (pci_alloc_msi(dev, &sc->msi_count)) {
			device_printf(dev, "alloc msi failed - will try INTx\n");
			sc->msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (sc->msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->irq_rid = 0;
		cxgb_intr = t3b_intr;
	}

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);

	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if ((sc->flags & USING_MSIX) && !singleq)
		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; i++) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
		sc->port[i].nqsets = port_qsets;
		sc->port[i].first_qset = i*port_qsets;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	/*
	 * XXX need to poll for link status
	 */
	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_sw(sc);

	t3_led_ready(sc);

	cxgb_offload_init();
	if (is_offload(sc)) {
		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_adapter_ofld(sc);
	}
	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
	    G_FW_VERSION_MICRO(vers));

	t3_add_sysctls(sc);
out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	cxgb_down(sc);

#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	} else {
		device_printf(sc->dev, "no msi message to release\n");
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	/*
	 * XXX need to drain the ifq by hand until
	 * it is taught about mbuf iovecs
	 */
	callout_drain(&sc->cxgb_tick_ch);

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	bus_generic_detach(sc->dev);

	if (is_offload(sc)) {
		cxgb_adapter_unofld(sc);
		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
			offload_close(&sc->tdev);
	}
	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);

	return;
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = SGE_TXQ_PER_SET;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}

	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}
	for (i = 0, k = 0; i < (sc)->params.nports; ++i) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; ++j, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}

	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

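/*
 * Create a character device node named after the interface (e.g.
 * /dev/cxgb0) through which the CHELSIO_* extension ioctls below
 * are issued.
 */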
static int
cxgb_makedev(struct port_info *pi)
{
	struct cdevsw *cxgb_cdevsw;

	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	cxgb_cdevsw->d_version = D_VERSION;
	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;

	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    pi->ifp->if_xname);

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int media_flags;
	int err;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	ifp->if_baudrate = 100000000;

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
		media_flags = IFM_ETHER | IFM_10G_CX4;
	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
		media_flags = IFM_ETHER | IFM_10G_SR;
	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
		media_flags = IFM_ETHER | IFM_10G_LR;
	else {
		printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}

	ifmedia_add(&p->media, media_flags, 0, NULL);
	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&p->media, media_flags);

	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
#ifdef TASKQUEUE_CURRENT
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);

	PORT_LOCK(p);
	cxgb_stop_locked(p);
	PORT_UNLOCK(p);

	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

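	/*
	 * The offset of the first capability pointer depends on the
	 * header type; from there each capability is a byte-sized ID
	 * followed by a next pointer, a linked list we walk until we
	 * find `cap' or reach a null link.
	 */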
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
		break;
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_stat: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];
	struct cmac *mac = &adapter->port[port_id].mac;

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status) {
		t3_mac_enable(mac, MAC_DIRECTION_RX);
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	} else {
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
		pi->phy.ops->power_down(&pi->phy, 1);
		t3_mac_disable(mac, MAC_DIRECTION_RX);
		t3_link_start(&pi->phy, mac, &pi->link_config);
	}
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
	ADAPTER_UNLOCK(sc);
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated when this is called, since
	 * t3_prep_adapter invokes it early in attach, so just save the
	 * address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
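 *
 *	For example (a hypothetical configuration), with two ports of two
 *	queue sets each (nq0 = nq1 = 2), the low half of the map alternates
 *	between response queues 0 and 1 and the high half between 2 and 3.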
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver
 * after dealing with any active network taps.
 */
static inline int
offload_tx(struct toedev *tdev, struct mbuf *m)
{
	int ret;

	critical_enter();
	ret = t3_offload_tx(tdev, m);
	critical_exit();
	return ret;
}

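/*
 * Program one entry of the source MAC table (SMT) with the port's
 * hardware address by sending a CPL_SMT_WRITE_REQ control message
 * through the offload queue.
 */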
static int
write_smt_entry(struct adapter *adapter, int idx)
{
	struct port_info *pi = &adapter->port[idx];
	struct cpl_smt_write_req *req;
	struct mbuf *m;

	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
		return (ENOMEM);

	req = mtod(m, struct cpl_smt_write_req *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
	req->iff = idx;
	memset(req->src_mac1, 0, sizeof(req->src_mac1));
	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

	m_set_priority(m, 1);

	offload_tx(&adapter->tdev, m);

	return (0);
}

static int
init_smt(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i)
		write_smt_entry(adapter, i);
	return 0;
}

static void
init_port_mtus(adapter_t *adapter)
{
	unsigned int mtus = adapter->port[0].ifp->if_mtu;

	if (adapter->port[1].ifp)
		mtus |= adapter->port[1].ifp->if_mtu << 16;
	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
			      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m) {
		req = mtod(m, struct mngt_pktsched_wr *);
		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
		req->sched = sched;
		req->idx = qidx;
		req->min = lo;
		req->max = hi;
		req->binding = port;
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		t3_mgmt_tx(adap, m);
	}
}

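/*
 * Bind each queue set to its port's TX channel with one management
 * work request per queue; this only matters when multiple queue sets
 * per port are in use, so the singleq case returns immediately.
 */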
static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	if (singleq)
		return;

	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}

/**
 *	cxgb_up - enable the adapter
 *	@adap: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 *
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		if ((sc->flags & FW_UPTODATE) == 0)
			err = upgrade_fw(sc);

		if (err)
			goto out;

		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		setup_rss(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			err = EINVAL;
			goto out;
		}
		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			sc->cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(sc->dev, "Cannot set up interrupt\n");
			err = EINVAL;
			goto irq_err;
		}
	} else {
		cxgb_setup_msix(sc, sc->msi_count);
	}

	t3_sge_start(sc);
	t3_intr_enable(sc);

	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
		bind_qsets(sc);
	sc->flags |= QUEUES_BOUND;
out:
	return (err);
irq_err:
	CH_ERR(sc, "request_irq failed, err %d\n", err);
	goto out;
}

/*
 * Release resources when all the ports and offloading have been stopped.
 */
static void
cxgb_down(struct adapter *sc)
{
	int i;

	t3_sge_stop(sc);
	t3_intr_disable(sc);

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
			sc->msix_intr_tag[i] = NULL;
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
			sc->msix_irq_res[i] = NULL;
		}
	}

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
		sc->intr_tag = NULL;
	}
	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
		sc->irq_res = NULL;
	}

	callout_drain(&sc->sge_timer_ch);
	taskqueue_drain(sc->tq, &sc->slow_intr_task);
	taskqueue_drain(sc->tq, &sc->timer_reclaim_task);
}

static int
offload_open(struct port_info *pi)
{
	struct adapter *adapter = pi->adapter;
	struct toedev *tdev = TOEDEV(pi->ifp);
	int adap_up = adapter->open_device_map & PORT_MASK;
	int err = 0;

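	/*
	 * Atomically mark the offload device as open: if OFFLOAD_DEVMAP_BIT
	 * is already set, the expected value does not match and the
	 * compare-and-set fails, so a second caller simply returns.
	 */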
	if (atomic_cmpset_int(&adapter->open_device_map,
		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
		return (0);

	ADAPTER_LOCK(pi->adapter);
	if (!adap_up)
		err = cxgb_up(adapter);
	ADAPTER_UNLOCK(pi->adapter);
	if (err < 0)
		return (err);

	t3_tp_set_offload_mode(adapter, 1);
	tdev->lldev = adapter->port[0].ifp;
	err = cxgb_offload_activate(adapter);
	if (err)
		goto out;

	init_port_mtus(adapter);
	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
		     adapter->params.b_wnd,
		     adapter->params.rev == 0 ?
		       adapter->port[0].ifp->if_mtu : 0xffff);
	init_smt(adapter);

	/* Call back all registered clients */
	cxgb_add_clients(tdev);

out:
	/* restore them in case the offload module has changed them */
	if (err) {
		t3_tp_set_offload_mode(adapter, 0);
		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_set_dummy_ops(tdev);
	}
	return (err);
}

static int
offload_close(struct toedev *tdev)
{
	struct adapter *adapter = tdev2adap(tdev);

	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
		return 0;

	/* Call back all registered clients */
	cxgb_remove_clients(tdev);
	tdev->lldev = NULL;
	cxgb_set_dummy_ops(tdev);
	t3_tp_set_offload_mode(adapter, 0);
	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);

	ADAPTER_LOCK(adapter);
	if (!adapter->open_device_map)
		cxgb_down(adapter);
	ADAPTER_UNLOCK(adapter);

	cxgb_offload_deactivate(adapter);
	return 0;
}

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int err;

	mtx_assert(&p->lock, MA_OWNED);
	ifp = p->ifp;

	ADAPTER_LOCK(p->adapter);
	if ((sc->open_device_map == 0) && ((err = cxgb_up(sc)) < 0)) {
		ADAPTER_UNLOCK(p->adapter);
		cxgb_stop_locked(p);
		return;
	}
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);

	setbit(&p->adapter->open_device_map, p->port);

	ADAPTER_UNLOCK(p->adapter);
	if (is_offload(sc) && !ofld_disable) {
		err = offload_open(p);
		if (err)
			log(LOG_WARNING,
			    "Could not initialize offload capabilities\n");
	}
	cxgb_link_start(p);
	t3_port_intr_enable(sc, p->port);

	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	PORT_LOCK(p);
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	PORT_UNLOCK(p);
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	mtx_assert(&p->lock, MA_OWNED);

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop_locked(struct port_info *p)
{
	struct ifnet *ifp;

	mtx_assert(&p->lock, MA_OWNED);
	mtx_assert(&p->adapter->lock, MA_NOTOWNED);

	ifp = p->ifp;

	t3_port_intr_disable(p->adapter, p->port);
	PORT_LOCK(p);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	PORT_UNLOCK(p);
	p->phy.ops->power_down(&p->phy, 1);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);

	ADAPTER_LOCK(p->adapter);
	clrbit(&p->adapter->open_device_map, p->port);
	/*
	 * XXX cancel check_task
	 */
	if (p->adapter->open_device_map == 0)
		cxgb_down(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	/*
	 * XXX need to check that we aren't in the middle of an unload
	 */
	switch (command) {
	case SIOCSIFMTU:
		if ((ifr->ifr_mtu < ETHERMIN) ||
		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
			error = EINVAL;
		else if (ifp->if_mtu != ifr->ifr_mtu) {
			PORT_LOCK(p);
			ifp->if_mtu = ifr->ifr_mtu;
			t3_mac_set_mtu(&p->mac, ifp->if_mtu + ETHER_HDR_LEN);
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				cxgb_init(p);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			PORT_LOCK(p);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);

			} else
				cxgb_init_locked(p);
			p->if_flags = ifp->if_flags;
			PORT_UNLOCK(p);
		} else {
			callout_drain(&p->adapter->cxgb_tick_ch);
			PORT_LOCK(p);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop_locked(p);
			} else {
				adapter_t *sc = p->adapter;
				callout_reset(&sc->cxgb_tick_ch,
				    sc->params.stats_update_period * hz,
				    cxgb_tick, sc);
			}
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m0, *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

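	/*
	 * Dequeue and encapsulate frames until we either exhaust the
	 * caller's budget (txmax descriptors) or the ring can no longer
	 * hold a maximally fragmented packet (TX_MAX_DESC descriptors).
	 */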
	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		/*
		 * Convert chain to M_IOVEC
		 */
		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
		m0 = m;
#ifdef INVARIANTS
		/*
		 * Clean up after net stack sloppiness
		 * before calling m_sanity
		 */
		m0 = m->m_next;
		while (m0) {
			m0->m_flags &= ~M_PKTHDR;
			m0 = m0->m_next;
		}
		m_sanity(m, 0);
		m0 = m;
#endif
		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
				m = m0;
				m_collapse(m, TX_MAX_SEGS, &m0);
			} else
				break;
		}
		m = m0;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (err == ENOMEM) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL) {
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		return (ENOBUFS);
	}
	if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) &&
	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		return (ENOSPC);
	}
	return (err);
}

static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error = 0;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	while (error == 0) {
		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
			taskqueue_enqueue(pi->adapter->tq,
			    &pi->adapter->timer_reclaim_task);

		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
	}
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
		taskqueue_enqueue(pi->adapter->tq,
		    &pi->adapter->timer_reclaim_task);

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}

static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	adapter_t *sc = data;

	if (cxgb_debug)
		device_printf(sc->dev, "cxgb_async_intr\n");

	t3_slow_intr_handler(sc);
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
	ADAPTER_UNLOCK(sc);
}

static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
	}
}

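/*
 * T3B2 MAC workaround, run from the periodic tick: poll the MAC
 * watchdog and either count a TX-path toggle or, if the MAC appears
 * hung, reinitialize it and restart the link.
 */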
static void
check_t3b2_mac(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i) {
		struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;
		int status;

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		status = 0;
		PORT_LOCK(p);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
			status = t3b2_mac_watchdog_task(&p->mac);
		if (status == 1)
			p->mac.stats.num_toggled++;
		else if (status == 2) {
			struct cmac *mac = &p->mac;

			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN);
			t3_mac_set_address(mac, 0, p->hw_addr);
			cxgb_set_rxmode(p);
			t3_link_start(&p->phy, mac, &p->link_config);
			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
			t3_port_intr_enable(adapter, p->port);
			p->mac.stats.num_resets++;
		}
		PORT_UNLOCK(p);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	/*
	 * The adapter lock can currently only be acquired after the
	 * port lock, so drop it here; the callout was initialized with
	 * CALLOUT_RETURNUNLOCKED, which makes this unlock safe.
	 */
	ADAPTER_UNLOCK(sc);
	if (p->rev == T3_REV_B2)
		check_t3b2_mac(sc);
}

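/*
 * Range check used by the ioctl handlers below; a negative value
 * means "parameter not supplied" and is always accepted.
 */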
static int
in_range(int val, int lo, int hi)
{
	return val < 0 || (val <= hi && val >= lo);
}

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_SET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);
		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
		    !in_range(t->cong_thres, 0, 255) ||
		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
			      MAX_CTRL_TXQ_ENTRIES) ||
		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
			      MAX_RX_JUMBO_BUFFERS) ||
		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
			return (EINVAL);
		if ((sc->flags & FULL_INIT_DONE) &&
		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
		     t->polling >= 0 || t->cong_thres >= 0))
			return (EBUSY);

		q = &sc->params.sge.qset[t->qset_idx];

		if (t->rspq_size >= 0)
			q->rspq_size = t->rspq_size;
		if (t->fl_size[0] >= 0)
			q->fl_size = t->fl_size[0];
		if (t->fl_size[1] >= 0)
			q->jumbo_size = t->fl_size[1];
		if (t->txq_size[0] >= 0)
			q->txq_size[0] = t->txq_size[0];
		if (t->txq_size[1] >= 0)
			q->txq_size[1] = t->txq_size[1];
		if (t->txq_size[2] >= 0)
			q->txq_size[2] = t->txq_size[2];
		if (t->cong_thres >= 0)
			q->cong_thres = t->cong_thres;
		if (t->intr_lat >= 0) {
			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];

			q->coalesce_nsecs = t->intr_lat*1000;
			t3_update_qset_coalesce(qs, q);
		}
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);

		q = &(sc)->params.sge.qset[t->qset_idx];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->intr_lat    = q->coalesce_nsecs / 1000;
		t->cong_thres  = q->cong_thres;
		break;
	}
	case CHELSIO_SET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		unsigned int port_idx = pi->port;

		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);
		if (edata->val < 1 ||
		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
			return (EINVAL);
		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
			return (EINVAL);
		sc->port[port_idx].nqsets = edata->val;
		sc->port[0].first_qset = 0;
		/*
		 * XXX hardcode ourselves to 2 ports just like LEEENUX
		 */
		sc->port[1].first_qset = sc->port[0].nqsets;
		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
#ifdef notyet
	case CHELSIO_LOAD_FW:
	case CHELSIO_GET_PM:
	case CHELSIO_SET_PM:
		return (EOPNOTSUPP);
		break;
#endif
	case CHELSIO_SETMTUTAB: {
		struct ch_mtus *m = (struct ch_mtus *)data;
		int i;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (offload_running(sc))
			return (EBUSY);
		if (m->nmtus != NMTUS)
			return (EINVAL);
		if (m->mtus[0] < 81)         /* accommodate SACK */
			return (EINVAL);

		/*
		 * MTUs must be in ascending order
		 */
		for (i = 1; i < NMTUS; ++i)
			if (m->mtus[i] < m->mtus[i - 1])
				return (EINVAL);

		memcpy(sc->params.mtus, m->mtus,
		       sizeof(sc->params.mtus));
		break;
	}
	case CHELSIO_GETMTUTAB: {
		struct ch_mtus *m = (struct ch_mtus *)data;

		if (!is_offload(sc))
			return (EOPNOTSUPP);

		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
		m->nmtus = NMTUS;
		break;
	}
	case CHELSIO_DEVUP:
		if (!is_offload(sc))
			return (EOPNOTSUPP);
		return offload_open(pi);
		break;
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((t->addr & 0x7) || (t->len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
		while (t->len) {
			unsigned int chunk = min(t->len, sizeof(buf));

			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			t->addr += chunk;
			t->len -= chunk;
		}
		break;
	}
	case CHELSIO_READ_TCAM_WORD: {
		struct ch_tcam_word *t = (struct ch_tcam_word *)data;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
		break;
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ifconf_regs *regs = (struct ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
		if (buf == NULL) {
			return (ENOMEM);
		}
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen) {
			error = E2BIG;
			goto done;
		}
		cxgb_get_regs(sc, regs, buf);
		error = copyout(buf, regs->data, reglen);

done:
		free(buf, M_DEVBUF);
		break;
	}
	case CHELSIO_SET_HW_SCHED: {
		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
		unsigned int ticks_per_usec = core_ticks_per_usec(sc);

		if ((sc->flags & FULL_INIT_DONE) == 0)
			return (EAGAIN);       /* need TP to be initialized */
		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
		    !in_range(t->channel, 0, 1) ||
		    !in_range(t->kbps, 0, 10000000) ||
		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
		    !in_range(t->flow_ipg, 0,
			      dack_ticks_to_usec(sc, 0x7ff)))
			return (EINVAL);

		if (t->kbps >= 0) {
			error = t3_config_sched(sc, t->kbps, t->sched);
			if (error < 0)
				return (-error);
		}
		if (t->class_ipg >= 0)
			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
		if (t->flow_ipg >= 0) {
			t->flow_ipg *= 1000;     /* us -> ns */
			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
		}
		if (t->mode >= 0) {
			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);

			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
					 bit, t->mode ? bit : 0);
		}
		if (t->channel >= 0)
			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
					 1 << t->sched, t->channel << t->sched);
		break;
	}
	default:
		return (EOPNOTSUPP);
		break;
	}

	return (error);
}

static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	/* `start' and `end' are byte offsets into the register space */
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return T3_REGMAP_SIZE;
}
#undef T3_REGMAP_SIZE

static void
cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, REGDUMP_SIZE);
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}
2234