cxgb_main.c revision 171803
1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
122. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 171803 2007-08-10 23:33:34Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/rman.h>
44#include <sys/ioccom.h>
45#include <sys/mbuf.h>
46#include <sys/linker.h>
47#include <sys/firmware.h>
48#include <sys/socket.h>
49#include <sys/sockio.h>
50#include <sys/smp.h>
51#include <sys/sysctl.h>
52#include <sys/queue.h>
53#include <sys/taskqueue.h>
54
55#include <net/bpf.h>
56#include <net/ethernet.h>
57#include <net/if.h>
58#include <net/if_arp.h>
59#include <net/if_dl.h>
60#include <net/if_media.h>
61#include <net/if_types.h>
62
63#include <netinet/in_systm.h>
64#include <netinet/in.h>
65#include <netinet/if_ether.h>
66#include <netinet/ip.h>
68#include <netinet/tcp.h>
69#include <netinet/udp.h>
70
71#include <dev/pci/pcireg.h>
72#include <dev/pci/pcivar.h>
73#include <dev/pci/pci_private.h>
74
75#ifdef CONFIG_DEFINED
76#include <cxgb_include.h>
77#else
78#include <dev/cxgb/cxgb_include.h>
79#endif
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_msix(adapter_t *, int);
86static void cxgb_teardown_msix(adapter_t *);
87static void cxgb_init(void *);
88static void cxgb_init_locked(struct port_info *);
89static void cxgb_stop_locked(struct port_info *);
90static void cxgb_set_rxmode(struct port_info *);
91static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92static void cxgb_start(struct ifnet *);
93static void cxgb_start_proc(void *, int ncount);
94static int cxgb_media_change(struct ifnet *);
95static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96static int setup_sge_qsets(adapter_t *);
97static void cxgb_async_intr(void *);
98static void cxgb_ext_intr_handler(void *, int);
99static void cxgb_tick_handler(void *, int);
100static void cxgb_down_locked(struct adapter *sc);
101static void cxgb_tick(void *);
102static void setup_rss(adapter_t *sc);
103
104/* Attachment glue for the PCI controller end of the device.  Each port of
105 * the device is attached separately, as defined later.
106 */
107static int cxgb_controller_probe(device_t);
108static int cxgb_controller_attach(device_t);
109static int cxgb_controller_detach(device_t);
110static void cxgb_free(struct adapter *);
111static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
112    unsigned int end);
113static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
114static int cxgb_get_regs_len(void);
115static int offload_open(struct port_info *pi);
116#ifdef notyet
117static int offload_close(struct toedev *tdev);
118#endif
119
120
121static device_method_t cxgb_controller_methods[] = {
122	DEVMETHOD(device_probe,		cxgb_controller_probe),
123	DEVMETHOD(device_attach,	cxgb_controller_attach),
124	DEVMETHOD(device_detach,	cxgb_controller_detach),
125
126	/* bus interface */
127	DEVMETHOD(bus_print_child,	bus_generic_print_child),
128	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
129
130	{ 0, 0 }
131};
132
133static driver_t cxgb_controller_driver = {
134	"cxgbc",
135	cxgb_controller_methods,
136	sizeof(struct adapter)
137};
138
139static devclass_t	cxgb_controller_devclass;
140DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
141
142/*
143 * Attachment glue for the ports.  Attachment is done directly to the
144 * controller device.
145 */
146static int cxgb_port_probe(device_t);
147static int cxgb_port_attach(device_t);
148static int cxgb_port_detach(device_t);
149
150static device_method_t cxgb_port_methods[] = {
151	DEVMETHOD(device_probe,		cxgb_port_probe),
152	DEVMETHOD(device_attach,	cxgb_port_attach),
153	DEVMETHOD(device_detach,	cxgb_port_detach),
154	{ 0, 0 }
155};
156
157static driver_t cxgb_port_driver = {
158	"cxgb",
159	cxgb_port_methods,
160	0
161};
162
163static d_ioctl_t cxgb_extension_ioctl;
164static d_open_t cxgb_extension_open;
165static d_close_t cxgb_extension_close;
166
167static struct cdevsw cxgb_cdevsw = {
168       .d_version =    D_VERSION,
169       .d_flags =      0,
170       .d_open =       cxgb_extension_open,
171       .d_close =      cxgb_extension_close,
172       .d_ioctl =      cxgb_extension_ioctl,
173       .d_name =       "cxgb",
174};
175
176static devclass_t	cxgb_port_devclass;
177DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
178
179#define SGE_MSIX_COUNT (SGE_QSETS + 1)
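/*
 * One MSI-X vector per queue set plus one for the slow-path/async interrupt
 * (link changes and error conditions); see cxgb_setup_msix() below.
 */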
180
181extern int collapse_mbufs;
182/*
183 * The driver uses the best interrupt scheme available on a platform in the
184 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
185 * of these schemes the driver may consider as follows:
186 *
187 * msi = 2: choose from among all three options
188 * msi = 1: only consider MSI and pin interrupts
189 * msi = 0: force pin interrupts
190 */
191static int msi_allowed = 2;
192
193TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
194SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
195SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
196    "MSI-X, MSI, INTx selector");
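
/*
 * Because the knobs in this file are registered with TUNABLE_INT(), they can
 * be set from /boot/loader.conf at boot time, for example:
 *
 *	hw.cxgb.msi_allowed=1	# consider only MSI and INTx
 *	hw.cxgb.msi_allowed=0	# force legacy pin interrupts
 *
 * The CTLFLAG_RDTUN sysctls only report the values at runtime.
 */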
197
198/*
199 * The driver enables offload as a default.
200 * To disable it, use ofld_disable = 1.
201 */
202static int ofld_disable = 0;
203TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
204SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
205    "disable ULP offload");
206
207/*
208 * singleq = 1 (the default here) forces a single queue-set per port;
209 * set singleq = 0 to allow multiple queue-sets per port under MSI-X.
210 */
211static int singleq = 1;
212TUNABLE_INT("hw.cxgb.singleq", &singleq);
213SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
214    "use a single queue-set per port");
215
216enum {
217	MAX_TXQ_ENTRIES      = 16384,
218	MAX_CTRL_TXQ_ENTRIES = 1024,
219	MAX_RSPQ_ENTRIES     = 16384,
220	MAX_RX_BUFFERS       = 16384,
221	MAX_RX_JUMBO_BUFFERS = 16384,
222	MIN_TXQ_ENTRIES      = 4,
223	MIN_CTRL_TXQ_ENTRIES = 4,
224	MIN_RSPQ_ENTRIES     = 32,
225	MIN_FL_ENTRIES       = 32
226};
227
228struct filter_info {
229	u32 sip;
230	u32 sip_mask;
231	u32 dip;
232	u16 sport;
233	u16 dport;
234	u32 vlan:12;
235	u32 vlan_prio:3;
236	u32 mac_hit:1;
237	u32 mac_idx:4;
238	u32 mac_vld:1;
239	u32 pkt_type:2;
240	u32 report_filter_id:1;
241	u32 pass:1;
242	u32 rss:1;
243	u32 qset:3;
244	u32 locked:1;
245	u32 valid:1;
246};
247
248enum { FILTER_NO_VLAN_PRI = 7 };
249
250#define PORT_MASK ((1 << MAX_NPORTS) - 1)
251
252/* Table for probing the cards.  The desc field is used only as an end-of-table sentinel. */
253struct cxgb_ident {
254	uint16_t	vendor;
255	uint16_t	device;
256	int		index;
257	char		*desc;
258} cxgb_identifiers[] = {
259	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
260	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
261	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
262	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
263	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
264	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
265	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
266	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
267	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
268	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
269	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
270	{0, 0, 0, NULL}
271};
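/*
 * cxgb_get_ident() below matches the PCI vendor/device pair against this
 * table; the index field selects the adapter_info entry returned by
 * t3_get_adapter_info().  For example, device id 0x0031 maps to index 3,
 * a T3B20 board.
 */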
272
273
274static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
275
276static inline char
277t3rev2char(struct adapter *adapter)
278{
279	char rev = 'z';
280
281	switch(adapter->params.rev) {
282	case T3_REV_A:
283		rev = 'a';
284		break;
285	case T3_REV_B:
286	case T3_REV_B2:
287		rev = 'b';
288		break;
289	case T3_REV_C:
290		rev = 'c';
291		break;
292	}
293	return rev;
294}
295
296static struct cxgb_ident *
297cxgb_get_ident(device_t dev)
298{
299	struct cxgb_ident *id;
300
301	for (id = cxgb_identifiers; id->desc != NULL; id++) {
302		if ((id->vendor == pci_get_vendor(dev)) &&
303		    (id->device == pci_get_device(dev))) {
304			return (id);
305		}
306	}
307	return (NULL);
308}
309
310static const struct adapter_info *
311cxgb_get_adapter_info(device_t dev)
312{
313	struct cxgb_ident *id;
314	const struct adapter_info *ai;
315
316	id = cxgb_get_ident(dev);
317	if (id == NULL)
318		return (NULL);
319
320	ai = t3_get_adapter_info(id->index);
321
322	return (ai);
323}
324
325static int
326cxgb_controller_probe(device_t dev)
327{
328	const struct adapter_info *ai;
329	char *ports, buf[80];
330	int nports;
331
332	ai = cxgb_get_adapter_info(dev);
333	if (ai == NULL)
334		return (ENXIO);
335
336	nports = ai->nports0 + ai->nports1;
337	if (nports == 1)
338		ports = "port";
339	else
340		ports = "ports";
341
342	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
343	device_set_desc_copy(dev, buf);
344	return (BUS_PROBE_DEFAULT);
345}
346
347#define FW_FNAME "t3fw%d%d%d"
348#define TPEEPROM_NAME "t3%ctpe%d%d%d"
349#define TPSRAM_NAME "t3%cps%d%d%d"
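/*
 * Firmware images are looked up by name through firmware(9).  With
 * hypothetical version constants 4.7.0, FW_FNAME expands to "t3fw470", and
 * for a rev-B part TPEEPROM_NAME and TPSRAM_NAME expand to "t3btpe470" and
 * "t3bps470" respectively (values shown for illustration only).
 */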
350
351static int
352upgrade_fw(adapter_t *sc)
353{
354	char buf[32];
355#ifdef FIRMWARE_LATEST
356	const struct firmware *fw;
357#else
358	struct firmware *fw;
359#endif
360	int status;
361
362	snprintf(&buf[0], sizeof(buf), FW_FNAME,  FW_VERSION_MAJOR,
363	    FW_VERSION_MINOR, FW_VERSION_MICRO);
364
365	fw = firmware_get(buf);
366
367	if (fw == NULL) {
368		device_printf(sc->dev, "Could not find firmware image %s\n", buf);
369		return (ENOENT);
370	} else
371		device_printf(sc->dev, "updating firmware on card with %s\n", buf);
372	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
373
374	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
375
376	firmware_put(fw, FIRMWARE_UNLOAD);
377
378	return (status);
379}
380
381static int
382cxgb_controller_attach(device_t dev)
383{
384	device_t child;
385	const struct adapter_info *ai;
386	struct adapter *sc;
387	int i, reg, msi_needed, error = 0;
388	uint32_t vers;
389	int port_qsets = 1;
390
391	sc = device_get_softc(dev);
392	sc->dev = dev;
393	sc->msi_count = 0;
394
395	/* find the PCIe link width and set max read request to 4KB */
396	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
397		uint16_t lnk, pectl;
398		lnk = pci_read_config(dev, reg + 0x12, 2);
399		sc->link_width = (lnk >> 4) & 0x3f;
400
401		pectl = pci_read_config(dev, reg + 0x8, 2);
402		pectl = (pectl & ~0x7000) | (5 << 12);
403		pci_write_config(dev, reg + 0x8, pectl, 2);
404	}
405
406	ai = cxgb_get_adapter_info(dev);
407	if (sc->link_width != 0 && sc->link_width <= 4 &&
408	    (ai->nports0 + ai->nports1) <= 2) {
409		device_printf(sc->dev,
410		    "PCIe x%d Link, expect reduced performance\n",
411		    sc->link_width);
412	}
413
414	pci_enable_busmaster(dev);
415	/*
416	 * Allocate the registers and make them available to the driver.
417	 * The registers that we care about for NIC mode are in BAR 0
418	 */
419	sc->regs_rid = PCIR_BAR(0);
420	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
421	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
422		device_printf(dev, "Cannot allocate BAR\n");
423		return (ENXIO);
424	}
425
426	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
427	    device_get_unit(dev));
428	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
429
430	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
431	    device_get_unit(dev));
432	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
433	    device_get_unit(dev));
434	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
435	    device_get_unit(dev));
436
437	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_DEF);
438	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
439	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
440
441	sc->bt = rman_get_bustag(sc->regs_res);
442	sc->bh = rman_get_bushandle(sc->regs_res);
443	sc->mmio_len = rman_get_size(sc->regs_res);
444
445	if (t3_prep_adapter(sc, ai, 1) < 0) {
446		printf("prep adapter failed\n");
447		error = ENODEV;
448		goto out;
449	}
450	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
451	 * enough messages for the queue sets.  If that fails, try falling
452	 * back to MSI.  If that fails, then try falling back to the legacy
453	 * interrupt pin model.
454	 */
455#ifdef MSI_SUPPORTED
456
457	sc->msix_regs_rid = 0x20;
458	if ((msi_allowed >= 2) &&
459	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
460	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
461
462		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
463
464		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
465		    (sc->msi_count != msi_needed)) {
466			device_printf(dev, "msix allocation failed - msi_count = %d"
467			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
468			    msi_needed, error);
469			sc->msi_count = 0;
470			pci_release_msi(dev);
471			bus_release_resource(dev, SYS_RES_MEMORY,
472			    sc->msix_regs_rid, sc->msix_regs_res);
473			sc->msix_regs_res = NULL;
474		} else {
475			sc->flags |= USING_MSIX;
476			sc->cxgb_intr = t3_intr_msix;
477		}
478	}
479
480	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
481		sc->msi_count = 1;
482		if (pci_alloc_msi(dev, &sc->msi_count)) {
483			device_printf(dev, "alloc msi failed - will try INTx\n");
484			sc->msi_count = 0;
485			pci_release_msi(dev);
486		} else {
487			sc->flags |= USING_MSI;
488			sc->irq_rid = 1;
489			sc->cxgb_intr = t3_intr_msi;
490		}
491	}
492#endif
493	if (sc->msi_count == 0) {
494		device_printf(dev, "using line interrupts\n");
495		sc->irq_rid = 0;
496		sc->cxgb_intr = t3b_intr;
497	}
498
499
500	/* Create a private taskqueue thread for handling driver events */
501#ifdef TASKQUEUE_CURRENT
502	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
503	    taskqueue_thread_enqueue, &sc->tq);
504#else
505	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
506	    taskqueue_thread_enqueue, &sc->tq);
507#endif
508	if (sc->tq == NULL) {
509		device_printf(dev, "failed to allocate controller task queue\n");
510		goto out;
511	}
512
513	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
514	    device_get_nameunit(dev));
515	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
516	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
517
518
519	/* Create a periodic callout for checking adapter status */
520	callout_init(&sc->cxgb_tick_ch, TRUE);
521
522	if (t3_check_fw_version(sc) != 0) {
523		/*
524		 * Warn user that a firmware update will be attempted in init.
525		 */
526		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
527		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
528		sc->flags &= ~FW_UPTODATE;
529	} else {
530		sc->flags |= FW_UPTODATE;
531	}
532
533	if (t3_check_tpsram_version(sc) != 0) {
534		/*
535		 * Warn user that a firmware update will be attempted in init.
536		 */
537		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
538		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
539		sc->flags &= ~TPS_UPTODATE;
540	} else {
541		sc->flags |= TPS_UPTODATE;
542	}
543
544	if ((sc->flags & USING_MSIX) && !singleq)
545		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
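	/*
	 * Illustrative example: on a two-port adapter using MSI-X with
	 * singleq forced to 0 and, say, SGE_QSETS of 8 and four CPUs, each
	 * port gets min(8 / 2, 4) = 4 queue sets.
	 */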
546
547	/*
548	 * Create a child device for each MAC.  The ethernet attachment
549	 * will be done in these children.
550	 */
551	for (i = 0; i < (sc)->params.nports; i++) {
552		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
553			device_printf(dev, "failed to add child port\n");
554			error = EINVAL;
555			goto out;
556		}
557		sc->port[i].adapter = sc;
558		sc->port[i].nqsets = port_qsets;
559		sc->port[i].first_qset = i*port_qsets;
560		sc->port[i].port_id = i;
561		sc->portdev[i] = child;
562		device_set_softc(child, &sc->port[i]);
563	}
564	if ((error = bus_generic_attach(dev)) != 0)
565		goto out;
566
567	/*
568	 * XXX need to poll for link status
569	 */
570	sc->params.stats_update_period = 1;
571
572	/* initialize sge private state */
573	t3_sge_init_adapter(sc);
574
575	t3_led_ready(sc);
576
577	cxgb_offload_init();
578	if (is_offload(sc)) {
579		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
580		cxgb_adapter_ofld(sc);
581        }
582	error = t3_get_fw_version(sc, &vers);
583	if (error)
584		goto out;
585
586	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
587	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
588	    G_FW_VERSION_MICRO(vers));
589
590	t3_add_sysctls(sc);
591out:
592	if (error)
593		cxgb_free(sc);
594
595	return (error);
596}
597
598static int
599cxgb_controller_detach(device_t dev)
600{
601	struct adapter *sc;
602
603	sc = device_get_softc(dev);
604
605	cxgb_free(sc);
606
607	return (0);
608}
609
610static void
611cxgb_free(struct adapter *sc)
612{
613	int i;
614
615	ADAPTER_LOCK(sc);
616	/*
617	 * drops the lock
618	 */
619	cxgb_down_locked(sc);
620
621#ifdef MSI_SUPPORTED
622	if (sc->flags & (USING_MSI | USING_MSIX)) {
623		device_printf(sc->dev, "releasing msi message(s)\n");
624		pci_release_msi(sc->dev);
625	} else {
626		device_printf(sc->dev, "no msi message to release\n");
627	}
628#endif
629	if (sc->msix_regs_res != NULL) {
630		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
631		    sc->msix_regs_res);
632	}
633
634	t3_sge_deinit_sw(sc);
635
636	if (sc->tq != NULL) {
637		taskqueue_drain(sc->tq, &sc->ext_intr_task);
638		taskqueue_drain(sc->tq, &sc->tick_task);
639		taskqueue_free(sc->tq);
640	}
641
642	tsleep(&sc, 0, "cxgb unload", hz);
643
644	for (i = 0; i < (sc)->params.nports; ++i) {
645		if (sc->portdev[i] != NULL)
646			device_delete_child(sc->dev, sc->portdev[i]);
647	}
648
649	bus_generic_detach(sc->dev);
650#ifdef notyet
651	if (is_offload(sc)) {
652		cxgb_adapter_unofld(sc);
653		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
654			offload_close(&sc->tdev);
655	}
656#endif
657	t3_free_sge_resources(sc);
658	free(sc->filters, M_DEVBUF);
659	t3_sge_free(sc);
660
661	cxgb_offload_exit();
662
663	if (sc->regs_res != NULL)
664		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
665		    sc->regs_res);
666
667	MTX_DESTROY(&sc->mdio_lock);
668	MTX_DESTROY(&sc->sge.reg_lock);
669	MTX_DESTROY(&sc->elmer_lock);
670	ADAPTER_LOCK_DEINIT(sc);
671
672	return;
673}
674
675
676
677static int
678alloc_filters(struct adapter *adap)
679{
680	struct filter_info *p;
681	int nfilters;
682
683	if ((nfilters = adap->params.mc5.nfilters) == 0)
684		return (0);
685
686	adap->filters = malloc(nfilters*sizeof(struct filter_info),
687	    M_DEVBUF, M_ZERO|M_WAITOK);
688
689	if (adap->filters == NULL)
690		return (ENOMEM);
691
692	/* Set the default filter; only non-zero fields need to be set here. */
693	p = &adap->filters[nfilters - 1];
694	p->vlan = 0xfff;
695	p->vlan_prio = FILTER_NO_VLAN_PRI;
696	p->pass = p->rss = p->valid = p->locked = 1;
697
698	return (0);
699}
700
701static inline void
702set_tcb_field_ulp(struct cpl_set_tcb_field *req,
703    unsigned int tid, unsigned int word,
704    uint64_t mask, uint64_t val)
705{
706	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
707
708	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
709	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
710	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
711	req->reply = V_NO_REPLY(1);
712	req->cpu_idx = 0;
713	req->word = htons(word);
714	req->mask = htobe64(mask);
715	req->val = htobe64(val);
716}
717
718static int
719set_filter(struct adapter *adap, int id, const struct filter_info *f)
720{
721	int len;
722	struct mbuf *m;
723	struct ulp_txpkt *txpkt;
724	struct work_request_hdr *wr;
725	struct cpl_pass_open_req *oreq;
726	struct cpl_set_tcb_field *sreq;
727
728	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
729	id += t3_mc5_size(&adap->mc5) - adap->params.mc5.nroutes -
730	      adap->params.mc5.nfilters;
731
732	m = m_gethdr(M_TRYWAIT, MT_DATA);
733	wr = mtod(m, struct work_request_hdr *);
734	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
735	m->m_len = m->m_pkthdr.len = len;
736
737	oreq = (struct cpl_pass_open_req *)(wr + 1);
738	txpkt = (struct ulp_txpkt *)oreq;
739	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
740	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
741	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
742	oreq->local_port = htons(f->dport);
743	oreq->peer_port = htons(f->sport);
744	oreq->local_ip = htonl(f->dip);
745	oreq->peer_ip = htonl(f->sip);
746	oreq->peer_netmask = htonl(f->sip_mask);
747	oreq->opt0h = 0;
748	oreq->opt0l = htonl(F_NO_OFFLOAD);
749	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
750			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
751			 V_VLAN_PRI(f->vlan_prio >> 1) |
752			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
753			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
754			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
755
756	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
757	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
758			  (f->report_filter_id << 15) | (1 << 23) |
759			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
760	set_tcb_field_ulp(sreq + 1, id, 25, 0x3f80000,
761			  (u64)adap->rrss_map[f->qset] << 19);
762	t3_mgmt_tx(adap, m);
763	return 0;
764}
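/*
 * The management work request built above packs, back to back, a
 * work_request_hdr, a CPL_PASS_OPEN_REQ describing the filter tuple, and two
 * ULP-wrapped CPL_SET_TCB_FIELD writes setting the pass/rss/qset disposition,
 * all carried in a single mbuf handed to t3_mgmt_tx().
 */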
765
766static int
767setup_hw_filters(struct adapter *adap)
768{
769	int i, err;
770
771	if (adap->filters == NULL)
772		return 0;
773
774	t3_enable_filters(adap);
775
776	for (i = err = 0; i < adap->params.mc5.nfilters && !err; i++)
777		if (adap->filters[i].locked)
778			err = set_filter(adap, i, &adap->filters[i]);
779	return err;
780}
781
782/**
783 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
784 *	@sc: the controller softc
785 *
786 *	Determines how many sets of SGE queues to use and initializes them.
787 *	We support multiple queue sets per port if we have MSI-X, otherwise
788 *	just one queue set per port.
789 */
790static int
791setup_sge_qsets(adapter_t *sc)
792{
793	int i, j, err, irq_idx, qset_idx;
794	u_int ntxq = SGE_TXQ_PER_SET;
795
796	if ((err = t3_sge_alloc(sc)) != 0) {
797		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
798		return (err);
799	}
800
801	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
802		irq_idx = -1;
803	else
804		irq_idx = 0;
805
806	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
807		struct port_info *pi = &sc->port[i];
808
809		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
810			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
811			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
812			    &sc->params.sge.qset[qset_idx], ntxq, pi);
813			if (err) {
814				t3_free_sge_resources(sc);
815				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
816				return (err);
817			}
818		}
819	}
820
821	return (0);
822}
823
824static void
825cxgb_teardown_msix(adapter_t *sc)
826{
827	int i, nqsets;
828
829	for (nqsets = i = 0; i < (sc)->params.nports; i++)
830		nqsets += sc->port[i].nqsets;
831
832	for (i = 0; i < nqsets; i++) {
833		if (sc->msix_intr_tag[i] != NULL) {
834			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
835			    sc->msix_intr_tag[i]);
836			sc->msix_intr_tag[i] = NULL;
837		}
838		if (sc->msix_irq_res[i] != NULL) {
839			bus_release_resource(sc->dev, SYS_RES_IRQ,
840			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
841			sc->msix_irq_res[i] = NULL;
842		}
843	}
844}
845
846static int
847cxgb_setup_msix(adapter_t *sc, int msix_count)
848{
849	int i, j, k, nqsets, rid;
850
851	/* The first message indicates link changes and error conditions */
852	sc->irq_rid = 1;
853	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
854	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
855		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
856		return (EINVAL);
857	}
858
859	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
860#ifdef INTR_FILTERS
861			NULL,
862#endif
863		cxgb_async_intr, sc, &sc->intr_tag)) {
864		device_printf(sc->dev, "Cannot set up interrupt\n");
865		return (EINVAL);
866	}
867	for (i = k = 0; i < (sc)->params.nports; i++) {
868		nqsets = sc->port[i].nqsets;
869		for (j = 0; j < nqsets; j++, k++) {
870			struct sge_qset *qs = &sc->sge.qs[k];
871
872			rid = k + 2;
873			if (cxgb_debug)
874				printf("rid=%d ", rid);
875			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
876			    sc->dev, SYS_RES_IRQ, &rid,
877			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
878				device_printf(sc->dev, "Cannot allocate "
879				    "interrupt for message %d\n", rid);
880				return (EINVAL);
881			}
882			sc->msix_irq_rid[k] = rid;
883			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
884			    INTR_MPSAFE|INTR_TYPE_NET,
885#ifdef INTR_FILTERS
886			NULL,
887#endif
888				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
889				device_printf(sc->dev, "Cannot set up "
890				    "interrupt for message %d\n", rid);
891				return (EINVAL);
892			}
893		}
894	}
895
896
897	return (0);
898}
899
900static int
901cxgb_port_probe(device_t dev)
902{
903	struct port_info *p;
904	char buf[80];
905
906	p = device_get_softc(dev);
907
908	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, p->port_type->desc);
909	device_set_desc_copy(dev, buf);
910	return (0);
911}
912
913
914static int
915cxgb_makedev(struct port_info *pi)
916{
917
918	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
919	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
920
921	if (pi->port_cdev == NULL)
922		return (ENOMEM);
923
924	pi->port_cdev->si_drv1 = (void *)pi;
925
926	return (0);
927}
928
929
930#ifdef TSO_SUPPORTED
931#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
932/* Don't enable TSO6 yet */
933#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
934#else
935#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
936/* Don't enable TSO6 yet */
937#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
938#define IFCAP_TSO4 0x0
939#define CSUM_TSO   0x0
940#endif
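/*
 * When TSO_SUPPORTED is not defined, IFCAP_TSO4 and CSUM_TSO are defined to
 * zero above, so the capability and hwassist manipulation below compiles but
 * never actually enables TSO.
 */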
941
942
943static int
944cxgb_port_attach(device_t dev)
945{
946	struct port_info *p;
947	struct ifnet *ifp;
948	int err, media_flags;
949
950	p = device_get_softc(dev);
951
952	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
953	    device_get_unit(device_get_parent(dev)), p->port_id);
954	PORT_LOCK_INIT(p, p->lockbuf);
955
956	/* Allocate an ifnet object and set it up */
957	ifp = p->ifp = if_alloc(IFT_ETHER);
958	if (ifp == NULL) {
959		device_printf(dev, "Cannot allocate ifnet\n");
960		return (ENOMEM);
961	}
962
963	/*
964	 * Note that there is currently no watchdog timer.
965	 */
966	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
967	ifp->if_init = cxgb_init;
968	ifp->if_softc = p;
969	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
970	ifp->if_ioctl = cxgb_ioctl;
971	ifp->if_start = cxgb_start;
972	ifp->if_timer = 0;	/* Disable ifnet watchdog */
973	ifp->if_watchdog = NULL;
974
975	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
976	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
977	IFQ_SET_READY(&ifp->if_snd);
978
979	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
980	ifp->if_capabilities |= CXGB_CAP;
981	ifp->if_capenable |= CXGB_CAP_ENABLE;
982	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
983	/*
984	 * disable TSO on 4-port - it isn't supported by the firmware yet
985	 */
986	if (p->adapter->params.nports > 2) {
987		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
988		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
989		ifp->if_hwassist &= ~CSUM_TSO;
990	}
991
992	ether_ifattach(ifp, p->hw_addr);
993	/*
994	 * Only default to jumbo frames on 10GigE
995	 */
996	if (p->adapter->params.nports <= 2)
997		ifp->if_mtu = 9000;
998	if ((err = cxgb_makedev(p)) != 0) {
999		printf("makedev failed %d\n", err);
1000		return (err);
1001	}
1002	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1003	    cxgb_media_status);
1004
1005	if (!strcmp(p->port_type->desc, "10GBASE-CX4")) {
1006		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
1007	} else if (!strcmp(p->port_type->desc, "10GBASE-SR")) {
1008		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
1009	} else if (!strcmp(p->port_type->desc, "10GBASE-XR")) {
1010		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
1011	} else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) {
1012		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
1013		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1014			    0, NULL);
1015		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
1016			    0, NULL);
1017		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1018			    0, NULL);
1019		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1020			    0, NULL);
1021		media_flags = 0;
1022	} else {
1023	        printf("unsupported media type %s\n", p->port_type->desc);
1024		return (ENXIO);
1025	}
1026	if (media_flags) {
1027		ifmedia_add(&p->media, media_flags, 0, NULL);
1028		ifmedia_set(&p->media, media_flags);
1029	} else {
1030		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1031		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
1032	}
1033
1034
1035	snprintf(p->taskqbuf, TASKQ_NAME_LEN, "cxgb_port_taskq%d", p->port_id);
1036#ifdef TASKQUEUE_CURRENT
1037	/* Create a taskqueue for handling TX without starvation */
1038	p->tq = taskqueue_create(p->taskqbuf, M_NOWAIT,
1039	    taskqueue_thread_enqueue, &p->tq);
1040#else
1041	/* Create a taskqueue for handling TX without starvation */
1042	p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
1043	    taskqueue_thread_enqueue, &p->tq);
1044#endif
1045
1046	if (p->tq == NULL) {
1047		device_printf(dev, "failed to allocate port task queue\n");
1048		return (ENOMEM);
1049	}
1050	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
1051	    device_get_nameunit(dev));
1052	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);
1053
1054	t3_sge_init_port(p);
1055
1056	return (0);
1057}
1058
1059static int
1060cxgb_port_detach(device_t dev)
1061{
1062	struct port_info *p;
1063
1064	p = device_get_softc(dev);
1065
1066	PORT_LOCK(p);
1067	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
1068		cxgb_stop_locked(p);
1069	PORT_UNLOCK(p);
1070
1071	if (p->tq != NULL) {
1072		taskqueue_drain(p->tq, &p->start_task);
1073		taskqueue_free(p->tq);
1074		p->tq = NULL;
1075	}
1076
1077	PORT_LOCK_DEINIT(p);
1078	ether_ifdetach(p->ifp);
1079	if_free(p->ifp);
1080
1081	if (p->port_cdev != NULL)
1082		destroy_dev(p->port_cdev);
1083
1084	return (0);
1085}
1086
1087void
1088t3_fatal_err(struct adapter *sc)
1089{
1090	u_int fw_status[4];
1091
1092	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1093	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1094		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1095		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1096}
1097
1098int
1099t3_os_find_pci_capability(adapter_t *sc, int cap)
1100{
1101	device_t dev;
1102	struct pci_devinfo *dinfo;
1103	pcicfgregs *cfg;
1104	uint32_t status;
1105	uint8_t ptr;
1106
1107	dev = sc->dev;
1108	dinfo = device_get_ivars(dev);
1109	cfg = &dinfo->cfg;
1110
1111	status = pci_read_config(dev, PCIR_STATUS, 2);
1112	if (!(status & PCIM_STATUS_CAPPRESENT))
1113		return (0);
1114
1115	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1116	case 0:
1117	case 1:
1118		ptr = PCIR_CAP_PTR;
1119		break;
1120	case 2:
1121		ptr = PCIR_CAP_PTR_2;
1122		break;
1123	default:
1124		return (0);
1125		break;
1126	}
1127	ptr = pci_read_config(dev, ptr, 1);
1128
1129	while (ptr != 0) {
1130		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1131			return (ptr);
1132		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1133	}
1134
1135	return (0);
1136}
1137
1138int
1139t3_os_pci_save_state(struct adapter *sc)
1140{
1141	device_t dev;
1142	struct pci_devinfo *dinfo;
1143
1144	dev = sc->dev;
1145	dinfo = device_get_ivars(dev);
1146
1147	pci_cfg_save(dev, dinfo, 0);
1148	return (0);
1149}
1150
1151int
1152t3_os_pci_restore_state(struct adapter *sc)
1153{
1154	device_t dev;
1155	struct pci_devinfo *dinfo;
1156
1157	dev = sc->dev;
1158	dinfo = device_get_ivars(dev);
1159
1160	pci_cfg_restore(dev, dinfo);
1161	return (0);
1162}
1163
1164/**
1165 *	t3_os_link_changed - handle link status changes
1166 *	@adapter: the adapter associated with the link change
1167 *	@port_id: the port index whose link status has changed
1168 *	@link_status: the new status of the link
1169 *	@speed: the new speed setting
1170 *	@duplex: the new duplex setting
1171 *	@fc: the new flow-control setting
1172 *
1173 *	This is the OS-dependent handler for link status changes.  The OS
1174 *	neutral handler takes care of most of the processing for these events,
1175 *	then calls this handler for any OS-specific processing.
1176 */
1177void
1178t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1179     int duplex, int fc)
1180{
1181	struct port_info *pi = &adapter->port[port_id];
1182	struct cmac *mac = &adapter->port[port_id].mac;
1183
1184	if ((pi->ifp->if_flags & IFF_UP) == 0)
1185		return;
1186
1187	if (link_status) {
1188		t3_mac_enable(mac, MAC_DIRECTION_RX);
1189		if_link_state_change(pi->ifp, LINK_STATE_UP);
1190	} else {
1191		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1192		pi->phy.ops->power_down(&pi->phy, 1);
1193		t3_mac_disable(mac, MAC_DIRECTION_RX);
1194		t3_link_start(&pi->phy, mac, &pi->link_config);
1195	}
1196}
1197
1198/*
1199 * Interrupt-context handler for external (PHY) interrupts.
1200 */
1201void
1202t3_os_ext_intr_handler(adapter_t *sc)
1203{
1204	if (cxgb_debug)
1205		printf("t3_os_ext_intr_handler\n");
1206	/*
1207	 * Schedule a task to handle external interrupts as they may be slow
1208	 * and we use a mutex to protect MDIO registers.  We disable PHY
1209	 * interrupts in the meantime and let the task reenable them when
1210	 * it's done.
1211	 */
1212	ADAPTER_LOCK(sc);
1213	if (sc->slow_intr_mask) {
1214		sc->slow_intr_mask &= ~F_T3DBG;
1215		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1216		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1217	}
1218	ADAPTER_UNLOCK(sc);
1219}
1220
1221void
1222t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1223{
1224
1225	/*
1226	 * The ifnet might not be allocated before this gets called,
1227	 * as this is called early on in attach by t3_prep_adapter,
1228	 * so just save the address off in the port structure.
1229	 */
1230	if (cxgb_debug)
1231		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1232	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1233}
1234
1235/**
1236 *	cxgb_link_start - enable a port
1237 *	@p: the port to enable
1238 *
1239 *	Performs the MAC and PHY actions needed to enable a port.
1240 */
1241static void
1242cxgb_link_start(struct port_info *p)
1243{
1244	struct ifnet *ifp;
1245	struct t3_rx_mode rm;
1246	struct cmac *mac = &p->mac;
1247
1248	ifp = p->ifp;
1249
1250	t3_init_rx_mode(&rm, p);
1251	t3_mac_reset(mac);
1252	t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1253	t3_mac_set_address(mac, 0, p->hw_addr);
1254	t3_mac_set_rx_mode(mac, &rm);
1255	t3_link_start(&p->phy, mac, &p->link_config);
1256	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1257}
1258
1259/**
1260 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1261 *	@adap: the adapter
1262 *
1263 *	Sets up RSS to distribute packets to multiple receive queues.  We
1264 *	configure the RSS CPU lookup table to distribute to the number of HW
1265 *	receive queues, and the response queue lookup table to narrow that
1266 *	down to the response queues actually configured for each port.
1267 *	We always configure the RSS mapping for two ports since the mapping
1268 *	table has plenty of entries.
1269 */
1270static void
1271setup_rss(adapter_t *adap)
1272{
1273	int i;
1274	u_int nq[2];
1275	uint8_t cpus[SGE_QSETS + 1];
1276	uint16_t rspq_map[RSS_TABLE_SIZE];
1277
1278	nq[0] = adap->port[0].nqsets;
1279	nq[1] = max((u_int)adap->port[1].nqsets, 1U);
1280
1281	for (i = 0; i < SGE_QSETS; ++i)
1282		cpus[i] = i;
1283	cpus[SGE_QSETS] = 0xff;
1284
1285	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1286		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1287		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1288	}
1289	/* Calculate the reverse RSS map table */
1290	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1291		if (adap->rrss_map[rspq_map[i]] == 0xff)
1292			adap->rrss_map[rspq_map[i]] = i;
1293
1294	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1295		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1296		      F_RRCPLMAPEN | V_RRCPLCPUSIZE(6), cpus, rspq_map);
1297
1298}
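/*
 * Worked example (illustrative): with two queue sets on each of two ports,
 * nq[] = {2, 2}, so the first half of rspq_map cycles 0,1,0,1,... and the
 * second half cycles 2,3,2,3,..., steering each port's RSS hash space onto
 * its own response queues.  rrss_map[] records the reverse mapping, which
 * set_filter() uses when programming a filter's queue set.
 */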
1299
1300/*
1301 * Sends an mbuf to an offload queue driver; the transmit itself is
1302 * wrapped in a critical section.
1303 */
1304static inline int
1305offload_tx(struct toedev *tdev, struct mbuf *m)
1306{
1307	int ret;
1308
1309	critical_enter();
1310	ret = t3_offload_tx(tdev, m);
1311	critical_exit();
1312	return (ret);
1313}
1314
1315static int
1316write_smt_entry(struct adapter *adapter, int idx)
1317{
1318	struct port_info *pi = &adapter->port[idx];
1319	struct cpl_smt_write_req *req;
1320	struct mbuf *m;
1321
1322	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1323		return (ENOMEM);
1324
1325	req = mtod(m, struct cpl_smt_write_req *);
1326	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1327	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1328	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1329	req->iff = idx;
1330	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1331	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1332
1333	m_set_priority(m, 1);
1334
1335	offload_tx(&adapter->tdev, m);
1336
1337	return (0);
1338}
1339
1340static int
1341init_smt(struct adapter *adapter)
1342{
1343	int i;
1344
1345	for_each_port(adapter, i)
1346		write_smt_entry(adapter, i);
1347	return 0;
1348}
1349
1350static void
1351init_port_mtus(adapter_t *adapter)
1352{
1353	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1354
1355	if (adapter->port[1].ifp)
1356		mtus |= adapter->port[1].ifp->if_mtu << 16;
1357	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1358}
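/*
 * Both port MTUs are packed into the single A_TP_MTU_PORT_TABLE register:
 * port 0 in the low 16 bits and port 1, when present, in the high 16 bits.
 */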
1359
1360static void
1361send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1362			      int hi, int port)
1363{
1364	struct mbuf *m;
1365	struct mngt_pktsched_wr *req;
1366
1367	m = m_gethdr(M_DONTWAIT, MT_DATA);
1368	if (m) {
1369		req = mtod(m, struct mngt_pktsched_wr *);
1370		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1371		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1372		req->sched = sched;
1373		req->idx = qidx;
1374		req->min = lo;
1375		req->max = hi;
1376		req->binding = port;
1377		m->m_len = m->m_pkthdr.len = sizeof(*req);
1378		t3_mgmt_tx(adap, m);
1379	}
1380}
1381
1382static void
1383bind_qsets(adapter_t *sc)
1384{
1385	int i, j;
1386
1387	for (i = 0; i < (sc)->params.nports; ++i) {
1388		const struct port_info *pi = adap2pinfo(sc, i);
1389
1390		for (j = 0; j < pi->nqsets; ++j)
1391			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1392					  -1, i);
1393	}
1394}
1395
1396static void
1397update_tpeeprom(struct adapter *adap)
1398{
1399	const struct firmware *tpeeprom;
1400	char buf[64];
1401	uint32_t version;
1402	unsigned int major, minor;
1403	int ret, len;
1404	char rev;
1405
1406	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1407
1408	major = G_TP_VERSION_MAJOR(version);
1409	minor = G_TP_VERSION_MINOR(version);
1410	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1411		return;
1412
1413	rev = t3rev2char(adap);
1414
1415	snprintf(buf, sizeof(buf), TPEEPROM_NAME, rev,
1416		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1417
1418	tpeeprom = firmware_get(buf);
1419	if (tpeeprom == NULL) {
1420		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1421			buf);
1422		return;
1423	}
1424
1425	len = tpeeprom->datasize - 4;
1426
1427	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1428	if (ret)
1429		goto release_tpeeprom;
1430
1431	if (len != TP_SRAM_LEN) {
1432		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", buf, len, TP_SRAM_LEN);
1433		goto release_tpeeprom;
1434	}
1435
1436	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1437	    TP_SRAM_OFFSET);
1438
1439	if (!ret) {
1440		device_printf(adap->dev,
1441			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1442			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1443	} else
1444		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1445
1446release_tpeeprom:
1447	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1448
1449	return;
1450}
1451
1452static int
1453update_tpsram(struct adapter *adap)
1454{
1455	const struct firmware *tpsram;
1456	char buf[64];
1457	int ret;
1458	char rev;
1459
1460	rev = t3rev2char(adap);
1461	if (!rev)
1462		return 0;
1463
1464	update_tpeeprom(adap);
1465
1466	snprintf(buf, sizeof(buf), TPSRAM_NAME, rev,
1467		 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1468
1469	tpsram = firmware_get(buf);
1470	if (tpsram == NULL){
1471		device_printf(adap->dev, "could not load TP SRAM: unable to load %s\n",
1472			buf);
1473		return (EINVAL);
1474	} else
1475		device_printf(adap->dev, "updating TP SRAM with %s\n", buf);
1476
1477	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1478	if (ret)
1479		goto release_tpsram;
1480
1481	ret = t3_set_proto_sram(adap, tpsram->data);
1482	if (ret)
1483		device_printf(adap->dev, "loading protocol SRAM failed\n");
1484
1485release_tpsram:
1486	firmware_put(tpsram, FIRMWARE_UNLOAD);
1487
1488	return ret;
1489}
1490
1491/**
1492 *	cxgb_up - enable the adapter
1493 *	@adap: adapter being enabled
1494 *
1495 *	Called when the first port is enabled, this function performs the
1496 *	actions necessary to make an adapter operational, such as completing
1497 *	the initialization of HW modules, and enabling interrupts.
1498 *
1499 */
1500static int
1501cxgb_up(struct adapter *sc)
1502{
1503	int err = 0;
1504
1505	if ((sc->flags & FULL_INIT_DONE) == 0) {
1506
1507		if ((sc->flags & FW_UPTODATE) == 0)
1508			if ((err = upgrade_fw(sc)))
1509				goto out;
1510		if ((sc->flags & TPS_UPTODATE) == 0)
1511			if ((err = update_tpsram(sc)))
1512				goto out;
1513		err = t3_init_hw(sc, 0);
1514		if (err)
1515			goto out;
1516
1517		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1518
1519		err = setup_sge_qsets(sc);
1520		if (err)
1521			goto out;
1522
1523		alloc_filters(sc);
1524		setup_rss(sc);
1525		sc->flags |= FULL_INIT_DONE;
1526	}
1527
1528	t3_intr_clear(sc);
1529
1530	/* If it's MSI or INTx, allocate a single interrupt for everything */
1531	if ((sc->flags & USING_MSIX) == 0) {
1532		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1533		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1534			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
1535			err = EINVAL;
1536			goto out;
1537		}
1538		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1539
1540		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1541#ifdef INTR_FILTERS
1542			NULL,
1543#endif
1544			sc->cxgb_intr, sc, &sc->intr_tag)) {
1545			device_printf(sc->dev, "Cannot set up interrupt\n");
1546			err = EINVAL;
1547			goto irq_err;
1548		}
1549	} else {
1550		cxgb_setup_msix(sc, sc->msi_count);
1551	}
1552
1553	t3_sge_start(sc);
1554	t3_intr_enable(sc);
1555
1556	if ((sc->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX) {
1557		bind_qsets(sc);
1558		setup_hw_filters(sc);
1559		sc->flags |= QUEUES_BOUND;
1560	}
1561out:
1562	return (err);
1563irq_err:
1564	CH_ERR(sc, "request_irq failed, err %d\n", err);
1565	goto out;
1566}
1567
1568
1569/*
1570 * Release resources when all the ports and offloading have been stopped.
1571 */
1572static void
1573cxgb_down_locked(struct adapter *sc)
1574{
1575	int i;
1576
1577	t3_sge_stop(sc);
1578	t3_intr_disable(sc);
1579
1580	if (sc->intr_tag != NULL) {
1581		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1582		sc->intr_tag = NULL;
1583	}
1584	if (sc->irq_res != NULL) {
1585		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1586		    sc->irq_rid, sc->irq_res);
1587		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1588		    sc->irq_res);
1589		sc->irq_res = NULL;
1590	}
1591
1592	if (sc->flags & USING_MSIX)
1593		cxgb_teardown_msix(sc);
1594	ADAPTER_UNLOCK(sc);
1595
1596	callout_drain(&sc->cxgb_tick_ch);
1597	callout_drain(&sc->sge_timer_ch);
1598
1599	if (sc->tq != NULL)
1600		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1601	for (i = 0; i < sc->params.nports; i++)
1602		if (sc->port[i].tq != NULL)
1603			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
1604
1605}
1606
1607static int
1608offload_open(struct port_info *pi)
1609{
1610	struct adapter *adapter = pi->adapter;
1611	struct toedev *tdev = TOEDEV(pi->ifp);
1612	int adap_up = adapter->open_device_map & PORT_MASK;
1613	int err = 0;
1614
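	/*
	 * Atomically mark the offload device open: if OFFLOAD_DEVMAP_BIT was
	 * already set (or open_device_map changed under us) the compare-and-set
	 * fails and there is nothing more to do.
	 */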
1615	if (atomic_cmpset_int(&adapter->open_device_map,
1616		(adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT),
1617		(adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0)
1618		return (0);
1619
1620	ADAPTER_LOCK(pi->adapter);
1621	if (!adap_up)
1622		err = cxgb_up(adapter);
1623	ADAPTER_UNLOCK(pi->adapter);
1624	if (err)
1625		return (err);
1626
1627	t3_tp_set_offload_mode(adapter, 1);
1628	tdev->lldev = adapter->port[0].ifp;
1629	err = cxgb_offload_activate(adapter);
1630	if (err)
1631		goto out;
1632
1633	init_port_mtus(adapter);
1634	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1635		     adapter->params.b_wnd,
1636		     adapter->params.rev == 0 ?
1637		       adapter->port[0].ifp->if_mtu : 0xffff);
1638	init_smt(adapter);
1639
1640	/* Call back all registered clients */
1641	cxgb_add_clients(tdev);
1642
1643out:
1644	/* undo the offload setup if activation failed */
1645	if (err) {
1646		t3_tp_set_offload_mode(adapter, 0);
1647		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1648		cxgb_set_dummy_ops(tdev);
1649	}
1650	return (err);
1651}
1652#ifdef notyet
1653static int
1654offload_close(struct toedev *tdev)
1655{
1656	struct adapter *adapter = tdev2adap(tdev);
1657
1658	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1659		return (0);
1660
1661	/* Call back all registered clients */
1662	cxgb_remove_clients(tdev);
1663	tdev->lldev = NULL;
1664	cxgb_set_dummy_ops(tdev);
1665	t3_tp_set_offload_mode(adapter, 0);
1666	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1667
1668	if (!adapter->open_device_map)
1669		cxgb_down(adapter);
1670
1671	cxgb_offload_deactivate(adapter);
1672	return (0);
1673}
1674#endif
1675
1676static void
1677cxgb_init(void *arg)
1678{
1679	struct port_info *p = arg;
1680
1681	PORT_LOCK(p);
1682	cxgb_init_locked(p);
1683	PORT_UNLOCK(p);
1684}
1685
1686static void
1687cxgb_init_locked(struct port_info *p)
1688{
1689	struct ifnet *ifp;
1690	adapter_t *sc = p->adapter;
1691	int err;
1692
1693	PORT_LOCK_ASSERT_OWNED(p);
1694	ifp = p->ifp;
1695
1696	ADAPTER_LOCK(p->adapter);
1697	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1698		ADAPTER_UNLOCK(p->adapter);
1699		cxgb_stop_locked(p);
1700		return;
1701	}
1702	if (p->adapter->open_device_map == 0) {
1703		t3_intr_clear(sc);
1704		t3_sge_init_adapter(sc);
1705	}
1706	setbit(&p->adapter->open_device_map, p->port_id);
1707	ADAPTER_UNLOCK(p->adapter);
1708
1709	if (is_offload(sc) && !ofld_disable) {
1710		err = offload_open(p);
1711		if (err)
1712			log(LOG_WARNING,
1713			    "Could not initialize offload capabilities\n");
1714	}
1715	cxgb_link_start(p);
1716	t3_link_changed(sc, p->port_id);
1717	ifp->if_baudrate = p->link_config.speed * 1000000;
1718
1719	t3_port_intr_enable(sc, p->port_id);
1720
1721	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
1722	    cxgb_tick, sc);
1723
1724	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1725	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1726}
1727
1728static void
1729cxgb_set_rxmode(struct port_info *p)
1730{
1731	struct t3_rx_mode rm;
1732	struct cmac *mac = &p->mac;
1733
1734	PORT_LOCK_ASSERT_OWNED(p);
1735
1736	t3_init_rx_mode(&rm, p);
1737	t3_mac_set_rx_mode(mac, &rm);
1738}
1739
1740static void
1741cxgb_stop_locked(struct port_info *p)
1742{
1743	struct ifnet *ifp;
1744
1745	PORT_LOCK_ASSERT_OWNED(p);
1746	ADAPTER_LOCK_ASSERT_NOTOWNED(p->adapter);
1747
1748	ifp = p->ifp;
1749
1750	t3_port_intr_disable(p->adapter, p->port_id);
1751	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1752	p->phy.ops->power_down(&p->phy, 1);
1753	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1754
1755	ADAPTER_LOCK(p->adapter);
1756	clrbit(&p->adapter->open_device_map, p->port_id);
1757
1758
1759	if (p->adapter->open_device_map == 0) {
1760		cxgb_down_locked(p->adapter);
1761	} else
1762		ADAPTER_UNLOCK(p->adapter);
1763
1764}
1765
1766static int
1767cxgb_set_mtu(struct port_info *p, int mtu)
1768{
1769	struct ifnet *ifp = p->ifp;
1770	int error = 0;
1771
1772	if ((mtu < ETHERMIN) || (mtu > ETHER_MAX_LEN_JUMBO))
1773		error = EINVAL;
1774	else if (ifp->if_mtu != mtu) {
1775		PORT_LOCK(p);
1776		ifp->if_mtu = mtu;
1777		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1778			callout_stop(&p->adapter->cxgb_tick_ch);
1779			cxgb_stop_locked(p);
1780			cxgb_init_locked(p);
1781		}
1782		PORT_UNLOCK(p);
1783	}
1784	return (error);
1785}
1786
1787static int
1788cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1789{
1790	struct port_info *p = ifp->if_softc;
1791	struct ifaddr *ifa = (struct ifaddr *)data;
1792	struct ifreq *ifr = (struct ifreq *)data;
1793	int flags, error = 0;
1794	uint32_t mask;
1795
1796	/*
1797	 * XXX need to check that we aren't in the middle of an unload
1798	 */
1799	switch (command) {
1800	case SIOCSIFMTU:
1801		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1802		break;
1803	case SIOCSIFADDR:
1804	case SIOCGIFADDR:
1805		PORT_LOCK(p);
1806		if (ifa->ifa_addr->sa_family == AF_INET) {
1807			ifp->if_flags |= IFF_UP;
1808			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1809				cxgb_init_locked(p);
1810			arp_ifinit(ifp, ifa);
1811		} else
1812			error = ether_ioctl(ifp, command, data);
1813		PORT_UNLOCK(p);
1814		break;
1815	case SIOCSIFFLAGS:
1816		callout_drain(&p->adapter->cxgb_tick_ch);
1817		PORT_LOCK(p);
1818		if (ifp->if_flags & IFF_UP) {
1819			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1820				flags = p->if_flags;
1821				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1822				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1823					cxgb_set_rxmode(p);
1824			} else
1825				cxgb_init_locked(p);
1826			p->if_flags = ifp->if_flags;
1827		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1828			cxgb_stop_locked(p);
1829
1830		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1831			adapter_t *sc = p->adapter;
1832			callout_reset(&sc->cxgb_tick_ch,
1833			    sc->params.stats_update_period * hz,
1834			    cxgb_tick, sc);
1835		}
1836		PORT_UNLOCK(p);
1837		break;
1838	case SIOCSIFMEDIA:
1839	case SIOCGIFMEDIA:
1840		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1841		break;
1842	case SIOCSIFCAP:
1843		PORT_LOCK(p);
1844		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1845		if (mask & IFCAP_TXCSUM) {
1846			if (IFCAP_TXCSUM & ifp->if_capenable) {
1847				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1848				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1849				    | CSUM_TSO);
1850			} else {
1851				ifp->if_capenable |= IFCAP_TXCSUM;
1852				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1853			}
1854		} else if (mask & IFCAP_RXCSUM) {
1855			if (IFCAP_RXCSUM & ifp->if_capenable) {
1856				ifp->if_capenable &= ~IFCAP_RXCSUM;
1857			} else {
1858				ifp->if_capenable |= IFCAP_RXCSUM;
1859			}
1860		}
1861		if (mask & IFCAP_TSO4) {
1862			if (IFCAP_TSO4 & ifp->if_capenable) {
1863				ifp->if_capenable &= ~IFCAP_TSO4;
1864				ifp->if_hwassist &= ~CSUM_TSO;
1865			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
1866				ifp->if_capenable |= IFCAP_TSO4;
1867				ifp->if_hwassist |= CSUM_TSO;
1868			} else {
1869				if (cxgb_debug)
1870					printf("cxgb requires tx checksum offload"
1871					    " to be enabled to use TSO\n");
1872				error = EINVAL;
1873			}
1874		}
1875		PORT_UNLOCK(p);
1876		break;
1877	default:
1878		error = ether_ioctl(ifp, command, data);
1879		break;
1880	}
1881	return (error);
1882}
1883
1884static int
1885cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
1886{
1887	struct sge_qset *qs;
1888	struct sge_txq *txq;
1889	struct port_info *p = ifp->if_softc;
1890	struct mbuf *m0, *m = NULL;
1891	int err, in_use_init;
1892
1893	if (!p->link_config.link_ok)
1894		return (ENXIO);
1895
1896	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1897		return (ENOBUFS);
1898
1899	qs = &p->adapter->sge.qs[p->first_qset];
1900	txq = &qs->txq[TXQ_ETH];
1901	err = 0;
1902
1903	if (txq->flags & TXQ_TRANSMITTING)
1904		return (EINPROGRESS);
1905
1906	mtx_lock(&txq->lock);
1907	txq->flags |= TXQ_TRANSMITTING;
1908	in_use_init = txq->in_use;
1909	while ((txq->in_use - in_use_init < txmax) &&
1910	    (txq->size > txq->in_use + TX_MAX_DESC)) {
1911		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
1912		if (m == NULL)
1913			break;
1914		/*
1915		 * Convert chain to M_IOVEC
1916		 */
1917		KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early"));
1918		m0 = m;
1919#ifdef INVARIANTS
1920		/*
1921		 * Clean up after net stack sloppiness
1922		 * before calling m_sanity
1923		 */
1924		m0 = m->m_next;
1925		while (m0) {
1926			m0->m_flags &= ~M_PKTHDR;
1927			m0 = m0->m_next;
1928		}
1929		m_sanity(m, 0);
1930		m0 = m;
1931#endif
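		/*
		 * If mbuf collapsing is enabled and the packet is larger than
		 * a cluster, try to collapse the chain; should m_collapse()
		 * report EFBIG, fall back to m_defrag() and collapse the
		 * defragmented chain before handing it to t3_encap().
		 */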
1932		if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES &&
1933		    m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) {
1934			if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) {
1935				m = m0;
1936				m_collapse(m, TX_MAX_SEGS, &m0);
1937			} else
1938				break;
1939		}
1940		m = m0;
1941		if ((err = t3_encap(p, &m)) != 0)
1942			break;
1943		BPF_MTAP(ifp, m);
1944	}
1945	txq->flags &= ~TXQ_TRANSMITTING;
1946	mtx_unlock(&txq->lock);
1947
1948	if (__predict_false(err)) {
1949		if (err == ENOMEM) {
1950			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1951			IFQ_LOCK(&ifp->if_snd);
1952			IFQ_DRV_PREPEND(&ifp->if_snd, m);
1953			IFQ_UNLOCK(&ifp->if_snd);
1954		}
1955	}
1956	if (err == 0 && m == NULL)
1957		err = ENOBUFS;
1958	else if ((err == 0) &&  (txq->size <= txq->in_use + TX_MAX_DESC) &&
1959	    (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
1960		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1961		err = ENOSPC;
1962	}
1963	return (err);
1964}
1965
1966static void
1967cxgb_start_proc(void *arg, int ncount)
1968{
1969	struct ifnet *ifp = arg;
1970	struct port_info *pi = ifp->if_softc;
1971	struct sge_qset *qs;
1972	struct sge_txq *txq;
1973	int error;
1974
1975	qs = &pi->adapter->sge.qs[pi->first_qset];
1976	txq = &qs->txq[TXQ_ETH];
1977
1978	do {
1979		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2)
1980			taskqueue_enqueue(pi->tq, &txq->qreclaim_task);
1981
1982		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
1983	} while (error == 0);
1984}
1985
1986static void
1987cxgb_start(struct ifnet *ifp)
1988{
1989	struct port_info *pi = ifp->if_softc;
1990	struct sge_qset *qs;
1991	struct sge_txq *txq;
1992	int err;
1993
1994	qs = &pi->adapter->sge.qs[pi->first_qset];
1995	txq = &qs->txq[TXQ_ETH];
1996
1997	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2)
1998		taskqueue_enqueue(pi->tq,
1999		    &txq->qreclaim_task);
2000
2001	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);
2002
2003	if (err == 0)
2004		taskqueue_enqueue(pi->tq, &pi->start_task);
2005}
2006
2007
2008static int
2009cxgb_media_change(struct ifnet *ifp)
2010{
2011	if_printf(ifp, "media change not supported\n");
2012	return (ENXIO);
2013}
2014
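/*
 * Report link state and the negotiated speed/duplex to ifmedia.  Only
 * 10/100/1000BaseT media types are mapped here; when the link is down only
 * IFM_AVALID is reported.
 */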
2015static void
2016cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2017{
2018	struct port_info *p = ifp->if_softc;
2019
2020	ifmr->ifm_status = IFM_AVALID;
2021	ifmr->ifm_active = IFM_ETHER;
2022
2023	if (!p->link_config.link_ok)
2024		return;
2025
2026	ifmr->ifm_status |= IFM_ACTIVE;
2027
2028	switch (p->link_config.speed) {
2029	case 10:
2030		ifmr->ifm_active |= IFM_10_T;
2031		break;
2032	case 100:
2033		ifmr->ifm_active |= IFM_100_TX;
2034		break;
2035	case 1000:
2036		ifmr->ifm_active |= IFM_1000_T;
2037		break;
2038	}
2039
2040	if (p->link_config.duplex)
2041		ifmr->ifm_active |= IFM_FDX;
2042	else
2043		ifmr->ifm_active |= IFM_HDX;
2044}
2045
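/*
 * Interrupt handler for asynchronous (slow path) adapter events.  The work
 * may need to sleep, so it is deferred to the adapter taskqueue.
 */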
2046static void
2047cxgb_async_intr(void *data)
2048{
2049	adapter_t *sc = data;
2050
2051	if (cxgb_debug)
2052		device_printf(sc->dev, "cxgb_async_intr\n");
2053	/*
2054	 * May need to sleep - defer to taskqueue
2055	 */
2056	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2057}
2058
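/*
 * Task handler for external (PHY) interrupts: let the common code service
 * the PHY, then re-enable F_T3DBG in the slow interrupt mask and ack the
 * cause bit so further external interrupts can be delivered.
 */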
2059static void
2060cxgb_ext_intr_handler(void *arg, int count)
2061{
2062	adapter_t *sc = (adapter_t *)arg;
2063
2064	if (cxgb_debug)
2065		printf("cxgb_ext_intr_handler\n");
2066
2067	t3_phy_intr_handler(sc);
2068
2069	/* Now reenable external interrupts */
2070	ADAPTER_LOCK(sc);
2071	if (sc->slow_intr_mask) {
2072		sc->slow_intr_mask |= F_T3DBG;
2073		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2074		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2075	}
2076	ADAPTER_UNLOCK(sc);
2077}
2078
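/*
 * Poll link state for ports whose PHY cannot interrupt on link changes and
 * refresh each interface's if_baudrate from the negotiated speed (Mbps).
 */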
2079static void
2080check_link_status(adapter_t *sc)
2081{
2082	int i;
2083
2084	for (i = 0; i < (sc)->params.nports; ++i) {
2085		struct port_info *p = &sc->port[i];
2086
2087		if (!(p->port_type->caps & SUPPORTED_IRQ))
2088			t3_link_changed(sc, i);
2089		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2090	}
2091}
2092
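/*
 * Periodic watchdog for T3 rev B2 MACs.  A watchdog status of 1 is counted
 * as a MAC toggle; a status of 2 triggers a full MAC recovery: reprogram the
 * MTU and MAC address, restore the RX mode, restart the link and re-enable
 * the port interrupts.
 */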
2093static void
2094check_t3b2_mac(struct adapter *adapter)
2095{
2096	int i;
2097
2098	for_each_port(adapter, i) {
2099		struct port_info *p = &adapter->port[i];
2100		struct ifnet *ifp = p->ifp;
2101		int status;
2102
2103		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2104			continue;
2105
2106		status = 0;
2107		PORT_LOCK(p);
2108		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2109			status = t3b2_mac_watchdog_task(&p->mac);
2110		if (status == 1)
2111			p->mac.stats.num_toggled++;
2112		else if (status == 2) {
2113			struct cmac *mac = &p->mac;
2114
2115			t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN
2116			    + ETHER_VLAN_ENCAP_LEN);
2117			t3_mac_set_address(mac, 0, p->hw_addr);
2118			cxgb_set_rxmode(p);
2119			t3_link_start(&p->phy, mac, &p->link_config);
2120			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2121			t3_port_intr_enable(adapter, p->port_id);
2122			p->mac.stats.num_resets++;
2123		}
2124		PORT_UNLOCK(p);
2125	}
2126}
2127
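/*
 * Periodic callout: defer the real work to tick_task on the adapter
 * taskqueue and re-arm the callout for the next stats update period while
 * any port is still open.
 */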
2128static void
2129cxgb_tick(void *arg)
2130{
2131	adapter_t *sc = (adapter_t *)arg;
2132
2133	taskqueue_enqueue(sc->tq, &sc->tick_task);
2134
2135	if (sc->open_device_map != 0)
2136		callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
2137		    cxgb_tick, sc);
2138}
2139
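/*
 * Taskqueue side of the periodic tick: poll link state when link polling is
 * configured and, on rev B2 parts with fewer than four ports, run the MAC
 * watchdog.
 */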
2140static void
2141cxgb_tick_handler(void *arg, int count)
2142{
2143	adapter_t *sc = (adapter_t *)arg;
2144	const struct adapter_params *p = &sc->params;
2145
2146	ADAPTER_LOCK(sc);
2147	if (p->linkpoll_period)
2148		check_link_status(sc);
2149
2150	/*
2151	 * The adapter lock can currently only be acquired after the
2152	 * port lock.
2153	 */
2154	ADAPTER_UNLOCK(sc);
2155
2156	if (p->rev == T3_REV_B2 && p->nports < 4)
2157		check_t3b2_mac(sc);
2158}
2159
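/*
 * The seq_file-based filter dump below comes from the Linux driver's /proc
 * support and is compiled out here, presumably kept for reference until an
 * equivalent FreeBSD interface exists.
 */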
2160#if 0
2161static void *
2162filter_get_idx(struct seq_file *seq, loff_t pos)
2163{
2164	int i;
2165	struct adapter *adap = seq->private;
2166	struct filter_info *p = adap->filters;
2167
2168	if (!p)
2169		return NULL;
2170
2171	for (i = 0; i < adap->params.mc5.nfilters; i++, p++)
2172		if (p->valid) {
2173			if (!pos)
2174				return p;
2175			pos--;
2176		}
2177	return NULL;
2178}
2179
2180static void *filter_get_nxt_idx(struct seq_file *seq, struct filter_info *p)
2181{
2182	struct adapter *adap = seq->private;
2183	struct filter_info *end = &adap->filters[adap->params.mc5.nfilters];
2184
2185	while (++p < end && !p->valid)
2186		;
2187	return p < end ? p : NULL;
2188}
2189
2190static void *filter_seq_start(struct seq_file *seq, loff_t *pos)
2191{
2192	return *pos ? filter_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2193}
2194
2195static void *filter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2196{
2197	v = *pos ? filter_get_nxt_idx(seq, v) : filter_get_idx(seq, 0);
2198	if (v)
2199		++*pos;
2200	return v;
2201}
2202
2203static void filter_seq_stop(struct seq_file *seq, void *v)
2204{
2205}
2206
2207static int filter_seq_show(struct seq_file *seq, void *v)
2208{
2209	static const char *pkt_type[] = { "any", "tcp", "udp", "frag" };
2210
2211	if (v == SEQ_START_TOKEN)
2212		seq_puts(seq, "index         SIP              DIP       sport "
2213			      "dport VLAN PRI MAC type Q\n");
2214	else {
2215		char sip[20], dip[20];
2216		struct filter_info *f = v;
2217		struct adapter *adap = seq->private;
2218
2219		sprintf(sip, NIPQUAD_FMT "/%-2u", HIPQUAD(f->sip),
2220			f->sip_mask ? 33 - ffs(f->sip_mask) : 0);
2221		sprintf(dip, NIPQUAD_FMT, HIPQUAD(f->dip));
2222		seq_printf(seq, "%5zu %18s %15s ", f - adap->filters, sip, dip);
2223		seq_printf(seq, f->sport ? "%5u " : "    * ", f->sport);
2224		seq_printf(seq, f->dport ? "%5u " : "    * ", f->dport);
2225		seq_printf(seq, f->vlan != 0xfff ? "%4u " : "   * ", f->vlan);
2226		seq_printf(seq, f->vlan_prio == FILTER_NO_VLAN_PRI ?
2227			   "  * " : "%1u/%1u ", f->vlan_prio, f->vlan_prio | 1);
2228		if (!f->mac_vld)
2229			seq_printf(seq, "  * ");
2230		else if (f->mac_hit)
2231			seq_printf(seq, "%3u ", f->mac_idx);
2232		else
2233			seq_printf(seq, " -1 ");
2234		seq_printf(seq, "%4s ", pkt_type[f->pkt_type]);
2235		if (!f->pass)
2236			seq_printf(seq, "-\n");
2237		else if (f->rss)
2238			seq_printf(seq, "*\n");
2239		else
2240			seq_printf(seq, "%1u\n", f->qset);
2241	}
2242	return 0;
2243}
2244
2245static struct seq_operations filter_seq_ops = {
2246	.start = filter_seq_start,
2247	.next = filter_seq_next,
2248	.stop = filter_seq_stop,
2249	.show = filter_seq_show
2250};
2251
2252static int filter_seq_open(struct inode *inode, struct file *file)
2253{
2254	int rc = seq_open(file, &filter_seq_ops);
2255
2256	if (!rc) {
2257		struct proc_dir_entry *dp = PDE(inode);
2258		struct seq_file *seq = file->private_data;
2259
2260		seq->private = dp->data;
2261	}
2262	return rc;
2263}
2264
2265static struct file_operations filter_seq_fops = {
2266	.owner = THIS_MODULE,
2267	.open = filter_seq_open,
2268	.read = seq_read,
2269	.llseek = seq_lseek,
2270	.release = seq_release
2271};
2272
2273#endif
2274
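/*
 * Write 'len' bytes at 'offset' into the serial EEPROM.  Writes must be
 * 32-bit aligned, so unaligned requests are widened by first reading the
 * partially overwritten leading and trailing words into a bounce buffer;
 * aligned requests use the caller's buffer directly.  Write protection is
 * dropped around the update and restored afterwards.
 */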
2275static int
2276set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2277{
2278	uint8_t *buf;
2279	int err = 0;
2280	u32 aligned_offset, aligned_len, *p;
2281	struct adapter *adapter = pi->adapter;
2282
2283
2284	aligned_offset = offset & ~3;
2285	aligned_len = (len + (offset & 3) + 3) & ~3;
2286
2287	if (aligned_offset != offset || aligned_len != len) {
2288		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2289		if (!buf)
2290			return (ENOMEM);
2291		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2292		if (!err && aligned_len > 4)
2293			err = t3_seeprom_read(adapter,
2294					      aligned_offset + aligned_len - 4,
2295					      (u32 *)&buf[aligned_len - 4]);
2296		if (err)
2297			goto out;
2298		memcpy(buf + (offset & 3), data, len);
2299	} else
2300		buf = (uint8_t *)(uintptr_t)data;
2301
2302	err = t3_seeprom_wp(adapter, 0);
2303	if (err)
2304		goto out;
2305
2306	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2307		err = t3_seeprom_write(adapter, aligned_offset, *p);
2308		aligned_offset += 4;
2309	}
2310
2311	if (!err)
2312		err = t3_seeprom_wp(adapter, 1);
2313out:
2314	if (buf != data)
2315		free(buf, M_DEVBUF);
2316	return (err);
2317}
2318
2319
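/*
 * Range check used by the ioctl handlers below; a negative value means the
 * parameter was left unset by the caller and always passes.
 */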
2320static int
2321in_range(int val, int lo, int hi)
2322{
2323	return val < 0 || (val <= hi && val >= lo);
2324}
2325
2326static int
2327cxgb_extension_open(struct cdev *dev, int flags, int fmp, d_thread_t *td)
2328{
2329	return (0);
2330}
2331
2332static int
2333cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2334{
2335	return (0);
2336}
2337
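/*
 * Privileged driver-specific ioctls on the cxgb control device: MDIO
 * register access, raw register reads/writes, SGE context/descriptor and
 * queue-set inspection and configuration, MTU table and MC5 filter
 * management, offload device bring-up, memory and TCAM dumps, trace filters,
 * packet scheduler setup, and a full register dump.
 */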
2338static int
2339cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2340    int fflag, struct thread *td)
2341{
2342	int mmd, error = 0;
2343	struct port_info *pi = dev->si_drv1;
2344	adapter_t *sc = pi->adapter;
2345
2346#ifdef PRIV_SUPPORTED
2347	if (priv_check(td, PRIV_DRIVER)) {
2348		if (cxgb_debug)
2349			printf("user does not have access to privileged ioctls\n");
2350		return (EPERM);
2351	}
2352#else
2353	if (suser(td)) {
2354		if (cxgb_debug)
2355			printf("user does not have access to privileged ioctls\n");
2356		return (EPERM);
2357	}
2358#endif
2359
2360	switch (cmd) {
2361	case SIOCGMIIREG: {
2362		uint32_t val;
2363		struct cphy *phy = &pi->phy;
2364		struct mii_data *mid = (struct mii_data *)data;
2365
2366		if (!phy->mdio_read)
2367			return (EOPNOTSUPP);
2368		if (is_10G(sc)) {
2369			mmd = mid->phy_id >> 8;
2370			if (!mmd)
2371				mmd = MDIO_DEV_PCS;
2372			else if (mmd > MDIO_DEV_XGXS)
2373				return (EINVAL);
2374
2375			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2376					     mid->reg_num, &val);
2377		} else
2378			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2379					     mid->reg_num & 0x1f, &val);
2380		if (error == 0)
2381			mid->val_out = val;
2382		break;
2383	}
2384	case SIOCSMIIREG: {
2385		struct cphy *phy = &pi->phy;
2386		struct mii_data *mid = (struct mii_data *)data;
2387
2388		if (!phy->mdio_write)
2389			return (EOPNOTSUPP);
2390		if (is_10G(sc)) {
2391			mmd = mid->phy_id >> 8;
2392			if (!mmd)
2393				mmd = MDIO_DEV_PCS;
2394			else if (mmd > MDIO_DEV_XGXS)
2395				return (EINVAL);
2396
2397			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2398					      mmd, mid->reg_num, mid->val_in);
2399		} else
2400			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2401					      mid->reg_num & 0x1f,
2402					      mid->val_in);
2403		break;
2404	}
2405	case CHELSIO_SETREG: {
2406		struct ch_reg *edata = (struct ch_reg *)data;
2407		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2408			return (EFAULT);
2409		t3_write_reg(sc, edata->addr, edata->val);
2410		break;
2411	}
2412	case CHELSIO_GETREG: {
2413		struct ch_reg *edata = (struct ch_reg *)data;
2414		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2415			return (EFAULT);
2416		edata->val = t3_read_reg(sc, edata->addr);
2417		break;
2418	}
2419	case CHELSIO_GET_SGE_CONTEXT: {
2420		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2421		mtx_lock(&sc->sge.reg_lock);
2422		switch (ecntxt->cntxt_type) {
2423		case CNTXT_TYPE_EGRESS:
2424			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2425			    ecntxt->data);
2426			break;
2427		case CNTXT_TYPE_FL:
2428			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
2429			    ecntxt->data);
2430			break;
2431		case CNTXT_TYPE_RSP:
2432			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2433			    ecntxt->data);
2434			break;
2435		case CNTXT_TYPE_CQ:
2436			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
2437			    ecntxt->data);
2438			break;
2439		default:
2440			error = EINVAL;
2441			break;
2442		}
2443		mtx_unlock(&sc->sge.reg_lock);
2444		break;
2445	}
2446	case CHELSIO_GET_SGE_DESC: {
2447		struct ch_desc *edesc = (struct ch_desc *)data;
2448		int ret;
2449		if (edesc->queue_num >= SGE_QSETS * 6)
2450			return (EINVAL);
2451		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2452		    edesc->queue_num % 6, edesc->idx, edesc->data);
2453		if (ret < 0)
2454			return (EINVAL);
2455		edesc->size = ret;
2456		break;
2457	}
2458	case CHELSIO_SET_QSET_PARAMS: {
2459		struct qset_params *q;
2460		struct ch_qset_params *t = (struct ch_qset_params *)data;
2461
2462		if (t->qset_idx >= SGE_QSETS)
2463			return (EINVAL);
2464		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
2465		    !in_range(t->cong_thres, 0, 255) ||
2466		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
2467			      MAX_TXQ_ENTRIES) ||
2468		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
2469			      MAX_TXQ_ENTRIES) ||
2470		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
2471			      MAX_CTRL_TXQ_ENTRIES) ||
2472		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
2473		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
2474			      MAX_RX_JUMBO_BUFFERS) ||
2475		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2476			return (EINVAL);
2477		if ((sc->flags & FULL_INIT_DONE) &&
2478		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2479		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2480		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2481		     t->polling >= 0 || t->cong_thres >= 0))
2482			return (EBUSY);
2483
2484		q = &sc->params.sge.qset[t->qset_idx];
2485
2486		if (t->rspq_size >= 0)
2487			q->rspq_size = t->rspq_size;
2488		if (t->fl_size[0] >= 0)
2489			q->fl_size = t->fl_size[0];
2490		if (t->fl_size[1] >= 0)
2491			q->jumbo_size = t->fl_size[1];
2492		if (t->txq_size[0] >= 0)
2493			q->txq_size[0] = t->txq_size[0];
2494		if (t->txq_size[1] >= 0)
2495			q->txq_size[1] = t->txq_size[1];
2496		if (t->txq_size[2] >= 0)
2497			q->txq_size[2] = t->txq_size[2];
2498		if (t->cong_thres >= 0)
2499			q->cong_thres = t->cong_thres;
2500		if (t->intr_lat >= 0) {
2501			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2502
2503			q->coalesce_nsecs = t->intr_lat*1000;
2504			t3_update_qset_coalesce(qs, q);
2505		}
2506		break;
2507	}
2508	case CHELSIO_GET_QSET_PARAMS: {
2509		struct qset_params *q;
2510		struct ch_qset_params *t = (struct ch_qset_params *)data;
2511
2512		if (t->qset_idx >= SGE_QSETS)
2513			return (EINVAL);
2514
2515		q = &(sc)->params.sge.qset[t->qset_idx];
2516		t->rspq_size   = q->rspq_size;
2517		t->txq_size[0] = q->txq_size[0];
2518		t->txq_size[1] = q->txq_size[1];
2519		t->txq_size[2] = q->txq_size[2];
2520		t->fl_size[0]  = q->fl_size;
2521		t->fl_size[1]  = q->jumbo_size;
2522		t->polling     = q->polling;
2523		t->intr_lat    = q->coalesce_nsecs / 1000;
2524		t->cong_thres  = q->cong_thres;
2525		break;
2526	}
2527	case CHELSIO_SET_QSET_NUM: {
2528		struct ch_reg *edata = (struct ch_reg *)data;
2529		unsigned int port_idx = pi->port_id;
2530
2531		if (sc->flags & FULL_INIT_DONE)
2532			return (EBUSY);
2533		if (edata->val < 1 ||
2534		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2535			return (EINVAL);
2536		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2537			return (EINVAL);
2538		sc->port[port_idx].nqsets = edata->val;
2539		sc->port[0].first_qset = 0;
2540		/*
2541		 * XXX hardcode ourselves to 2 ports just like LEEENUX
2542		 */
2543		sc->port[1].first_qset = sc->port[0].nqsets;
2544		break;
2545	}
2546	case CHELSIO_GET_QSET_NUM: {
2547		struct ch_reg *edata = (struct ch_reg *)data;
2548		edata->val = pi->nqsets;
2549		break;
2550	}
2551#ifdef notyet
2552	case CHELSIO_LOAD_FW:
2553	case CHELSIO_GET_PM:
2554	case CHELSIO_SET_PM:
2555		return (EOPNOTSUPP);
2556		break;
2557#endif
2558	case CHELSIO_SETMTUTAB: {
2559		struct ch_mtus *m = (struct ch_mtus *)data;
2560		int i;
2561
2562		if (!is_offload(sc))
2563			return (EOPNOTSUPP);
2564		if (offload_running(sc))
2565			return (EBUSY);
2566		if (m->nmtus != NMTUS)
2567			return (EINVAL);
2568		if (m->mtus[0] < 81)         /* accommodate SACK */
2569			return (EINVAL);
2570
2571		/*
2572		 * MTUs must be in ascending order
2573		 */
2574		for (i = 1; i < NMTUS; ++i)
2575			if (m->mtus[i] < m->mtus[i - 1])
2576				return (EINVAL);
2577
2578		memcpy(sc->params.mtus, m->mtus,
2579		       sizeof(sc->params.mtus));
2580		break;
2581	}
2582	case CHELSIO_GETMTUTAB: {
2583		struct ch_mtus *m = (struct ch_mtus *)data;
2584
2585		if (!is_offload(sc))
2586			return (EOPNOTSUPP);
2587
2588		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2589		m->nmtus = NMTUS;
2590		break;
2591	}
2592	case CHELSIO_SET_FILTER: {
2593		struct ch_filter *f = (struct ch_filter *)data;
2594		struct filter_info *p;
2595		int ret;
2596
2597		if (sc->params.mc5.nfilters == 0)
2598			return (EOPNOTSUPP);
2599		if (!(sc->flags & FULL_INIT_DONE))
2600			return (EAGAIN);  /* can still change nfilters */
2601		if (sc->filters == NULL)
2602			return (ENOMEM);
2603
2604		if (f->filter_id >= sc->params.mc5.nfilters ||
2605		    (f->val.dip && f->mask.dip != 0xffffffff) ||
2606		    (f->val.sport && f->mask.sport != 0xffff) ||
2607		    (f->val.dport && f->mask.dport != 0xffff) ||
2608		    (f->mask.vlan && f->mask.vlan != 0xfff) ||
2609		    (f->mask.vlan_prio && f->mask.vlan_prio != 7) ||
2610		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
2611		    f->qset >= SGE_QSETS ||
2612		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
2613			return (EINVAL);
2614
2615		p = &sc->filters[f->filter_id];
2616		if (p->locked)
2617			return (EPERM);
2618
2619		p->sip = f->val.sip;
2620		p->sip_mask = f->mask.sip;
2621		p->dip = f->val.dip;
2622		p->sport = f->val.sport;
2623		p->dport = f->val.dport;
2624		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
2625		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
2626						  FILTER_NO_VLAN_PRI;
2627		p->mac_hit = f->mac_hit;
2628		p->mac_vld = f->mac_addr_idx != 0xffff;
2629		p->mac_idx = f->mac_addr_idx;
2630		p->pkt_type = f->proto;
2631		p->report_filter_id = f->want_filter_id;
2632		p->pass = f->pass;
2633		p->rss = f->rss;
2634		p->qset = f->qset;
2635
2636		ret = set_filter(sc, f->filter_id, p);
2637		if (ret)
2638			return ret;
2639		p->valid = 1;
2640		break;
2641	}
2642	case CHELSIO_DEL_FILTER: {
2643		struct ch_filter *f = (struct ch_filter *)data;
2644		struct filter_info *p;
2645
2646		if (sc->params.mc5.nfilters == 0)
2647			return (EOPNOTSUPP);
2648		if (!(sc->flags & FULL_INIT_DONE))
2649			return (EAGAIN);  /* can still change nfilters */
2650		if (sc->filters == NULL)
2651			return (ENOMEM);
2652		if (f->filter_id >= sc->params.mc5.nfilters)
2653			return (EINVAL);
2654
2655		p = &sc->filters[f->filter_id];
2656		if (p->locked)
2657			return (EPERM);
2658		memset(p, 0, sizeof(*p));
2659		p->sip_mask = 0xffffffff;
2660		p->vlan = 0xfff;
2661		p->vlan_prio = FILTER_NO_VLAN_PRI;
2662		p->pkt_type = 1;
2663		return set_filter(sc, f->filter_id, p);
2664	}
2665	case CHELSIO_DEVUP:
2666		if (!is_offload(sc))
2667			return (EOPNOTSUPP);
2668		return offload_open(pi);
2669		break;
2670	case CHELSIO_GET_MEM: {
2671		struct ch_mem_range *t = (struct ch_mem_range *)data;
2672		struct mc7 *mem;
2673		uint8_t *useraddr;
2674		u64 buf[32];
2675
2676		if (!is_offload(sc))
2677			return (EOPNOTSUPP);
2678		if (!(sc->flags & FULL_INIT_DONE))
2679			return (EIO);         /* need the memory controllers */
2680		if ((t->addr & 0x7) || (t->len & 0x7))
2681			return (EINVAL);
2682		if (t->mem_id == MEM_CM)
2683			mem = &sc->cm;
2684		else if (t->mem_id == MEM_PMRX)
2685			mem = &sc->pmrx;
2686		else if (t->mem_id == MEM_PMTX)
2687			mem = &sc->pmtx;
2688		else
2689			return (EINVAL);
2690
2691		/*
2692		 * Version scheme:
2693		 * bits 0..9: chip version
2694		 * bits 10..15: chip revision
2695		 */
2696		t->version = 3 | (sc->params.rev << 10);
2697
2698		/*
2699		 * Read 256 bytes at a time as len can be large and we don't
2700		 * want to use huge intermediate buffers.
2701		 */
2702		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
2703		while (t->len) {
2704			unsigned int chunk = min(t->len, sizeof(buf));
2705
2706			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2707			if (error)
2708				return (-error);
2709			if (copyout(buf, useraddr, chunk))
2710				return (EFAULT);
2711			useraddr += chunk;
2712			t->addr += chunk;
2713			t->len -= chunk;
2714		}
2715		break;
2716	}
2717	case CHELSIO_READ_TCAM_WORD: {
2718		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2719
2720		if (!is_offload(sc))
2721			return (EOPNOTSUPP);
2722		if (!(sc->flags & FULL_INIT_DONE))
2723			return (EIO);         /* need MC5 */
2724		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2725		break;
2726	}
2727	case CHELSIO_SET_TRACE_FILTER: {
2728		struct ch_trace *t = (struct ch_trace *)data;
2729		const struct trace_params *tp;
2730
2731		tp = (const struct trace_params *)&t->sip;
2732		if (t->config_tx)
2733			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2734					       t->trace_tx);
2735		if (t->config_rx)
2736			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2737					       t->trace_rx);
2738		break;
2739	}
2740	case CHELSIO_SET_PKTSCHED: {
2741		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2742		if (sc->open_device_map == 0)
2743			return (EAGAIN);
2744		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2745		    p->binding);
2746		break;
2747	}
2748	case CHELSIO_IFCONF_GETREGS: {
2749		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2750		int reglen = cxgb_get_regs_len();
2751		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2752		if (buf == NULL) {
2753			return (ENOMEM);
2754		} else if (regs->len > reglen)
2755			regs->len = reglen;
2756		else if (regs->len < reglen) {
2757			error = E2BIG;
2758			goto done;
2759		}
2760		cxgb_get_regs(sc, regs, buf);
2761		error = copyout(buf, regs->data, reglen);
2762
2763		done:
2764		free(buf, M_DEVBUF);
2765
2766		break;
2767	}
2768	case CHELSIO_SET_HW_SCHED: {
2769		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2770		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2771
2772		if ((sc->flags & FULL_INIT_DONE) == 0)
2773			return (EAGAIN);       /* need TP to be initialized */
2774		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2775		    !in_range(t->channel, 0, 1) ||
2776		    !in_range(t->kbps, 0, 10000000) ||
2777		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2778		    !in_range(t->flow_ipg, 0,
2779			      dack_ticks_to_usec(sc, 0x7ff)))
2780			return (EINVAL);
2781
2782		if (t->kbps >= 0) {
2783			error = t3_config_sched(sc, t->kbps, t->sched);
2784			if (error < 0)
2785				return (-error);
2786		}
2787		if (t->class_ipg >= 0)
2788			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2789		if (t->flow_ipg >= 0) {
2790			t->flow_ipg *= 1000;     /* us -> ns */
2791			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2792		}
2793		if (t->mode >= 0) {
2794			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2795
2796			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2797					 bit, t->mode ? bit : 0);
2798		}
2799		if (t->channel >= 0)
2800			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2801					 1 << t->sched, t->channel << t->sched);
2802		break;
2803	}
2804	default:
2805		return (EOPNOTSUPP);
2806		break;
2807	}
2808
2809	return (error);
2810}
2811
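/*
 * Copy the registers in [start, end] into the dump buffer at their register
 * byte offsets; cxgb_get_regs() below relies on this layout when it zeroes
 * the buffer and dumps the individual register blocks.
 */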
2812static __inline void
2813reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2814    unsigned int end)
2815{
2816	uint32_t *p = (uint32_t *)(buf + start);	/* buf is indexed by register byte offset */
2817
2818	for ( ; start <= end; start += sizeof(uint32_t))
2819		*p++ = t3_read_reg(ap, start);
2820}
2821
2822#define T3_REGMAP_SIZE (3 * 1024)
2823static int
2824cxgb_get_regs_len(void)
2825{
2826	return T3_REGMAP_SIZE;
2827}
2828#undef T3_REGMAP_SIZE
2829
2830static void
2831cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2832{
2833
2834	/*
2835	 * Version scheme:
2836	 * bits 0..9: chip version
2837	 * bits 10..15: chip revision
2838	 * bit 31: set for PCIe cards
2839	 */
2840	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2841
2842	/*
2843	 * We skip the MAC statistics registers because they are clear-on-read.
2844	 * Also reading multi-register stats would need to synchronize with the
2845	 * periodic mac stats accumulation.  Hard to justify the complexity.
2846	 */
2847	memset(buf, 0, REGDUMP_SIZE);
2848	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2849	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2850	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2851	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2852	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2853	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2854		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2855	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2856		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2857}
2858