cxgb_main.c revision 181652
1/**************************************************************************
2
3Copyright (c) 2007-2008, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 181652 2008-08-13 01:30:41Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/ktr.h>
44#include <sys/rman.h>
45#include <sys/ioccom.h>
46#include <sys/mbuf.h>
47#include <sys/linker.h>
48#include <sys/firmware.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/taskqueue.h>
56#include <sys/proc.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64#include <net/if_types.h>
65#include <net/if_vlan_var.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet/ip.h>
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74
75#include <dev/pci/pcireg.h>
76#include <dev/pci/pcivar.h>
77#include <dev/pci/pci_private.h>
78
79#ifdef CONFIG_DEFINED
80#include <cxgb_include.h>
81#else
82#include <dev/cxgb/cxgb_include.h>
83#endif
84
85#ifdef PRIV_SUPPORTED
86#include <sys/priv.h>
87#endif
88
89#ifdef IFNET_MULTIQUEUE
90#include <machine/intr_machdep.h>
91#endif
92
93static int cxgb_setup_msix(adapter_t *, int);
94static void cxgb_teardown_msix(adapter_t *);
95static void cxgb_init(void *);
96static void cxgb_init_locked(struct port_info *);
97static void cxgb_stop_locked(struct port_info *);
98static void cxgb_set_rxmode(struct port_info *);
99static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
100static int cxgb_media_change(struct ifnet *);
101static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
102static int setup_sge_qsets(adapter_t *);
103static void cxgb_async_intr(void *);
104static void cxgb_ext_intr_handler(void *, int);
105static void cxgb_tick_handler(void *, int);
106static void cxgb_down_locked(struct adapter *sc);
107static void cxgb_tick(void *);
108static void setup_rss(adapter_t *sc);
109
110/* Attachment glue for the PCI controller end of the device.  Each port of
111 * the device is attached separately, as defined later.
112 */
113static int cxgb_controller_probe(device_t);
114static int cxgb_controller_attach(device_t);
115static int cxgb_controller_detach(device_t);
116static void cxgb_free(struct adapter *);
117static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
118    unsigned int end);
119static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
120static int cxgb_get_regs_len(void);
121static int offload_open(struct port_info *pi);
122static void touch_bars(device_t dev);
123static int offload_close(struct t3cdev *tdev);
124static void cxgb_link_start(struct port_info *p);
125
126static device_method_t cxgb_controller_methods[] = {
127	DEVMETHOD(device_probe,		cxgb_controller_probe),
128	DEVMETHOD(device_attach,	cxgb_controller_attach),
129	DEVMETHOD(device_detach,	cxgb_controller_detach),
130
131	/* bus interface */
132	DEVMETHOD(bus_print_child,	bus_generic_print_child),
133	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
134
135	{ 0, 0 }
136};
137
138static driver_t cxgb_controller_driver = {
139	"cxgbc",
140	cxgb_controller_methods,
141	sizeof(struct adapter)
142};
143
144static devclass_t	cxgb_controller_devclass;
145DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
146
147/*
148 * Attachment glue for the ports.  Attachment is done directly to the
149 * controller device.
150 */
151static int cxgb_port_probe(device_t);
152static int cxgb_port_attach(device_t);
153static int cxgb_port_detach(device_t);
154
155static device_method_t cxgb_port_methods[] = {
156	DEVMETHOD(device_probe,		cxgb_port_probe),
157	DEVMETHOD(device_attach,	cxgb_port_attach),
158	DEVMETHOD(device_detach,	cxgb_port_detach),
159	{ 0, 0 }
160};
161
162static driver_t cxgb_port_driver = {
163	"cxgb",
164	cxgb_port_methods,
165	0
166};
167
168static d_ioctl_t cxgb_extension_ioctl;
169static d_open_t cxgb_extension_open;
170static d_close_t cxgb_extension_close;
171
172static struct cdevsw cxgb_cdevsw = {
173       .d_version =    D_VERSION,
174       .d_flags =      0,
175       .d_open =       cxgb_extension_open,
176       .d_close =      cxgb_extension_close,
177       .d_ioctl =      cxgb_extension_ioctl,
178       .d_name =       "cxgb",
179};
180
181static devclass_t	cxgb_port_devclass;
182DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
183
184#define SGE_MSIX_COUNT (SGE_QSETS + 1)
185
186/*
187 * The driver uses the best interrupt scheme available on a platform in the
188 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
189 * of these schemes the driver may consider as follows:
190 *
191 * msi = 2: choose from among all three options
192 * msi = 1: only consider MSI and pin interrupts
193 * msi = 0: force pin interrupts
194 */
195static int msi_allowed = 2;
196
197TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
198SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
199SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
200    "MSI-X, MSI, INTx selector");
201
202/*
203 * The driver enables offload as a default.
204 * To disable it, use ofld_disable = 1.
205 */
206static int ofld_disable = 0;
207TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
208SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
209    "disable ULP offload");
210
211/*
212 * The driver uses an auto-queue algorithm by default.
213 * To disable it and force a single queue-set per port, use singleq = 1.
214 */
215static int singleq = 0;
216TUNABLE_INT("hw.cxgb.singleq", &singleq);
217SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
218    "use a single queue-set per port");
219
220
221/*
222 * By default the firmware is only updated when it is out of date.  To force
223 * an update even when the version on the card is current, use force_fw_update = 1.
224 */
225static int force_fw_update = 0;
226TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
227SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
228    "update firmware even if up to date");
229
230int cxgb_use_16k_clusters = 0;
231TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
232SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
233    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
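/*
 * Example (illustrative values only): all of the tunables above are
 * CTLFLAG_RDTUN, so they are normally set from /boot/loader.conf, e.g.
 *
 *   hw.cxgb.msi_allowed="1"        # restrict the driver to MSI or INTx
 *   hw.cxgb.ofld_disable="1"       # disable ULP offload
 *   hw.cxgb.singleq="1"            # one queue-set per port
 *   hw.cxgb.force_fw_update="1"    # reflash firmware even if current
 *   hw.cxgb.use_16k_clusters="1"   # 16kB clusters for the jumbo queue
 */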
234
235enum {
236	MAX_TXQ_ENTRIES      = 16384,
237	MAX_CTRL_TXQ_ENTRIES = 1024,
238	MAX_RSPQ_ENTRIES     = 16384,
239	MAX_RX_BUFFERS       = 16384,
240	MAX_RX_JUMBO_BUFFERS = 16384,
241	MIN_TXQ_ENTRIES      = 4,
242	MIN_CTRL_TXQ_ENTRIES = 4,
243	MIN_RSPQ_ENTRIES     = 32,
244	MIN_FL_ENTRIES       = 32,
245	MIN_FL_JUMBO_ENTRIES = 32
246};
247
248struct filter_info {
249	u32 sip;
250	u32 sip_mask;
251	u32 dip;
252	u16 sport;
253	u16 dport;
254	u32 vlan:12;
255	u32 vlan_prio:3;
256	u32 mac_hit:1;
257	u32 mac_idx:4;
258	u32 mac_vld:1;
259	u32 pkt_type:2;
260	u32 report_filter_id:1;
261	u32 pass:1;
262	u32 rss:1;
263	u32 qset:3;
264	u32 locked:1;
265	u32 valid:1;
266};
267
268enum { FILTER_NO_VLAN_PRI = 7 };
269
270#define PORT_MASK ((1 << MAX_NPORTS) - 1)
271
272/* Table for probing the cards.  The desc field isn't actually used */
273struct cxgb_ident {
274	uint16_t	vendor;
275	uint16_t	device;
276	int		index;
277	char		*desc;
278} cxgb_identifiers[] = {
279	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
280	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
281	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
282	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
283	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
284	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
285	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
286	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
287	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
288	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
289	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
290	{0, 0, 0, NULL}
291};
292
293static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
294
295
296void
297cxgb_log_tcb(struct adapter *sc, unsigned int tid)
298{
299	char buf[TCB_SIZE];
300	uint64_t *tcb = (uint64_t *)buf;
301	int i, error;
302	struct mc7 *mem = &sc->cm;
303
304	error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb);
305	if (error)
306		printf("cxgb_log_tcb failed\n");
307
308	CTR1(KTR_CXGB, "TCB tid=%u", tid);
309	for (i = 0; i < TCB_SIZE / 32; i++) {
310		CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x",
311		    i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
312		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
313		tcb += 2;
314		CTR4(KTR_CXGB, "   %08x %08x %08x %08x",
315		    (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
316		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
317		tcb += 2;
318	}
319}
320
321static __inline char
322t3rev2char(struct adapter *adapter)
323{
324	char rev = 'z';
325
326	switch(adapter->params.rev) {
327	case T3_REV_A:
328		rev = 'a';
329		break;
330	case T3_REV_B:
331	case T3_REV_B2:
332		rev = 'b';
333		break;
334	case T3_REV_C:
335		rev = 'c';
336		break;
337	}
338	return rev;
339}
340
341static struct cxgb_ident *
342cxgb_get_ident(device_t dev)
343{
344	struct cxgb_ident *id;
345
346	for (id = cxgb_identifiers; id->desc != NULL; id++) {
347		if ((id->vendor == pci_get_vendor(dev)) &&
348		    (id->device == pci_get_device(dev))) {
349			return (id);
350		}
351	}
352	return (NULL);
353}
354
355static const struct adapter_info *
356cxgb_get_adapter_info(device_t dev)
357{
358	struct cxgb_ident *id;
359	const struct adapter_info *ai;
360
361	id = cxgb_get_ident(dev);
362	if (id == NULL)
363		return (NULL);
364
365	ai = t3_get_adapter_info(id->index);
366
367	return (ai);
368}
369
370static int
371cxgb_controller_probe(device_t dev)
372{
373	const struct adapter_info *ai;
374	char *ports, buf[80];
375	int nports;
376
377	ai = cxgb_get_adapter_info(dev);
378	if (ai == NULL)
379		return (ENXIO);
380
381	nports = ai->nports0 + ai->nports1;
382	if (nports == 1)
383		ports = "port";
384	else
385		ports = "ports";
386
387	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
388	device_set_desc_copy(dev, buf);
389	return (BUS_PROBE_DEFAULT);
390}
391
392#define FW_FNAME "cxgb_t3fw"
393#define TPEEPROM_NAME "t3b_tp_eeprom"
394#define TPSRAM_NAME "t3b_protocol_sram"
395
396static int
397upgrade_fw(adapter_t *sc)
398{
399#ifdef FIRMWARE_LATEST
400	const struct firmware *fw;
401#else
402	struct firmware *fw;
403#endif
404	int status;
405
406	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
407		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
408		return (ENOENT);
409	} else
410		device_printf(sc->dev, "updating firmware on card\n");
411	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
412
413	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
414
415	firmware_put(fw, FIRMWARE_UNLOAD);
416
417	return (status);
418}
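/*
 * Note: firmware_get(9) resolves FW_FNAME ("cxgb_t3fw") against registered
 * firmware images and will attempt to load a kernel module of that name if
 * none is registered, so (assuming the image is built as a module) a manual
 * "kldload cxgb_t3fw" achieves the same thing.
 */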
419
420static int
421cxgb_controller_attach(device_t dev)
422{
423	device_t child;
424	const struct adapter_info *ai;
425	struct adapter *sc;
426	int i, error = 0;
427	uint32_t vers;
428	int port_qsets = 1;
429#ifdef MSI_SUPPORTED
430	int msi_needed, reg;
431#endif
432	int must_load = 0;
433	sc = device_get_softc(dev);
434	sc->dev = dev;
435	sc->msi_count = 0;
436	ai = cxgb_get_adapter_info(dev);
437
438	/*
439	 * XXX not really related but a recent addition
440	 */
441#ifdef MSI_SUPPORTED
442	/* find the PCIe link width and set max read request to 4KB*/
443	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
444		uint16_t lnk, pectl;
445		lnk = pci_read_config(dev, reg + 0x12, 2);
446		sc->link_width = (lnk >> 4) & 0x3f;
447
448		pectl = pci_read_config(dev, reg + 0x8, 2);
449		pectl = (pectl & ~0x7000) | (5 << 12);
450		pci_write_config(dev, reg + 0x8, pectl, 2);
451	}
452
453	if (sc->link_width != 0 && sc->link_width <= 4 &&
454	    (ai->nports0 + ai->nports1) <= 2) {
455		device_printf(sc->dev,
456		    "PCIe x%d Link, expect reduced performance\n",
457		    sc->link_width);
458	}
459#endif
460	touch_bars(dev);
461	pci_enable_busmaster(dev);
462	/*
463	 * Allocate the registers and make them available to the driver.
464	 * The registers that we care about for NIC mode are in BAR 0
465	 */
466	sc->regs_rid = PCIR_BAR(0);
467	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
468	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
469		device_printf(dev, "Cannot allocate BAR region 0\n");
470		return (ENXIO);
471	}
472	sc->udbs_rid = PCIR_BAR(2);
473	if ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
474	    &sc->udbs_rid, RF_ACTIVE)) == NULL) {
475		device_printf(dev, "Cannot allocate BAR region 2\n");
476		error = ENXIO;
477		goto out;
478	}
479
480	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
481	    device_get_unit(dev));
482	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
483
484	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
485	    device_get_unit(dev));
486	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
487	    device_get_unit(dev));
488	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
489	    device_get_unit(dev));
490
491	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
492	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
493	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
494
495	sc->bt = rman_get_bustag(sc->regs_res);
496	sc->bh = rman_get_bushandle(sc->regs_res);
497	sc->mmio_len = rman_get_size(sc->regs_res);
498
499	if (t3_prep_adapter(sc, ai, 1) < 0) {
500		printf("prep adapter failed\n");
501		error = ENODEV;
502		goto out;
503	}
504        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
505	 * enough messages for the queue sets.  If that fails, try falling
506	 * back to MSI.  If that fails, then try falling back to the legacy
507	 * interrupt pin model.
508	 */
509#ifdef MSI_SUPPORTED
510
511	sc->msix_regs_rid = 0x20;
512	if ((msi_allowed >= 2) &&
513	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
514	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
515
516		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
517
518		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
519		    (sc->msi_count != msi_needed)) {
520			device_printf(dev, "msix allocation failed - msi_count = %d"
521			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
522			    msi_needed, error);
523			sc->msi_count = 0;
524			pci_release_msi(dev);
525			bus_release_resource(dev, SYS_RES_MEMORY,
526			    sc->msix_regs_rid, sc->msix_regs_res);
527			sc->msix_regs_res = NULL;
528		} else {
529			sc->flags |= USING_MSIX;
530			sc->cxgb_intr = t3_intr_msix;
531		}
532	}
533
534	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
535		sc->msi_count = 1;
536		if (pci_alloc_msi(dev, &sc->msi_count)) {
537			device_printf(dev, "alloc msi failed - will try INTx\n");
538			sc->msi_count = 0;
539			pci_release_msi(dev);
540		} else {
541			sc->flags |= USING_MSI;
542			sc->irq_rid = 1;
543			sc->cxgb_intr = t3_intr_msi;
544		}
545	}
546#endif
547	if (sc->msi_count == 0) {
548		device_printf(dev, "using line interrupts\n");
549		sc->irq_rid = 0;
550		sc->cxgb_intr = t3b_intr;
551	}
552
553	if ((sc->flags & USING_MSIX) && !singleq)
554		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
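	/*
	 * Worked example (assuming SGE_QSETS is 8): with MSI-X on a two-port
	 * adapter and four CPUs, each port gets min(8 / 2, 4) = 4 queue-sets;
	 * with MSI, INTx, or singleq set, port_qsets stays at 1.
	 */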
555
556	/* Create a private taskqueue thread for handling driver events */
557#ifdef TASKQUEUE_CURRENT
558	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
559	    taskqueue_thread_enqueue, &sc->tq);
560#else
561	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
562	    taskqueue_thread_enqueue, &sc->tq);
563#endif
564	if (sc->tq == NULL) {
565		device_printf(dev, "failed to allocate controller task queue\n");
566		goto out;
567	}
568
569	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
570	    device_get_nameunit(dev));
571	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
572	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
573
574
575	/* Create a periodic callout for checking adapter status */
576	callout_init(&sc->cxgb_tick_ch, TRUE);
577
578	if ((t3_check_fw_version(sc, &must_load) != 0 && must_load) || force_fw_update) {
579		/*
580		 * Warn user that a firmware update will be attempted in init.
581		 */
582		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
583		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
584		sc->flags &= ~FW_UPTODATE;
585	} else {
586		sc->flags |= FW_UPTODATE;
587	}
588
589	if (t3_check_tpsram_version(sc, &must_load) != 0 && must_load) {
590		/*
591		 * Warn user that a firmware update will be attempted in init.
592		 */
593		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
594		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
595		sc->flags &= ~TPS_UPTODATE;
596	} else {
597		sc->flags |= TPS_UPTODATE;
598	}
599
600	/*
601	 * Create a child device for each MAC.  The ethernet attachment
602	 * will be done in these children.
603	 */
604	for (i = 0; i < (sc)->params.nports; i++) {
605		struct port_info *pi;
606
607		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
608			device_printf(dev, "failed to add child port\n");
609			error = EINVAL;
610			goto out;
611		}
612		pi = &sc->port[i];
613		pi->adapter = sc;
614		pi->nqsets = port_qsets;
615		pi->first_qset = i*port_qsets;
616		pi->port_id = i;
617		pi->tx_chan = i >= ai->nports0;
618		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
619		sc->rxpkt_map[pi->txpkt_intf] = i;
620		sc->port[i].tx_chan = i >= ai->nports0;
621		sc->portdev[i] = child;
622		device_set_softc(child, pi);
623	}
624	if ((error = bus_generic_attach(dev)) != 0)
625		goto out;
626
627	/* initialize sge private state */
628	t3_sge_init_adapter(sc);
629
630	t3_led_ready(sc);
631
632	cxgb_offload_init();
633	if (is_offload(sc)) {
634		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
635		cxgb_adapter_ofld(sc);
636        }
637	error = t3_get_fw_version(sc, &vers);
638	if (error)
639		goto out;
640
641	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
642	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
643	    G_FW_VERSION_MICRO(vers));
644
645	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
646	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
647	t3_add_attach_sysctls(sc);
648out:
649	if (error)
650		cxgb_free(sc);
651
652	return (error);
653}
654
655static int
656cxgb_controller_detach(device_t dev)
657{
658	struct adapter *sc;
659
660	sc = device_get_softc(dev);
661
662	cxgb_free(sc);
663
664	return (0);
665}
666
667static void
668cxgb_free(struct adapter *sc)
669{
670	int i;
671
672	ADAPTER_LOCK(sc);
673	sc->flags |= CXGB_SHUTDOWN;
674	ADAPTER_UNLOCK(sc);
675	cxgb_pcpu_shutdown_threads(sc);
676	ADAPTER_LOCK(sc);
677
678/*
679 * drops the lock
680 */
681	cxgb_down_locked(sc);
682
683#ifdef MSI_SUPPORTED
684	if (sc->flags & (USING_MSI | USING_MSIX)) {
685		device_printf(sc->dev, "releasing msi message(s)\n");
686		pci_release_msi(sc->dev);
687	} else {
688		device_printf(sc->dev, "no msi message to release\n");
689	}
690#endif
691	if (sc->msix_regs_res != NULL) {
692		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
693		    sc->msix_regs_res);
694	}
695
696	t3_sge_deinit_sw(sc);
697	/*
698	 * Wait for last callout
699	 */
700
701	DELAY(hz*100);
702
703	for (i = 0; i < (sc)->params.nports; ++i) {
704		if (sc->portdev[i] != NULL)
705			device_delete_child(sc->dev, sc->portdev[i]);
706	}
707
708	bus_generic_detach(sc->dev);
709	if (sc->tq != NULL) {
710		taskqueue_free(sc->tq);
711		sc->tq = NULL;
712	}
713
714	if (is_offload(sc)) {
715		cxgb_adapter_unofld(sc);
716		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
717			offload_close(&sc->tdev);
718		else
719			printf("cxgb_free: DEVMAP_BIT not set\n");
720	} else
721		printf("cxgb_free: offload not enabled\n");
722#ifdef notyet
723	if (sc->flags & CXGB_OFLD_INIT)
724		cxgb_offload_deactivate(sc);
725#endif
726	free(sc->filters, M_DEVBUF);
727	t3_sge_free(sc);
728
729	cxgb_offload_exit();
730
731	if (sc->udbs_res != NULL)
732		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
733		    sc->udbs_res);
734
735	if (sc->regs_res != NULL)
736		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
737		    sc->regs_res);
738
739	MTX_DESTROY(&sc->mdio_lock);
740	MTX_DESTROY(&sc->sge.reg_lock);
741	MTX_DESTROY(&sc->elmer_lock);
742	ADAPTER_LOCK_DEINIT(sc);
743}
744
745/**
746 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
747 *	@sc: the controller softc
748 *
749 *	Determines how many sets of SGE queues to use and initializes them.
750 *	We support multiple queue sets per port if we have MSI-X, otherwise
751 *	just one queue set per port.
752 */
753static int
754setup_sge_qsets(adapter_t *sc)
755{
756	int i, j, err, irq_idx = 0, qset_idx = 0;
757	u_int ntxq = SGE_TXQ_PER_SET;
758
759	if ((err = t3_sge_alloc(sc)) != 0) {
760		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
761		return (err);
762	}
763
764	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
765		irq_idx = -1;
766
767	for (i = 0; i < (sc)->params.nports; i++) {
768		struct port_info *pi = &sc->port[i];
769
770		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
771			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
772			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
773			    &sc->params.sge.qset[qset_idx], ntxq, pi);
774			if (err) {
775				t3_free_sge_resources(sc);
776				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
777				    err);
778				return (err);
779			}
780		}
781	}
782
783	return (0);
784}
785
786static void
787cxgb_teardown_msix(adapter_t *sc)
788{
789	int i, nqsets;
790
791	for (nqsets = i = 0; i < (sc)->params.nports; i++)
792		nqsets += sc->port[i].nqsets;
793
794	for (i = 0; i < nqsets; i++) {
795		if (sc->msix_intr_tag[i] != NULL) {
796			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
797			    sc->msix_intr_tag[i]);
798			sc->msix_intr_tag[i] = NULL;
799		}
800		if (sc->msix_irq_res[i] != NULL) {
801			bus_release_resource(sc->dev, SYS_RES_IRQ,
802			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
803			sc->msix_irq_res[i] = NULL;
804		}
805	}
806}
807
808static int
809cxgb_setup_msix(adapter_t *sc, int msix_count)
810{
811	int i, j, k, nqsets, rid;
812
813	/* The first message indicates link changes and error conditions */
814	sc->irq_rid = 1;
815	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
816	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
817		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
818		return (EINVAL);
819	}
820
821	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
822#ifdef INTR_FILTERS
823		NULL,
824#endif
825		cxgb_async_intr, sc, &sc->intr_tag)) {
826		device_printf(sc->dev, "Cannot set up interrupt\n");
827		return (EINVAL);
828	}
829	for (i = k = 0; i < (sc)->params.nports; i++) {
830		nqsets = sc->port[i].nqsets;
831		for (j = 0; j < nqsets; j++, k++) {
832			struct sge_qset *qs = &sc->sge.qs[k];
833
834			rid = k + 2;
835			if (cxgb_debug)
836				printf("rid=%d ", rid);
837			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
838			    sc->dev, SYS_RES_IRQ, &rid,
839			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
840				device_printf(sc->dev, "Cannot allocate "
841				    "interrupt for message %d\n", rid);
842				return (EINVAL);
843			}
844			sc->msix_irq_rid[k] = rid;
845			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
846				INTR_MPSAFE|INTR_TYPE_NET,
847#ifdef INTR_FILTERS
848				NULL,
849#endif
850				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
851				device_printf(sc->dev, "Cannot set up "
852				    "interrupt for message %d\n", rid);
853				return (EINVAL);
854			}
855#ifdef IFNET_MULTIQUEUE
856			if (singleq == 0) {
857				int vector = rman_get_start(sc->msix_irq_res[k]);
858				if (bootverbose)
859					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
860				intr_bind(vector, k % mp_ncpus);
861			}
862#endif
863		}
864	}
865
866	return (0);
867}
868
869static int
870cxgb_port_probe(device_t dev)
871{
872	struct port_info *p;
873	char buf[80];
874	const char *desc;
875
876	p = device_get_softc(dev);
877	desc = p->phy.desc;
878	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
879	device_set_desc_copy(dev, buf);
880	return (0);
881}
882
883
884static int
885cxgb_makedev(struct port_info *pi)
886{
887
888	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
889	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
890
891	if (pi->port_cdev == NULL)
892		return (ENOMEM);
893
894	pi->port_cdev->si_drv1 = (void *)pi;
895
896	return (0);
897}
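/*
 * The node created above is named after the interface (e.g. /dev/cxgb0) and
 * accepts the private ioctls handled by cxgb_extension_ioctl().  A userland
 * sketch (not part of the driver): open("/dev/cxgb0", O_RDWR) and issue the
 * CHELSIO_* ioctls against the returned descriptor.
 */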
898
899
900#ifdef TSO_SUPPORTED
901#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
902/* Don't enable TSO6 yet */
903#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
904#else
905#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
906/* Don't enable TSO6 yet */
907#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
908#define IFCAP_TSO4 0x0
909#define IFCAP_TSO6 0x0
910#define CSUM_TSO   0x0
911#endif
912
913
914static int
915cxgb_port_attach(device_t dev)
916{
917	struct port_info *p;
918	struct ifnet *ifp;
919	int err, media_flags;
920	struct adapter *sc;
921
922
923	p = device_get_softc(dev);
924	sc = p->adapter;
925	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
926	    device_get_unit(device_get_parent(dev)), p->port_id);
927	PORT_LOCK_INIT(p, p->lockbuf);
928
929	/* Allocate an ifnet object and set it up */
930	ifp = p->ifp = if_alloc(IFT_ETHER);
931	if (ifp == NULL) {
932		device_printf(dev, "Cannot allocate ifnet\n");
933		return (ENOMEM);
934	}
935
936	/*
937	 * Note that there is currently no watchdog timer.
938	 */
939	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
940	ifp->if_init = cxgb_init;
941	ifp->if_softc = p;
942	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
943	ifp->if_ioctl = cxgb_ioctl;
944	ifp->if_start = cxgb_start;
945
946#if 0
947#ifdef IFNET_MULTIQUEUE
948	ifp->if_flags |= IFF_MULTIQ;
949	ifp->if_mq_start = cxgb_pcpu_start;
950#endif
951#endif
952	ifp->if_timer = 0;	/* Disable ifnet watchdog */
953	ifp->if_watchdog = NULL;
954
955	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
956	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
957	IFQ_SET_READY(&ifp->if_snd);
958
959	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
960	ifp->if_capabilities |= CXGB_CAP;
961	ifp->if_capenable |= CXGB_CAP_ENABLE;
962	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
963	/*
964	 * disable TSO on 4-port - it isn't supported by the firmware yet
965	 */
966	if (p->adapter->params.nports > 2) {
967		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
968		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
969		ifp->if_hwassist &= ~CSUM_TSO;
970	}
971
972	ether_ifattach(ifp, p->hw_addr);
973	/*
974	 * Only default to jumbo frames on 10GigE
975	 */
976	if (p->adapter->params.nports <= 2)
977		ifp->if_mtu = ETHERMTU_JUMBO;
978	if ((err = cxgb_makedev(p)) != 0) {
979		printf("makedev failed %d\n", err);
980		return (err);
981	}
982	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
983	    cxgb_media_status);
984
985	if (!strcmp(p->phy.desc, "10GBASE-CX4")) {
986		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
987	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
988		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
989	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
990		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
991	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
992		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
993		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
994			    0, NULL);
995		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
996			    0, NULL);
997		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
998			    0, NULL);
999		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1000			    0, NULL);
1001		media_flags = 0;
1002	} else {
1003	        printf("unsupported media type %s\n", p->phy.desc);
1004		return (ENXIO);
1005	}
1006	if (media_flags) {
1007		ifmedia_add(&p->media, media_flags, 0, NULL);
1008		ifmedia_set(&p->media, media_flags);
1009	} else {
1010		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1011		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
1012	}
1013
1014
1015	snprintf(p->taskqbuf, TASKQ_NAME_LEN, "cxgb_port_taskq%d", p->port_id);
1016#ifdef TASKQUEUE_CURRENT
1017	/* Create a taskqueue for handling TX without starvation */
1018	p->tq = taskqueue_create(p->taskqbuf, M_NOWAIT,
1019	    taskqueue_thread_enqueue, &p->tq);
1020#else
1021	/* Create a taskqueue for handling TX without starvation */
1022	p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
1023	    taskqueue_thread_enqueue, &p->tq);
1024#endif
1025	/* Get the latest MAC address; the user may have assigned an LAA */
1026	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
1027	t3_sge_init_port(p);
1028#if defined(LINK_ATTACH)
1029	cxgb_link_start(p);
1030	t3_link_changed(sc, p->port_id);
1031#endif
1032	return (0);
1033}
1034
1035static int
1036cxgb_port_detach(device_t dev)
1037{
1038	struct port_info *p;
1039
1040	p = device_get_softc(dev);
1041
1042	PORT_LOCK(p);
1043	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
1044		cxgb_stop_locked(p);
1045	PORT_UNLOCK(p);
1046
1047	if (p->tq != NULL) {
1048		taskqueue_drain(p->tq, &p->start_task);
1049		taskqueue_free(p->tq);
1050		p->tq = NULL;
1051	}
1052
1053	ether_ifdetach(p->ifp);
1054	printf("waiting for callout to stop ...");
1055	DELAY(1000000);
1056	printf("done\n");
1057	/*
1058	 * the lock may be acquired in ifdetach
1059	 */
1060	PORT_LOCK_DEINIT(p);
1061	if_free(p->ifp);
1062
1063	if (p->port_cdev != NULL)
1064		destroy_dev(p->port_cdev);
1065
1066	return (0);
1067}
1068
1069void
1070t3_fatal_err(struct adapter *sc)
1071{
1072	u_int fw_status[4];
1073
1074	if (sc->flags & FULL_INIT_DONE) {
1075		t3_sge_stop(sc);
1076		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1077		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1078		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1079		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1080		t3_intr_disable(sc);
1081	}
1082	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1083	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1084		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1085		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1086}
1087
1088int
1089t3_os_find_pci_capability(adapter_t *sc, int cap)
1090{
1091	device_t dev;
1092	struct pci_devinfo *dinfo;
1093	pcicfgregs *cfg;
1094	uint32_t status;
1095	uint8_t ptr;
1096
1097	dev = sc->dev;
1098	dinfo = device_get_ivars(dev);
1099	cfg = &dinfo->cfg;
1100
1101	status = pci_read_config(dev, PCIR_STATUS, 2);
1102	if (!(status & PCIM_STATUS_CAPPRESENT))
1103		return (0);
1104
1105	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1106	case 0:
1107	case 1:
1108		ptr = PCIR_CAP_PTR;
1109		break;
1110	case 2:
1111		ptr = PCIR_CAP_PTR_2;
1112		break;
1113	default:
1114		return (0);
1115		break;
1116	}
1117	ptr = pci_read_config(dev, ptr, 1);
1118
1119	while (ptr != 0) {
1120		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1121			return (ptr);
1122		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1123	}
1124
1125	return (0);
1126}
1127
1128int
1129t3_os_pci_save_state(struct adapter *sc)
1130{
1131	device_t dev;
1132	struct pci_devinfo *dinfo;
1133
1134	dev = sc->dev;
1135	dinfo = device_get_ivars(dev);
1136
1137	pci_cfg_save(dev, dinfo, 0);
1138	return (0);
1139}
1140
1141int
1142t3_os_pci_restore_state(struct adapter *sc)
1143{
1144	device_t dev;
1145	struct pci_devinfo *dinfo;
1146
1147	dev = sc->dev;
1148	dinfo = device_get_ivars(dev);
1149
1150	pci_cfg_restore(dev, dinfo);
1151	return (0);
1152}
1153
1154/**
1155 *	t3_os_link_changed - handle link status changes
1156 *	@adapter: the adapter associated with the link change
1157 *	@port_id: the port index whose link status has changed
1158 *	@link_status: the new status of the link
1159 *	@speed: the new speed setting
1160 *	@duplex: the new duplex setting
1161 *	@fc: the new flow-control setting
1162 *
1163 *	This is the OS-dependent handler for link status changes.  The OS
1164 *	neutral handler takes care of most of the processing for these events,
1165 *	then calls this handler for any OS-specific processing.
1166 */
1167void
1168t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1169     int duplex, int fc)
1170{
1171	struct port_info *pi = &adapter->port[port_id];
1172	struct cmac *mac = &adapter->port[port_id].mac;
1173
1174	if (link_status) {
1175		DELAY(10);
1176		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1177		/* Clear errors created by MAC enable */
1178		t3_set_reg_field(adapter,
1179				 A_XGM_STAT_CTRL + pi->mac.offset,
1180				 F_CLRSTATS, 1);
1181		if_link_state_change(pi->ifp, LINK_STATE_UP);
1182
1183	} else {
1184		pi->phy.ops->power_down(&pi->phy, 1);
1185		t3_mac_disable(mac, MAC_DIRECTION_RX);
1186		t3_link_start(&pi->phy, mac, &pi->link_config);
1187		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1188		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1189	}
1190}
1191
1192/**
1193 *	t3_os_phymod_changed - handle PHY module changes
1194 *	@adap: the adapter whose PHY module changed
1195 *	@port_id: the port index whose PHY module changed
1196 *
1197 *	This is the OS-dependent handler for PHY module changes.  It is
1198 *	invoked when a PHY module is removed or inserted for any OS-specific
1199 *	processing.
1200 */
1201void t3_os_phymod_changed(struct adapter *adap, int port_id)
1202{
1203	static const char *mod_str[] = {
1204		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1205	};
1206
1207	struct port_info *pi = &adap->port[port_id];
1208
1209	if (pi->phy.modtype == phy_modtype_none)
1210		device_printf(adap->dev, "PHY module unplugged\n");
1211	else {
1212		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
1213		    ("invalid PHY module type %d", pi->phy.modtype));
1214		device_printf(adap->dev, "%s PHY module inserted\n",
1215		    mod_str[pi->phy.modtype]);
1216	}
1217}
1218
1219/*
1220 * Interrupt-context handler for external (PHY) interrupts.
1221 */
1222void
1223t3_os_ext_intr_handler(adapter_t *sc)
1224{
1225	if (cxgb_debug)
1226		printf("t3_os_ext_intr_handler\n");
1227	/*
1228	 * Schedule a task to handle external interrupts as they may be slow
1229	 * and we use a mutex to protect MDIO registers.  We disable PHY
1230	 * interrupts in the meantime and let the task reenable them when
1231	 * it's done.
1232	 */
1233	ADAPTER_LOCK(sc);
1234	if (sc->slow_intr_mask) {
1235		sc->slow_intr_mask &= ~F_T3DBG;
1236		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1237		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1238	}
1239	ADAPTER_UNLOCK(sc);
1240}
1241
1242void
1243t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1244{
1245
1246	/*
1247	 * The ifnet might not be allocated before this gets called, since
1248	 * t3_prep_adapter invokes it early in attach, so just save the
1249	 * address off in the port structure.
1250	 */
1251	if (cxgb_debug)
1252		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1253	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1254}
1255
1256/**
1257 *	cxgb_link_start - enable a port
1258 *	@p: the port to enable
1259 *
1260 *	Performs the MAC and PHY actions needed to enable a port.
1261 */
1262static void
1263cxgb_link_start(struct port_info *p)
1264{
1265	struct ifnet *ifp;
1266	struct t3_rx_mode rm;
1267	struct cmac *mac = &p->mac;
1268	int mtu, hwtagging;
1269
1270	ifp = p->ifp;
1271
1272	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1273
1274	mtu = ifp->if_mtu;
1275	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1276		mtu += ETHER_VLAN_ENCAP_LEN;
1277
1278	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1279
1280	t3_init_rx_mode(&rm, p);
1281	if (!mac->multiport)
1282		t3_mac_reset(mac);
1283	t3_mac_set_mtu(mac, mtu);
1284	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1285	t3_mac_set_address(mac, 0, p->hw_addr);
1286	t3_mac_set_rx_mode(mac, &rm);
1287	t3_link_start(&p->phy, mac, &p->link_config);
1288	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1289}
1290
1291
1292static int
1293await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1294			      unsigned long n)
1295{
1296	int attempts = 5;
1297
1298	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1299		if (!--attempts)
1300			return (ETIMEDOUT);
1301		t3_os_sleep(10);
1302	}
1303	return 0;
1304}
1305
1306static int
1307init_tp_parity(struct adapter *adap)
1308{
1309	int i;
1310	struct mbuf *m;
1311	struct cpl_set_tcb_field *greq;
1312	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1313
1314	t3_tp_set_offload_mode(adap, 1);
1315
1316	for (i = 0; i < 16; i++) {
1317		struct cpl_smt_write_req *req;
1318
1319		m = m_gethdr(M_WAITOK, MT_DATA);
1320		req = mtod(m, struct cpl_smt_write_req *);
1321		m->m_len = m->m_pkthdr.len = sizeof(*req);
1322		memset(req, 0, sizeof(*req));
1323		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1324		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1325		req->iff = i;
1326		t3_mgmt_tx(adap, m);
1327	}
1328
1329	for (i = 0; i < 2048; i++) {
1330		struct cpl_l2t_write_req *req;
1331
1332		m = m_gethdr(M_WAITOK, MT_DATA);
1333		req = mtod(m, struct cpl_l2t_write_req *);
1334		m->m_len = m->m_pkthdr.len = sizeof(*req);
1335		memset(req, 0, sizeof(*req));
1336		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1337		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1338		req->params = htonl(V_L2T_W_IDX(i));
1339		t3_mgmt_tx(adap, m);
1340	}
1341
1342	for (i = 0; i < 2048; i++) {
1343		struct cpl_rte_write_req *req;
1344
1345		m = m_gethdr(M_WAITOK, MT_DATA);
1346		req = mtod(m, struct cpl_rte_write_req *);
1347		m->m_len = m->m_pkthdr.len = sizeof(*req);
1348		memset(req, 0, sizeof(*req));
1349		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1350		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1351		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1352		t3_mgmt_tx(adap, m);
1353	}
1354
1355	m = m_gethdr(M_WAITOK, MT_DATA);
1356	greq = mtod(m, struct cpl_set_tcb_field *);
1357	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1358	memset(greq, 0, sizeof(*greq));
1359	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1360	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1361	greq->mask = htobe64(1);
1362	t3_mgmt_tx(adap, m);
1363
1364	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1365	t3_tp_set_offload_mode(adap, 0);
1366	return (i);
1367}
1368
1369/**
1370 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1371 *	@adap: the adapter
1372 *
1373 *	Sets up RSS to distribute packets to multiple receive queues.  We
1374 *	configure the RSS CPU lookup table to distribute to the number of HW
1375 *	receive queues, and the response queue lookup table to narrow that
1376 *	down to the response queues actually configured for each port.
1377 *	We always configure the RSS mapping for two ports since the mapping
1378 *	table has plenty of entries.
1379 */
1380static void
1381setup_rss(adapter_t *adap)
1382{
1383	int i;
1384	u_int nq[2];
1385	uint8_t cpus[SGE_QSETS + 1];
1386	uint16_t rspq_map[RSS_TABLE_SIZE];
1387
1388	for (i = 0; i < SGE_QSETS; ++i)
1389		cpus[i] = i;
1390	cpus[SGE_QSETS] = 0xff;
1391
1392	nq[0] = nq[1] = 0;
1393	for_each_port(adap, i) {
1394		const struct port_info *pi = adap2pinfo(adap, i);
1395
1396		nq[pi->tx_chan] += pi->nqsets;
1397	}
1398	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1399		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1400		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1401	}
1402	/* Calculate the reverse RSS map table */
1403	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1404		if (adap->rrss_map[rspq_map[i]] == 0xff)
1405			adap->rrss_map[rspq_map[i]] = i;
1406
1407	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1408		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1409	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1410	              cpus, rspq_map);
1411
1412}
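/*
 * Worked example: with two ports of two queue-sets each, nq[0] = nq[1] = 2,
 * so the first half of rspq_map becomes 0,1,0,1,... (channel 0's queue-sets)
 * and the second half 2,3,2,3,... (channel 1's queue-sets offset by nq[0]);
 * rrss_map then records the first table index that targets each response
 * queue.
 */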
1413
1414/*
1415 * Sends an mbuf to the offload queue driver
1416 * via t3_offload_tx().
1417 */
1418static inline int
1419offload_tx(struct t3cdev *tdev, struct mbuf *m)
1420{
1421	int ret;
1422
1423	ret = t3_offload_tx(tdev, m);
1424	return (ret);
1425}
1426
1427static int
1428write_smt_entry(struct adapter *adapter, int idx)
1429{
1430	struct port_info *pi = &adapter->port[idx];
1431	struct cpl_smt_write_req *req;
1432	struct mbuf *m;
1433
1434	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1435		return (ENOMEM);
1436
1437	req = mtod(m, struct cpl_smt_write_req *);
1438	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1439
1440	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1441	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1442	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1443	req->iff = idx;
1444	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1445	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1446
1447	m_set_priority(m, 1);
1448
1449	offload_tx(&adapter->tdev, m);
1450
1451	return (0);
1452}
1453
1454static int
1455init_smt(struct adapter *adapter)
1456{
1457	int i;
1458
1459	for_each_port(adapter, i)
1460		write_smt_entry(adapter, i);
1461	return 0;
1462}
1463
1464static void
1465init_port_mtus(adapter_t *adapter)
1466{
1467	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1468
1469	if (adapter->port[1].ifp)
1470		mtus |= adapter->port[1].ifp->if_mtu << 16;
1471	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1472}
1473
1474static void
1475send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1476			      int hi, int port)
1477{
1478	struct mbuf *m;
1479	struct mngt_pktsched_wr *req;
1480
1481	m = m_gethdr(M_DONTWAIT, MT_DATA);
1482	if (m) {
1483		req = mtod(m, struct mngt_pktsched_wr *);
1484		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1485		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1486		req->sched = sched;
1487		req->idx = qidx;
1488		req->min = lo;
1489		req->max = hi;
1490		req->binding = port;
1491		m->m_len = m->m_pkthdr.len = sizeof(*req);
1492		t3_mgmt_tx(adap, m);
1493	}
1494}
1495
1496static void
1497bind_qsets(adapter_t *sc)
1498{
1499	int i, j;
1500
1501	cxgb_pcpu_startup_threads(sc);
1502	for (i = 0; i < (sc)->params.nports; ++i) {
1503		const struct port_info *pi = adap2pinfo(sc, i);
1504
1505		for (j = 0; j < pi->nqsets; ++j) {
1506			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1507					  -1, pi->tx_chan);
1508
1509		}
1510	}
1511}
1512
1513static void
1514update_tpeeprom(struct adapter *adap)
1515{
1516#ifdef FIRMWARE_LATEST
1517	const struct firmware *tpeeprom;
1518#else
1519	struct firmware *tpeeprom;
1520#endif
1521
1522	uint32_t version;
1523	unsigned int major, minor;
1524	int ret, len;
1525	char rev;
1526
1527	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1528
1529	major = G_TP_VERSION_MAJOR(version);
1530	minor = G_TP_VERSION_MINOR(version);
1531	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1532		return;
1533
1534	rev = t3rev2char(adap);
1535
1536	tpeeprom = firmware_get(TPEEPROM_NAME);
1537	if (tpeeprom == NULL) {
1538		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1539		    TPEEPROM_NAME);
1540		return;
1541	}
1542
1543	len = tpeeprom->datasize - 4;
1544
1545	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1546	if (ret)
1547		goto release_tpeeprom;
1548
1549	if (len != TP_SRAM_LEN) {
1550		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", TPEEPROM_NAME, len, TP_SRAM_LEN);
1551		return;
1552	}
1553
1554	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1555	    TP_SRAM_OFFSET);
1556
1557	if (!ret) {
1558		device_printf(adap->dev,
1559			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1560			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1561	} else
1562		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1563
1564release_tpeeprom:
1565	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1566
1567	return;
1568}
1569
1570static int
1571update_tpsram(struct adapter *adap)
1572{
1573#ifdef FIRMWARE_LATEST
1574	const struct firmware *tpsram;
1575#else
1576	struct firmware *tpsram;
1577#endif
1578	int ret;
1579	char rev;
1580
1581	rev = t3rev2char(adap);
1582	if (!rev)
1583		return 0;
1584
1585	update_tpeeprom(adap);
1586
1587	tpsram = firmware_get(TPSRAM_NAME);
1588	if (tpsram == NULL){
1589		device_printf(adap->dev, "could not load TP SRAM\n");
1590		return (EINVAL);
1591	} else
1592		device_printf(adap->dev, "updating TP SRAM\n");
1593
1594	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1595	if (ret)
1596		goto release_tpsram;
1597
1598	ret = t3_set_proto_sram(adap, tpsram->data);
1599	if (ret)
1600		device_printf(adap->dev, "loading protocol SRAM failed\n");
1601
1602release_tpsram:
1603	firmware_put(tpsram, FIRMWARE_UNLOAD);
1604
1605	return ret;
1606}
1607
1608/**
1609 *	cxgb_up - enable the adapter
1610 *	@adap: adapter being enabled
1611 *
1612 *	Called when the first port is enabled, this function performs the
1613 *	actions necessary to make an adapter operational, such as completing
1614 *	the initialization of HW modules, and enabling interrupts.
1615 *
1616 */
1617static int
1618cxgb_up(struct adapter *sc)
1619{
1620	int err = 0;
1621
1622	if ((sc->flags & FULL_INIT_DONE) == 0) {
1623
1624		if ((sc->flags & FW_UPTODATE) == 0)
1625			if ((err = upgrade_fw(sc)))
1626				goto out;
1627		if ((sc->flags & TPS_UPTODATE) == 0)
1628			if ((err = update_tpsram(sc)))
1629				goto out;
1630		err = t3_init_hw(sc, 0);
1631		if (err)
1632			goto out;
1633
1634		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1635		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1636
1637		err = setup_sge_qsets(sc);
1638		if (err)
1639			goto out;
1640
1641		setup_rss(sc);
1642		t3_add_configured_sysctls(sc);
1643		sc->flags |= FULL_INIT_DONE;
1644	}
1645
1646	t3_intr_clear(sc);
1647
1648	/* If it's MSI or INTx, allocate a single interrupt for everything */
1649	if ((sc->flags & USING_MSIX) == 0) {
1650		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1651		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1652			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1653			    sc->irq_rid);
1654			err = EINVAL;
1655			goto out;
1656		}
1657		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1658
1659		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1660#ifdef INTR_FILTERS
1661			NULL,
1662#endif
1663			sc->cxgb_intr, sc, &sc->intr_tag)) {
1664			device_printf(sc->dev, "Cannot set up interrupt\n");
1665			err = EINVAL;
1666			goto irq_err;
1667		}
1668	} else {
1669		cxgb_setup_msix(sc, sc->msi_count);
1670	}
1671
1672	t3_sge_start(sc);
1673	t3_intr_enable(sc);
1674
1675	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1676	    is_offload(sc) && init_tp_parity(sc) == 0)
1677		sc->flags |= TP_PARITY_INIT;
1678
1679	if (sc->flags & TP_PARITY_INIT) {
1680		t3_write_reg(sc, A_TP_INT_CAUSE,
1681				F_CMCACHEPERR | F_ARPLUTPERR);
1682		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1683	}
1684
1685
1686	if (!(sc->flags & QUEUES_BOUND)) {
1687		bind_qsets(sc);
1688		sc->flags |= QUEUES_BOUND;
1689	}
1690out:
1691	return (err);
1692irq_err:
1693	CH_ERR(sc, "request_irq failed, err %d\n", err);
1694	goto out;
1695}
1696
1697
1698/*
1699 * Release resources when all the ports and offloading have been stopped.
1700 */
1701static void
1702cxgb_down_locked(struct adapter *sc)
1703{
1704
1705	t3_sge_stop(sc);
1706	t3_intr_disable(sc);
1707
1708	if (sc->intr_tag != NULL) {
1709		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1710		sc->intr_tag = NULL;
1711	}
1712	if (sc->irq_res != NULL) {
1713		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1714		    sc->irq_rid, sc->irq_res);
1715		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1716		    sc->irq_res);
1717		sc->irq_res = NULL;
1718	}
1719
1720	if (sc->flags & USING_MSIX)
1721		cxgb_teardown_msix(sc);
1722
1723	callout_stop(&sc->cxgb_tick_ch);
1724	callout_stop(&sc->sge_timer_ch);
1725	callout_drain(&sc->cxgb_tick_ch);
1726	callout_drain(&sc->sge_timer_ch);
1727
1728	if (sc->tq != NULL) {
1729		printf("draining slow intr\n");
1730
1731		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1732		printf("draining ext intr\n");
1733		taskqueue_drain(sc->tq, &sc->ext_intr_task);
1734		printf("draining tick task\n");
1735		taskqueue_drain(sc->tq, &sc->tick_task);
1736	}
1737	ADAPTER_UNLOCK(sc);
1738}
1739
1740static int
1741offload_open(struct port_info *pi)
1742{
1743	struct adapter *adapter = pi->adapter;
1744	struct t3cdev *tdev = &adapter->tdev;
1745#ifdef notyet
1746	    T3CDEV(pi->ifp);
1747#endif
1748	int adap_up = adapter->open_device_map & PORT_MASK;
1749	int err = 0;
1750
1751	CTR1(KTR_CXGB, "device_map=0x%x", adapter->open_device_map);
1752	if (atomic_cmpset_int(&adapter->open_device_map,
1753		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1754		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1755		return (0);
1756
1757
1758	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1759		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n", adapter->open_device_map);
1760	ADAPTER_LOCK(pi->adapter);
1761	if (!adap_up)
1762		err = cxgb_up(adapter);
1763	ADAPTER_UNLOCK(pi->adapter);
1764	if (err)
1765		return (err);
1766
1767	t3_tp_set_offload_mode(adapter, 1);
1768	tdev->lldev = pi->ifp;
1769
1770	init_port_mtus(adapter);
1771	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1772		     adapter->params.b_wnd,
1773		     adapter->params.rev == 0 ?
1774		       adapter->port[0].ifp->if_mtu : 0xffff);
1775	init_smt(adapter);
1776
1777	/* Call back all registered clients */
1778	cxgb_add_clients(tdev);
1779
1780
1781	/* restore them in case the offload module has changed them */
1782	if (err) {
1783		t3_tp_set_offload_mode(adapter, 0);
1784		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1785		cxgb_set_dummy_ops(tdev);
1786	}
1787	return (err);
1788}
1789
1790static int
1791offload_close(struct t3cdev *tdev)
1792{
1793	struct adapter *adapter = tdev2adap(tdev);
1794
1795	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1796		return (0);
1797
1798	/* Call back all registered clients */
1799	cxgb_remove_clients(tdev);
1800
1801	tdev->lldev = NULL;
1802	cxgb_set_dummy_ops(tdev);
1803	t3_tp_set_offload_mode(adapter, 0);
1804	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1805
1806	ADAPTER_LOCK(adapter);
1807	if (!adapter->open_device_map)
1808		cxgb_down_locked(adapter);
1809	else
1810		ADAPTER_UNLOCK(adapter);
1811	return (0);
1812}
1813
1814
1815static void
1816cxgb_init(void *arg)
1817{
1818	struct port_info *p = arg;
1819
1820	PORT_LOCK(p);
1821	cxgb_init_locked(p);
1822	PORT_UNLOCK(p);
1823}
1824
1825static void
1826cxgb_init_locked(struct port_info *p)
1827{
1828	struct ifnet *ifp;
1829	adapter_t *sc = p->adapter;
1830	int err;
1831
1832	PORT_LOCK_ASSERT_OWNED(p);
1833	ifp = p->ifp;
1834
1835	ADAPTER_LOCK(p->adapter);
1836	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1837		ADAPTER_UNLOCK(p->adapter);
1838		cxgb_stop_locked(p);
1839		return;
1840	}
1841	if (p->adapter->open_device_map == 0) {
1842		t3_intr_clear(sc);
1843	}
1844	setbit(&p->adapter->open_device_map, p->port_id);
1845	ADAPTER_UNLOCK(p->adapter);
1846
1847	if (is_offload(sc) && !ofld_disable) {
1848		err = offload_open(p);
1849		if (err)
1850			log(LOG_WARNING,
1851			    "Could not initialize offload capabilities\n");
1852	}
1853#if !defined(LINK_ATTACH)
1854	cxgb_link_start(p);
1855	t3_link_changed(sc, p->port_id);
1856#endif
1857	ifp->if_baudrate = p->link_config.speed * 1000000;
1858
1859	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1860	t3_port_intr_enable(sc, p->port_id);
1861
1862	t3_sge_reset_adapter(sc);
1863
1864	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1865	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1866}
1867
1868static void
1869cxgb_set_rxmode(struct port_info *p)
1870{
1871	struct t3_rx_mode rm;
1872	struct cmac *mac = &p->mac;
1873
1874	t3_init_rx_mode(&rm, p);
1875	mtx_lock(&p->adapter->mdio_lock);
1876	t3_mac_set_rx_mode(mac, &rm);
1877	mtx_unlock(&p->adapter->mdio_lock);
1878}
1879
1880static void
1881cxgb_stop_locked(struct port_info *pi)
1882{
1883	struct ifnet *ifp;
1884
1885	PORT_LOCK_ASSERT_OWNED(pi);
1886	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1887
1888	ifp = pi->ifp;
1889	t3_port_intr_disable(pi->adapter, pi->port_id);
1890	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1891
1892	/* disable pause frames */
1893	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1894			 F_TXPAUSEEN, 0);
1895
1896	/* Reset RX FIFO HWM */
1897        t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1898			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1899
1900
1901	ADAPTER_LOCK(pi->adapter);
1902	clrbit(&pi->adapter->open_device_map, pi->port_id);
1903
1904	if (pi->adapter->open_device_map == 0) {
1905		cxgb_down_locked(pi->adapter);
1906	} else
1907		ADAPTER_UNLOCK(pi->adapter);
1908
1909#if !defined(LINK_ATTACH)
1910	DELAY(100);
1911
1912	/* Wait for TXFIFO empty */
1913	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1914			F_TXFIFO_EMPTY, 1, 20, 5);
1915
1916	DELAY(100);
1917	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1918
1919	pi->phy.ops->power_down(&pi->phy, 1);
1920#endif
1921
1922}
1923
1924static int
1925cxgb_set_mtu(struct port_info *p, int mtu)
1926{
1927	struct ifnet *ifp = p->ifp;
1928	int error = 0;
1929
1930	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1931		error = EINVAL;
1932	else if (ifp->if_mtu != mtu) {
1933		PORT_LOCK(p);
1934		ifp->if_mtu = mtu;
1935		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1936			cxgb_stop_locked(p);
1937			cxgb_init_locked(p);
1938		}
1939		PORT_UNLOCK(p);
1940	}
1941	return (error);
1942}
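/*
 * Usage sketch (assuming the stock ETHERMIN/ETHERMTU_JUMBO limits): an
 * "ifconfig cxgb0 mtu 9000" request reaches this path via SIOCSIFMTU and
 * triggers a stop/init cycle if the port is running; values outside the
 * [ETHERMIN, ETHERMTU_JUMBO] range are rejected with EINVAL.
 */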
1943
1944/*
1945 * Mark lro enabled or disabled in all qsets for this port
1946 */
1947static int
1948cxgb_set_lro(struct port_info *p, int enabled)
1949{
1950	int i;
1951	struct adapter *adp = p->adapter;
1952	struct sge_qset *q;
1953
1954	PORT_LOCK_ASSERT_OWNED(p);
1955	for (i = 0; i < p->nqsets; i++) {
1956		q = &adp->sge.qs[p->first_qset + i];
1957		q->lro.enabled = (enabled != 0);
1958	}
1959	return (0);
1960}
1961
1962static int
1963cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1964{
1965	struct port_info *p = ifp->if_softc;
1966	struct ifaddr *ifa = (struct ifaddr *)data;
1967	struct ifreq *ifr = (struct ifreq *)data;
1968	int flags, error = 0, reinit = 0;
1969	uint32_t mask;
1970
1971	/*
1972	 * XXX need to check that we aren't in the middle of an unload
1973	 */
1974	switch (command) {
1975	case SIOCSIFMTU:
1976		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1977		break;
1978	case SIOCSIFADDR:
1979		if (ifa->ifa_addr->sa_family == AF_INET) {
1980			ifp->if_flags |= IFF_UP;
1981			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1982				PORT_LOCK(p);
1983				cxgb_init_locked(p);
1984				PORT_UNLOCK(p);
1985			}
1986			arp_ifinit(ifp, ifa);
1987		} else
1988			error = ether_ioctl(ifp, command, data);
1989		break;
1990	case SIOCSIFFLAGS:
1991		PORT_LOCK(p);
1992		if (ifp->if_flags & IFF_UP) {
1993			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1994				flags = p->if_flags;
1995				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1996				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1997					cxgb_set_rxmode(p);
1998			} else
1999				cxgb_init_locked(p);
2000			p->if_flags = ifp->if_flags;
2001		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2002			cxgb_stop_locked(p);
2003
2004		PORT_UNLOCK(p);
2005		break;
2006	case SIOCADDMULTI:
2007	case SIOCDELMULTI:
2008		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2009			cxgb_set_rxmode(p);
2010		}
2011		break;
2012	case SIOCSIFMEDIA:
2013	case SIOCGIFMEDIA:
2014		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2015		break;
2016	case SIOCSIFCAP:
2017		PORT_LOCK(p);
2018		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2019		if (mask & IFCAP_TXCSUM) {
2020			if (IFCAP_TXCSUM & ifp->if_capenable) {
2021				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2022				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2023				    | CSUM_IP | CSUM_TSO);
2024			} else {
2025				ifp->if_capenable |= IFCAP_TXCSUM;
2026				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2027				    | CSUM_IP);
2028			}
2029		}
2030		if (mask & IFCAP_RXCSUM) {
2031			ifp->if_capenable ^= IFCAP_RXCSUM;
2032		}
2033		if (mask & IFCAP_TSO4) {
2034			if (IFCAP_TSO4 & ifp->if_capenable) {
2035				ifp->if_capenable &= ~IFCAP_TSO4;
2036				ifp->if_hwassist &= ~CSUM_TSO;
2037			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2038				ifp->if_capenable |= IFCAP_TSO4;
2039				ifp->if_hwassist |= CSUM_TSO;
2040			} else {
2041				if (cxgb_debug)
2042					printf("cxgb requires tx checksum offload"
2043					    " be enabled to use TSO\n");
2044				error = EINVAL;
2045			}
2046		}
2047		if (mask & IFCAP_LRO) {
2048			ifp->if_capenable ^= IFCAP_LRO;
2049
2050			/* Safe to do this even if cxgb_up not called yet */
2051			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2052		}
2053		if (mask & IFCAP_VLAN_HWTAGGING) {
2054			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2055			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2056		}
2057		if (mask & IFCAP_VLAN_MTU) {
2058			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2059			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2060		}
2061		if (mask & IFCAP_VLAN_HWCSUM) {
2062			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2063		}
2064		if (reinit) {
2065			cxgb_stop_locked(p);
2066			cxgb_init_locked(p);
2067		}
2068		PORT_UNLOCK(p);
2069
2070#ifdef VLAN_CAPABILITIES
2071		VLAN_CAPABILITIES(ifp);
2072#endif
2073		break;
2074	default:
2075		error = ether_ioctl(ifp, command, data);
2076		break;
2077	}
2078	return (error);
2079}
2080
2081static int
2082cxgb_media_change(struct ifnet *ifp)
2083{
2084	if_printf(ifp, "media change not supported\n");
2085	return (ENXIO);
2086}
2087
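/*
 * Report link state, speed, and duplex from the port's last known link
 * configuration.
 */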
2088static void
2089cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2090{
2091	struct port_info *p = ifp->if_softc;
2092
2093	ifmr->ifm_status = IFM_AVALID;
2094	ifmr->ifm_active = IFM_ETHER;
2095
2096	if (!p->link_config.link_ok)
2097		return;
2098
2099	ifmr->ifm_status |= IFM_ACTIVE;
2100
2101	switch (p->link_config.speed) {
2102	case 10:
2103		ifmr->ifm_active |= IFM_10_T;
2104		break;
2105	case 100:
2106		ifmr->ifm_active |= IFM_100_TX;
2107		break;
2108	case 1000:
2109		ifmr->ifm_active |= IFM_1000_T;
2110		break;
2111	}
2112
2113	if (p->link_config.duplex)
2114		ifmr->ifm_active |= IFM_FDX;
2115	else
2116		ifmr->ifm_active |= IFM_HDX;
2117}
2118
2119static void
2120cxgb_async_intr(void *data)
2121{
2122	adapter_t *sc = data;
2123
2124	if (cxgb_debug)
2125		device_printf(sc->dev, "cxgb_async_intr\n");
2126	/*
2127	 * May need to sleep - defer to taskqueue
2128	 */
2129	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2130}
2131
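/*
 * Task handler for external (PHY) interrupts.  Once the PHY interrupt has
 * been serviced, the T3DBG bit is turned back on in the slow interrupt
 * mask so further external interrupts can be delivered.
 */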
2132static void
2133cxgb_ext_intr_handler(void *arg, int count)
2134{
2135	adapter_t *sc = (adapter_t *)arg;
2136
2137	if (cxgb_debug)
2138		printf("cxgb_ext_intr_handler\n");
2139
2140	t3_phy_intr_handler(sc);
2141
2142	/* Now reenable external interrupts */
2143	ADAPTER_LOCK(sc);
2144	if (sc->slow_intr_mask) {
2145		sc->slow_intr_mask |= F_T3DBG;
2146		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2147		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2148	}
2149	ADAPTER_UNLOCK(sc);
2150}
2151
2152static void
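/*
 * Poll link state on PHYs that cannot signal link changes by interrupt
 * and refresh each port's if_baudrate.
 */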
2153check_link_status(adapter_t *sc)
2154{
2155	int i;
2156
2157	for (i = 0; i < (sc)->params.nports; ++i) {
2158		struct port_info *p = &sc->port[i];
2159
2160		if (!(p->phy.caps & SUPPORTED_IRQ))
2161			t3_link_changed(sc, i);
2162		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2163	}
2164}
2165
2166static void
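/*
 * Watchdog for T3B2 MACs.  Each running port's MAC is checked; one that
 * the watchdog reports as needing a reset is reprogrammed (MTU, address,
 * rx mode, link) and re-enabled, and the toggle/reset counters updated.
 */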
2167check_t3b2_mac(struct adapter *adapter)
2168{
2169	int i;
2170
2171	if (adapter->flags & CXGB_SHUTDOWN)
2172		return;
2173
2174	for_each_port(adapter, i) {
2175		struct port_info *p = &adapter->port[i];
2176		struct ifnet *ifp = p->ifp;
2177		int status;
2178
2179		if (adapter->flags & CXGB_SHUTDOWN)
2180			return;
2181
2182		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2183			continue;
2184
2185		status = 0;
2186		PORT_LOCK(p);
2187		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2188			status = t3b2_mac_watchdog_task(&p->mac);
2189		if (status == 1)
2190			p->mac.stats.num_toggled++;
2191		else if (status == 2) {
2192			struct cmac *mac = &p->mac;
2193			int mtu = ifp->if_mtu;
2194
2195			if (ifp->if_capenable & IFCAP_VLAN_MTU)
2196				mtu += ETHER_VLAN_ENCAP_LEN;
2197			t3_mac_set_mtu(mac, mtu);
2198			t3_mac_set_address(mac, 0, p->hw_addr);
2199			cxgb_set_rxmode(p);
2200			t3_link_start(&p->phy, mac, &p->link_config);
2201			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2202			t3_port_intr_enable(adapter, p->port_id);
2203			p->mac.stats.num_resets++;
2204		}
2205		PORT_UNLOCK(p);
2206	}
2207}
2208
2209static void
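/*
 * Periodic callout: hand the work off to the tick task and reschedule,
 * unless the adapter is shutting down.
 */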
2210cxgb_tick(void *arg)
2211{
2212	adapter_t *sc = (adapter_t *)arg;
2213
2214	if (sc->flags & CXGB_SHUTDOWN)
2215		return;
2216
2217	taskqueue_enqueue(sc->tq, &sc->tick_task);
2218	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2219}
2220
2221static void
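/*
 * Tick task: poll link state when link polling is enabled, run the T3B2
 * MAC watchdog where applicable, and accumulate MAC statistics at the
 * configured interval.
 */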
2222cxgb_tick_handler(void *arg, int count)
2223{
2224	adapter_t *sc = (adapter_t *)arg;
2225	const struct adapter_params *p = &sc->params;
2226	int i;
2227
2228	if (sc->flags & CXGB_SHUTDOWN)
2229		return;
2230
2231	ADAPTER_LOCK(sc);
2232	if (p->linkpoll_period)
2233		check_link_status(sc);
2234
2235	sc->check_task_cnt++;
2236
2237	/*
2238	 * adapter lock can currently only be acquired after the
2239	 * port lock
2240	 */
2241	ADAPTER_UNLOCK(sc);
2242
2243	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2244		check_t3b2_mac(sc);
2245
2246	/* Update MAC stats if it's time to do so */
2247	if (!p->linkpoll_period ||
2248	    (sc->check_task_cnt * p->linkpoll_period) / 10 >=
2249	    p->stats_update_period) {
2250		for_each_port(sc, i) {
2251			struct port_info *port = &sc->port[i];
2252			PORT_LOCK(port);
2253			t3_mac_update_stats(&port->mac);
2254			PORT_UNLOCK(port);
2255		}
2256		sc->check_task_cnt = 0;
2257	}
2258}
2259
2260static void
2261touch_bars(device_t dev)
2262{
2263	/*
2264	 * Don't enable yet
2265	 */
2266#if !defined(__LP64__) && 0
2267	u32 v;
2268
2269	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2270	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2271	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2272	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2273	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2274	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2275#endif
2276}
2277
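/*
 * Write a block of data to the serial EEPROM.  Unaligned writes are
 * widened with a bounce buffer that is pre-filled by reading the words
 * being partially overwritten; write protection is lifted for the
 * duration of the update.
 */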
2278static int
2279set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2280{
2281	uint8_t *buf;
2282	int err = 0;
2283	u32 aligned_offset, aligned_len, *p;
2284	struct adapter *adapter = pi->adapter;
2285
2286
2287	aligned_offset = offset & ~3;
2288	aligned_len = (len + (offset & 3) + 3) & ~3;
2289
2290	if (aligned_offset != offset || aligned_len != len) {
2291		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2292		if (!buf)
2293			return (ENOMEM);
2294		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2295		if (!err && aligned_len > 4)
2296			err = t3_seeprom_read(adapter,
2297					      aligned_offset + aligned_len - 4,
2298					      (u32 *)&buf[aligned_len - 4]);
2299		if (err)
2300			goto out;
2301		memcpy(buf + (offset & 3), data, len);
2302	} else
2303		buf = (uint8_t *)(uintptr_t)data;
2304
2305	err = t3_seeprom_wp(adapter, 0);
2306	if (err)
2307		goto out;
2308
2309	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2310		err = t3_seeprom_write(adapter, aligned_offset, *p);
2311		aligned_offset += 4;
2312	}
2313
2314	if (!err)
2315		err = t3_seeprom_wp(adapter, 1);
2316out:
2317	if (buf != data)
2318		free(buf, M_DEVBUF);
2319	return (err);
2320}
2321
2322
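/*
 * Range-check helper for the ioctl paths below: negative values mean
 * "not specified" and are always accepted.
 */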
2323static int
2324in_range(int val, int lo, int hi)
2325{
2326	return val < 0 || (val <= hi && val >= lo);
2327}
2328
2329static int
2330cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2331{
2332	return (0);
2333}
2334
2335static int
2336cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2337{
2338	return (0);
2339}
2340
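/*
 * ioctl handler for the Chelsio extension control device.  All commands
 * require driver privilege.
 */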
2341static int
2342cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2343    int fflag, struct thread *td)
2344{
2345	int mmd, error = 0;
2346	struct port_info *pi = dev->si_drv1;
2347	adapter_t *sc = pi->adapter;
2348
2349#ifdef PRIV_SUPPORTED
2350	if (priv_check(td, PRIV_DRIVER)) {
2351		if (cxgb_debug)
2352			printf("user does not have access to privileged ioctls\n");
2353		return (EPERM);
2354	}
2355#else
2356	if (suser(td)) {
2357		if (cxgb_debug)
2358			printf("user does not have access to privileged ioctls\n");
2359		return (EPERM);
2360	}
2361#endif
2362
2363	switch (cmd) {
2364	case SIOCGMIIREG: {
2365		uint32_t val;
2366		struct cphy *phy = &pi->phy;
2367		struct mii_data *mid = (struct mii_data *)data;
2368
2369		if (!phy->mdio_read)
2370			return (EOPNOTSUPP);
2371		if (is_10G(sc)) {
2372			mmd = mid->phy_id >> 8;
2373			if (!mmd)
2374				mmd = MDIO_DEV_PCS;
2375			else if (mmd > MDIO_DEV_XGXS)
2376				return (EINVAL);
2377
2378			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2379					     mid->reg_num, &val);
2380		} else
2381			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2382					     mid->reg_num & 0x1f, &val);
2383		if (error == 0)
2384			mid->val_out = val;
2385		break;
2386	}
2387	case SIOCSMIIREG: {
2388		struct cphy *phy = &pi->phy;
2389		struct mii_data *mid = (struct mii_data *)data;
2390
2391		if (!phy->mdio_write)
2392			return (EOPNOTSUPP);
2393		if (is_10G(sc)) {
2394			mmd = mid->phy_id >> 8;
2395			if (!mmd)
2396				mmd = MDIO_DEV_PCS;
2397			else if (mmd > MDIO_DEV_XGXS)
2398				return (EINVAL);
2399
2400			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2401					      mmd, mid->reg_num, mid->val_in);
2402		} else
2403			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2404					      mid->reg_num & 0x1f,
2405					      mid->val_in);
2406		break;
2407	}
2408	case CHELSIO_SETREG: {
2409		struct ch_reg *edata = (struct ch_reg *)data;
2410		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2411			return (EFAULT);
2412		t3_write_reg(sc, edata->addr, edata->val);
2413		break;
2414	}
2415	case CHELSIO_GETREG: {
2416		struct ch_reg *edata = (struct ch_reg *)data;
2417		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2418			return (EFAULT);
2419		edata->val = t3_read_reg(sc, edata->addr);
2420		break;
2421	}
2422	case CHELSIO_GET_SGE_CONTEXT: {
2423		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2424		mtx_lock_spin(&sc->sge.reg_lock);
2425		switch (ecntxt->cntxt_type) {
2426		case CNTXT_TYPE_EGRESS:
2427			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2428			    ecntxt->data);
2429			break;
2430		case CNTXT_TYPE_FL:
2431			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
2432			    ecntxt->data);
2433			break;
2434		case CNTXT_TYPE_RSP:
2435			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2436			    ecntxt->data);
2437			break;
2438		case CNTXT_TYPE_CQ:
2439			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
2440			    ecntxt->data);
2441			break;
2442		default:
2443			error = EINVAL;
2444			break;
2445		}
2446		mtx_unlock_spin(&sc->sge.reg_lock);
2447		break;
2448	}
2449	case CHELSIO_GET_SGE_DESC: {
2450		struct ch_desc *edesc = (struct ch_desc *)data;
2451		int ret;
2452		if (edesc->queue_num >= SGE_QSETS * 6)
2453			return (EINVAL);
2454		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2455		    edesc->queue_num % 6, edesc->idx, edesc->data);
2456		if (ret < 0)
2457			return (EINVAL);
2458		edesc->size = ret;
2459		break;
2460	}
2461	case CHELSIO_SET_QSET_PARAMS: {
2462		struct qset_params *q;
2463		struct ch_qset_params *t = (struct ch_qset_params *)data;
2464		int i;
2465
2466		if (t->qset_idx >= SGE_QSETS)
2467			return (EINVAL);
2468		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
2469		    !in_range(t->cong_thres, 0, 255) ||
2470		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
2471			      MAX_TXQ_ENTRIES) ||
2472		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
2473			      MAX_TXQ_ENTRIES) ||
2474		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
2475			      MAX_CTRL_TXQ_ENTRIES) ||
2476		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
2477		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
2478			      MAX_RX_JUMBO_BUFFERS) ||
2479		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2480			return (EINVAL);
2481
2482		if ((sc->flags & FULL_INIT_DONE) && t->lro > 0)
2483			for_each_port(sc, i) {
2484				pi = adap2pinfo(sc, i);
2485				if (t->qset_idx >= pi->first_qset &&
2486				    t->qset_idx < pi->first_qset + pi->nqsets
2487#if 0
2488					&& !pi->rx_csum_offload
2489#endif
2490					)
2491					return (EINVAL);
2492			}
2493		if ((sc->flags & FULL_INIT_DONE) &&
2494		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2495		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2496		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2497		     t->polling >= 0 || t->cong_thres >= 0))
2498			return (EBUSY);
2499
2500		q = &sc->params.sge.qset[t->qset_idx];
2501
2502		if (t->rspq_size >= 0)
2503			q->rspq_size = t->rspq_size;
2504		if (t->fl_size[0] >= 0)
2505			q->fl_size = t->fl_size[0];
2506		if (t->fl_size[1] >= 0)
2507			q->jumbo_size = t->fl_size[1];
2508		if (t->txq_size[0] >= 0)
2509			q->txq_size[0] = t->txq_size[0];
2510		if (t->txq_size[1] >= 0)
2511			q->txq_size[1] = t->txq_size[1];
2512		if (t->txq_size[2] >= 0)
2513			q->txq_size[2] = t->txq_size[2];
2514		if (t->cong_thres >= 0)
2515			q->cong_thres = t->cong_thres;
2516		if (t->intr_lat >= 0) {
2517			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2518
2519			q->coalesce_usecs = t->intr_lat;
2520			t3_update_qset_coalesce(qs, q);
2521		}
2522		break;
2523	}
2524	case CHELSIO_GET_QSET_PARAMS: {
2525		struct qset_params *q;
2526		struct ch_qset_params *t = (struct ch_qset_params *)data;
2527
2528		if (t->qset_idx >= SGE_QSETS)
2529			return (EINVAL);
2530
2531		q = &(sc)->params.sge.qset[t->qset_idx];
2532		t->rspq_size   = q->rspq_size;
2533		t->txq_size[0] = q->txq_size[0];
2534		t->txq_size[1] = q->txq_size[1];
2535		t->txq_size[2] = q->txq_size[2];
2536		t->fl_size[0]  = q->fl_size;
2537		t->fl_size[1]  = q->jumbo_size;
2538		t->polling     = q->polling;
2539		t->intr_lat    = q->coalesce_usecs;
2540		t->cong_thres  = q->cong_thres;
2541		break;
2542	}
2543	case CHELSIO_SET_QSET_NUM: {
2544		struct ch_reg *edata = (struct ch_reg *)data;
2545		unsigned int port_idx = pi->port_id;
2546
2547		if (sc->flags & FULL_INIT_DONE)
2548			return (EBUSY);
2549		if (edata->val < 1 ||
2550		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2551			return (EINVAL);
2552		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2553			return (EINVAL);
2554		sc->port[port_idx].nqsets = edata->val;
2555		sc->port[0].first_qset = 0;
2556		/*
2557		 * XXX hardcode ourselves to 2 ports just like LEEENUX
2558		 */
2559		sc->port[1].first_qset = sc->port[0].nqsets;
2560		break;
2561	}
2562	case CHELSIO_GET_QSET_NUM: {
2563		struct ch_reg *edata = (struct ch_reg *)data;
2564		edata->val = pi->nqsets;
2565		break;
2566	}
2567#ifdef notyet
2568	case CHELSIO_LOAD_FW:
2569	case CHELSIO_GET_PM:
2570	case CHELSIO_SET_PM:
2571		return (EOPNOTSUPP);
2572		break;
2573#endif
2574	case CHELSIO_SETMTUTAB: {
2575		struct ch_mtus *m = (struct ch_mtus *)data;
2576		int i;
2577
2578		if (!is_offload(sc))
2579			return (EOPNOTSUPP);
2580		if (offload_running(sc))
2581			return (EBUSY);
2582		if (m->nmtus != NMTUS)
2583			return (EINVAL);
2584		if (m->mtus[0] < 81)         /* accommodate SACK */
2585			return (EINVAL);
2586
2587		/*
2588		 * MTUs must be in ascending order
2589		 */
2590		for (i = 1; i < NMTUS; ++i)
2591			if (m->mtus[i] < m->mtus[i - 1])
2592				return (EINVAL);
2593
2594		memcpy(sc->params.mtus, m->mtus,
2595		       sizeof(sc->params.mtus));
2596		break;
2597	}
2598	case CHELSIO_GETMTUTAB: {
2599		struct ch_mtus *m = (struct ch_mtus *)data;
2600
2601		if (!is_offload(sc))
2602			return (EOPNOTSUPP);
2603
2604		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2605		m->nmtus = NMTUS;
2606		break;
2607	}
2608	case CHELSIO_DEVUP:
2609		if (!is_offload(sc))
2610			return (EOPNOTSUPP);
2611		return offload_open(pi);
2612		break;
2613	case CHELSIO_GET_MEM: {
2614		struct ch_mem_range *t = (struct ch_mem_range *)data;
2615		struct mc7 *mem;
2616		uint8_t *useraddr;
2617		u64 buf[32];
2618
2619		if (!is_offload(sc))
2620			return (EOPNOTSUPP);
2621		if (!(sc->flags & FULL_INIT_DONE))
2622			return (EIO);         /* need the memory controllers */
2623		if ((t->addr & 0x7) || (t->len & 0x7))
2624			return (EINVAL);
2625		if (t->mem_id == MEM_CM)
2626			mem = &sc->cm;
2627		else if (t->mem_id == MEM_PMRX)
2628			mem = &sc->pmrx;
2629		else if (t->mem_id == MEM_PMTX)
2630			mem = &sc->pmtx;
2631		else
2632			return (EINVAL);
2633
2634		/*
2635		 * Version scheme:
2636		 * bits 0..9: chip version
2637		 * bits 10..15: chip revision
2638		 */
2639		t->version = 3 | (sc->params.rev << 10);
2640
2641		/*
2642		 * Read 256 bytes at a time as len can be large and we don't
2643		 * want to use huge intermediate buffers.
2644		 */
2645		useraddr = (uint8_t *)t->buf;
2646		while (t->len) {
2647			unsigned int chunk = min(t->len, sizeof(buf));
2648
2649			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2650			if (error)
2651				return (-error);
2652			if (copyout(buf, useraddr, chunk))
2653				return (EFAULT);
2654			useraddr += chunk;
2655			t->addr += chunk;
2656			t->len -= chunk;
2657		}
2658		break;
2659	}
2660	case CHELSIO_READ_TCAM_WORD: {
2661		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2662
2663		if (!is_offload(sc))
2664			return (EOPNOTSUPP);
2665		if (!(sc->flags & FULL_INIT_DONE))
2666			return (EIO);         /* need MC5 */
2667		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2668		break;
2669	}
2670	case CHELSIO_SET_TRACE_FILTER: {
2671		struct ch_trace *t = (struct ch_trace *)data;
2672		const struct trace_params *tp;
2673
2674		tp = (const struct trace_params *)&t->sip;
2675		if (t->config_tx)
2676			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2677					       t->trace_tx);
2678		if (t->config_rx)
2679			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2680					       t->trace_rx);
2681		break;
2682	}
2683	case CHELSIO_SET_PKTSCHED: {
2684		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2685		if (sc->open_device_map == 0)
2686			return (EAGAIN);
2687		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2688		    p->binding);
2689		break;
2690	}
2691	case CHELSIO_IFCONF_GETREGS: {
2692		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2693		int reglen = cxgb_get_regs_len();
2694		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2695		if (buf == NULL) {
2696			return (ENOMEM);
2697		} else if (regs->len > reglen)
2698			regs->len = reglen;
2699		else if (regs->len < reglen) {
2700			error = E2BIG;
2701			goto done;
2702		}
2703		cxgb_get_regs(sc, regs, buf);
2704		error = copyout(buf, regs->data, reglen);
2705
2706		done:
2707		free(buf, M_DEVBUF);
2708
2709		break;
2710	}
2711	case CHELSIO_SET_HW_SCHED: {
2712		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2713		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2714
2715		if ((sc->flags & FULL_INIT_DONE) == 0)
2716			return (EAGAIN);       /* need TP to be initialized */
2717		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2718		    !in_range(t->channel, 0, 1) ||
2719		    !in_range(t->kbps, 0, 10000000) ||
2720		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2721		    !in_range(t->flow_ipg, 0,
2722			      dack_ticks_to_usec(sc, 0x7ff)))
2723			return (EINVAL);
2724
2725		if (t->kbps >= 0) {
2726			error = t3_config_sched(sc, t->kbps, t->sched);
2727			if (error < 0)
2728				return (-error);
2729		}
2730		if (t->class_ipg >= 0)
2731			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2732		if (t->flow_ipg >= 0) {
2733			t->flow_ipg *= 1000;     /* us -> ns */
2734			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2735		}
2736		if (t->mode >= 0) {
2737			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2738
2739			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2740					 bit, t->mode ? bit : 0);
2741		}
2742		if (t->channel >= 0)
2743			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2744					 1 << t->sched, t->channel << t->sched);
2745		break;
2746	}
2747	default:
2748		return (EOPNOTSUPP);
2749		break;
2750	}
2751
2752	return (error);
2753}
2754
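/*
 * Copy the adapter registers in [start, end] into the register dump
 * buffer, placing each value at its register address offset.
 */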
2755static __inline void
2756reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2757    unsigned int end)
2758{
2759	uint32_t *p = (uint32_t *)(buf + start);
2760
2761	for ( ; start <= end; start += sizeof(uint32_t))
2762		*p++ = t3_read_reg(ap, start);
2763}
2764
2765#define T3_REGMAP_SIZE (3 * 1024)
2766static int
2767cxgb_get_regs_len(void)
2768{
2769	return T3_REGMAP_SIZE;
2770}
2771#undef T3_REGMAP_SIZE
2772
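/*
 * Fill *buf with a snapshot of the interesting register blocks for the
 * CHELSIO_IFCONF_GETREGS ioctl.
 */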
2773static void
2774cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2775{
2776
2777	/*
2778	 * Version scheme:
2779	 * bits 0..9: chip version
2780	 * bits 10..15: chip revision
2781	 * bit 31: set for PCIe cards
2782	 */
2783	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2784
2785	/*
2786	 * We skip the MAC statistics registers because they are clear-on-read.
2787	 * Also reading multi-register stats would need to synchronize with the
2788	 * periodic mac stats accumulation.  Hard to justify the complexity.
2789	 */
2790	memset(buf, 0, REGDUMP_SIZE);
2791	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2792	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2793	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2794	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2795	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2796	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2797		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2798	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2799		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2800}
2801
2802
2803MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
2804