cxgb_main.c revision 181616
1/**************************************************************************
2
3Copyright (c) 2007-2008, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 181616 2008-08-12 00:27:32Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/ktr.h>
44#include <sys/rman.h>
45#include <sys/ioccom.h>
46#include <sys/mbuf.h>
47#include <sys/linker.h>
48#include <sys/firmware.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/taskqueue.h>
56#include <sys/proc.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64#include <net/if_types.h>
65#include <net/if_vlan_var.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet/ip.h>
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74
75#include <dev/pci/pcireg.h>
76#include <dev/pci/pcivar.h>
77#include <dev/pci/pci_private.h>
78
79#ifdef CONFIG_DEFINED
80#include <cxgb_include.h>
81#else
82#include <dev/cxgb/cxgb_include.h>
83#endif
84
85#ifdef PRIV_SUPPORTED
86#include <sys/priv.h>
87#endif
88
89#ifdef IFNET_MULTIQUEUE
90#include <machine/intr_machdep.h>
91#endif
92
93static int cxgb_setup_msix(adapter_t *, int);
94static void cxgb_teardown_msix(adapter_t *);
95static void cxgb_init(void *);
96static void cxgb_init_locked(struct port_info *);
97static void cxgb_stop_locked(struct port_info *);
98static void cxgb_set_rxmode(struct port_info *);
99static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
100static int cxgb_media_change(struct ifnet *);
101static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
102static int setup_sge_qsets(adapter_t *);
103static void cxgb_async_intr(void *);
104static void cxgb_ext_intr_handler(void *, int);
105static void cxgb_tick_handler(void *, int);
106static void cxgb_down_locked(struct adapter *sc);
107static void cxgb_tick(void *);
108static void setup_rss(adapter_t *sc);
109
110/* Attachment glue for the PCI controller end of the device.  Each port of
111 * the device is attached separately, as defined later.
112 */
113static int cxgb_controller_probe(device_t);
114static int cxgb_controller_attach(device_t);
115static int cxgb_controller_detach(device_t);
116static void cxgb_free(struct adapter *);
117static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
118    unsigned int end);
119static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
120static int cxgb_get_regs_len(void);
121static int offload_open(struct port_info *pi);
122static void touch_bars(device_t dev);
123static int offload_close(struct t3cdev *tdev);
124static void cxgb_link_start(struct port_info *p);
125
126static device_method_t cxgb_controller_methods[] = {
127	DEVMETHOD(device_probe,		cxgb_controller_probe),
128	DEVMETHOD(device_attach,	cxgb_controller_attach),
129	DEVMETHOD(device_detach,	cxgb_controller_detach),
130
131	/* bus interface */
132	DEVMETHOD(bus_print_child,	bus_generic_print_child),
133	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
134
135	{ 0, 0 }
136};
137
138static driver_t cxgb_controller_driver = {
139	"cxgbc",
140	cxgb_controller_methods,
141	sizeof(struct adapter)
142};
143
144static devclass_t	cxgb_controller_devclass;
145DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
146
147/*
148 * Attachment glue for the ports.  Attachment is done directly to the
149 * controller device.
150 */
151static int cxgb_port_probe(device_t);
152static int cxgb_port_attach(device_t);
153static int cxgb_port_detach(device_t);
154
155static device_method_t cxgb_port_methods[] = {
156	DEVMETHOD(device_probe,		cxgb_port_probe),
157	DEVMETHOD(device_attach,	cxgb_port_attach),
158	DEVMETHOD(device_detach,	cxgb_port_detach),
159	{ 0, 0 }
160};
161
162static driver_t cxgb_port_driver = {
163	"cxgb",
164	cxgb_port_methods,
165	0
166};
167
168static d_ioctl_t cxgb_extension_ioctl;
169static d_open_t cxgb_extension_open;
170static d_close_t cxgb_extension_close;
171
172static struct cdevsw cxgb_cdevsw = {
173       .d_version =    D_VERSION,
174       .d_flags =      0,
175       .d_open =       cxgb_extension_open,
176       .d_close =      cxgb_extension_close,
177       .d_ioctl =      cxgb_extension_ioctl,
178       .d_name =       "cxgb",
179};
180
181static devclass_t	cxgb_port_devclass;
182DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
183
184#define SGE_MSIX_COUNT (SGE_QSETS + 1)
185
186/*
187 * The driver uses the best interrupt scheme available on a platform in the
188 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
189 * of these schemes the driver may consider as follows:
190 *
191 * msi = 2: choose from among all three options
192 * msi = 1: only consider MSI and pin interrupts
193 * msi = 0: force pin interrupts
194 */
195static int msi_allowed = 2;
196
197TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
198SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
199SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
200    "MSI-X, MSI, INTx selector");
201
202/*
203 * The driver enables offload as a default.
204 * To disable it, use ofld_disable = 1.
205 */
206static int ofld_disable = 0;
207TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
208SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
209    "disable ULP offload");
210
211/*
212 * The driver uses an auto-queue algorithm by default.
213 * To disable it and force a single queue-set per port, use singleq = 1.
214 */
215static int singleq = 0;
216TUNABLE_INT("hw.cxgb.singleq", &singleq);
217SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
218    "use a single queue-set per port");
219
220
221/*
222 * The driver skips the firmware update when the version on the card already
223 * matches the driver.  To force an update anyway, use force_fw_update = 1.
224 */
225static int force_fw_update = 0;
226TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
227SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
228    "update firmware even if up to date");
229
230int cxgb_use_16k_clusters = 0;
231TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
232SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
233    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
234
235enum {
236	MAX_TXQ_ENTRIES      = 16384,
237	MAX_CTRL_TXQ_ENTRIES = 1024,
238	MAX_RSPQ_ENTRIES     = 16384,
239	MAX_RX_BUFFERS       = 16384,
240	MAX_RX_JUMBO_BUFFERS = 16384,
241	MIN_TXQ_ENTRIES      = 4,
242	MIN_CTRL_TXQ_ENTRIES = 4,
243	MIN_RSPQ_ENTRIES     = 32,
244	MIN_FL_ENTRIES       = 32,
245	MIN_FL_JUMBO_ENTRIES = 32
246};
247
248struct filter_info {
249	u32 sip;
250	u32 sip_mask;
251	u32 dip;
252	u16 sport;
253	u16 dport;
254	u32 vlan:12;
255	u32 vlan_prio:3;
256	u32 mac_hit:1;
257	u32 mac_idx:4;
258	u32 mac_vld:1;
259	u32 pkt_type:2;
260	u32 report_filter_id:1;
261	u32 pass:1;
262	u32 rss:1;
263	u32 qset:3;
264	u32 locked:1;
265	u32 valid:1;
266};
267
268enum { FILTER_NO_VLAN_PRI = 7 };
269
270#define PORT_MASK ((1 << MAX_NPORTS) - 1)
271
272/* Table for probing the cards.  The desc field isn't actually used */
273struct cxgb_ident {
274	uint16_t	vendor;
275	uint16_t	device;
276	int		index;
277	char		*desc;
278} cxgb_identifiers[] = {
279	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
280	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
281	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
282	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
283	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
284	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
285	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
286	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
287	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
288	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
289	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
290	{0, 0, 0, NULL}
291};
292
293static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
294
295
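/*
 * Dump the TCB for the given tid from CM memory into the KTR trace buffer,
 * 16 bytes per trace record.
 */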
296void
297cxgb_log_tcb(struct adapter *sc, unsigned int tid)
298{
299	char buf[TCB_SIZE];
300	uint64_t *tcb = (uint64_t *)buf;
301	int i, error;
302	struct mc7 *mem = &sc->cm;
303
304	error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb);
305	if (error)
306		printf("cxgb_log_tcb failed\n");
307
308	CTR1(KTR_CXGB, "TCB tid=%u", tid);
309	for (i = 0; i < TCB_SIZE / 32; i++) {
310		CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x",
311		    i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
312		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
313		tcb += 2;
314		CTR4(KTR_CXGB, "   %08x %08x %08x %08x",
315		    (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32),
316		    (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32));
317		tcb += 2;
318	}
319}
320
321static __inline char
322t3rev2char(struct adapter *adapter)
323{
324	char rev = 'z';
325
326	switch(adapter->params.rev) {
327	case T3_REV_A:
328		rev = 'a';
329		break;
330	case T3_REV_B:
331	case T3_REV_B2:
332		rev = 'b';
333		break;
334	case T3_REV_C:
335		rev = 'c';
336		break;
337	}
338	return rev;
339}
340
341static struct cxgb_ident *
342cxgb_get_ident(device_t dev)
343{
344	struct cxgb_ident *id;
345
346	for (id = cxgb_identifiers; id->desc != NULL; id++) {
347		if ((id->vendor == pci_get_vendor(dev)) &&
348		    (id->device == pci_get_device(dev))) {
349			return (id);
350		}
351	}
352	return (NULL);
353}
354
355static const struct adapter_info *
356cxgb_get_adapter_info(device_t dev)
357{
358	struct cxgb_ident *id;
359	const struct adapter_info *ai;
360
361	id = cxgb_get_ident(dev);
362	if (id == NULL)
363		return (NULL);
364
365	ai = t3_get_adapter_info(id->index);
366
367	return (ai);
368}
369
370static int
371cxgb_controller_probe(device_t dev)
372{
373	const struct adapter_info *ai;
374	char *ports, buf[80];
375	int nports;
376
377	ai = cxgb_get_adapter_info(dev);
378	if (ai == NULL)
379		return (ENXIO);
380
381	nports = ai->nports0 + ai->nports1;
382	if (nports == 1)
383		ports = "port";
384	else
385		ports = "ports";
386
387	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, nports, ports);
388	device_set_desc_copy(dev, buf);
389	return (BUS_PROBE_DEFAULT);
390}
391
392#define FW_FNAME "cxgb_t3fw"
393#define TPEEPROM_NAME "t3b_tp_eeprom"
394#define TPSRAM_NAME "t3b_protocol_sram"
395
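/*
 * Fetch the firmware image FW_FNAME with firmware(9) and write it to the
 * adapter via t3_load_fw().  Returns 0 on success or an errno on failure.
 */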
396static int
397upgrade_fw(adapter_t *sc)
398{
399#ifdef FIRMWARE_LATEST
400	const struct firmware *fw;
401#else
402	struct firmware *fw;
403#endif
404	int status;
405
406	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
407		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
408		return (ENOENT);
409	} else
410		device_printf(sc->dev, "updating firmware on card\n");
411	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
412
413	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
414
415	firmware_put(fw, FIRMWARE_UNLOAD);
416
417	return (status);
418}
419
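/*
 * Attach the controller: map BARs 0 and 2, set up the adapter locks, pick an
 * interrupt scheme (MSI-X, MSI, or INTx, subject to hw.cxgb.msi_allowed),
 * create the driver taskqueue, check the firmware and protocol SRAM versions,
 * and add a "cxgb" child device for each port before starting the tick
 * callout.
 */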
420static int
421cxgb_controller_attach(device_t dev)
422{
423	device_t child;
424	const struct adapter_info *ai;
425	struct adapter *sc;
426	int i, error = 0;
427	uint32_t vers;
428	int port_qsets = 1;
429#ifdef MSI_SUPPORTED
430	int msi_needed, reg;
431#endif
432	int must_load = 0;
433	sc = device_get_softc(dev);
434	sc->dev = dev;
435	sc->msi_count = 0;
436	ai = cxgb_get_adapter_info(dev);
437
438	/*
439	 * XXX not really related but a recent addition
440	 */
441#ifdef MSI_SUPPORTED
442	/* find the PCIe link width and set max read request to 4KB*/
443	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
444		uint16_t lnk, pectl;
445		lnk = pci_read_config(dev, reg + 0x12, 2);
446		sc->link_width = (lnk >> 4) & 0x3f;
447
448		pectl = pci_read_config(dev, reg + 0x8, 2);
449		pectl = (pectl & ~0x7000) | (5 << 12);
450		pci_write_config(dev, reg + 0x8, pectl, 2);
451	}
452
453	if (sc->link_width != 0 && sc->link_width <= 4 &&
454	    (ai->nports0 + ai->nports1) <= 2) {
455		device_printf(sc->dev,
456		    "PCIe x%d Link, expect reduced performance\n",
457		    sc->link_width);
458	}
459#endif
460	touch_bars(dev);
461	pci_enable_busmaster(dev);
462	/*
463	 * Allocate the registers and make them available to the driver.
464	 * The registers that we care about for NIC mode are in BAR 0
465	 */
466	sc->regs_rid = PCIR_BAR(0);
467	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
468	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
469		device_printf(dev, "Cannot allocate BAR region 0\n");
470		return (ENXIO);
471	}
472	sc->udbs_rid = PCIR_BAR(2);
473	if ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
474	    &sc->udbs_rid, RF_ACTIVE)) == NULL) {
475		device_printf(dev, "Cannot allocate BAR region 2\n");
476		error = ENXIO;
477		goto out;
478	}
479
480	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
481	    device_get_unit(dev));
482	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
483
484	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
485	    device_get_unit(dev));
486	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
487	    device_get_unit(dev));
488	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
489	    device_get_unit(dev));
490
491	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
492	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
493	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
494
495	sc->bt = rman_get_bustag(sc->regs_res);
496	sc->bh = rman_get_bushandle(sc->regs_res);
497	sc->mmio_len = rman_get_size(sc->regs_res);
498
499	if (t3_prep_adapter(sc, ai, 1) < 0) {
500		printf("prep adapter failed\n");
501		error = ENODEV;
502		goto out;
503	}
504        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
505	 * enough messages for the queue sets.  If that fails, try falling
506	 * back to MSI.  If that fails, then try falling back to the legacy
507	 * interrupt pin model.
508	 */
509#ifdef MSI_SUPPORTED
510
511	sc->msix_regs_rid = 0x20;
512	if ((msi_allowed >= 2) &&
513	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
514	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
515
516		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
517
518		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
519		    (sc->msi_count != msi_needed)) {
520			device_printf(dev, "msix allocation failed - msi_count = %d"
521			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
522			    msi_needed, error);
523			sc->msi_count = 0;
524			pci_release_msi(dev);
525			bus_release_resource(dev, SYS_RES_MEMORY,
526			    sc->msix_regs_rid, sc->msix_regs_res);
527			sc->msix_regs_res = NULL;
528		} else {
529			sc->flags |= USING_MSIX;
530			sc->cxgb_intr = t3_intr_msix;
531		}
532	}
533
534	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
535		sc->msi_count = 1;
536		if (pci_alloc_msi(dev, &sc->msi_count)) {
537			device_printf(dev, "alloc msi failed - will try INTx\n");
538			sc->msi_count = 0;
539			pci_release_msi(dev);
540		} else {
541			sc->flags |= USING_MSI;
542			sc->irq_rid = 1;
543			sc->cxgb_intr = t3_intr_msi;
544		}
545	}
546#endif
547	if (sc->msi_count == 0) {
548		device_printf(dev, "using line interrupts\n");
549		sc->irq_rid = 0;
550		sc->cxgb_intr = t3b_intr;
551	}
552
553	if ((sc->flags & USING_MSIX) && !singleq)
554		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
555
556	/* Create a private taskqueue thread for handling driver events */
557#ifdef TASKQUEUE_CURRENT
558	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
559	    taskqueue_thread_enqueue, &sc->tq);
560#else
561	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
562	    taskqueue_thread_enqueue, &sc->tq);
563#endif
564	if (sc->tq == NULL) {
565		device_printf(dev, "failed to allocate controller task queue\n");
566		goto out;
567	}
568
569	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
570	    device_get_nameunit(dev));
571	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
572	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
573
574
575	/* Create a periodic callout for checking adapter status */
576	callout_init(&sc->cxgb_tick_ch, TRUE);
577
578	if ((t3_check_fw_version(sc, &must_load) != 0 && must_load) || force_fw_update) {
579		/*
580		 * Warn user that a firmware update will be attempted in init.
581		 */
582		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
583		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
584		sc->flags &= ~FW_UPTODATE;
585	} else {
586		sc->flags |= FW_UPTODATE;
587	}
588
589	if (t3_check_tpsram_version(sc, &must_load) != 0 && must_load) {
590		/*
591		 * Warn user that a protocol SRAM update will be attempted in init.
592		 */
593		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
594		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
595		sc->flags &= ~TPS_UPTODATE;
596	} else {
597		sc->flags |= TPS_UPTODATE;
598	}
599
600	/*
601	 * Create a child device for each MAC.  The ethernet attachment
602	 * will be done in these children.
603	 */
604	for (i = 0; i < (sc)->params.nports; i++) {
605		struct port_info *pi;
606
607		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
608			device_printf(dev, "failed to add child port\n");
609			error = EINVAL;
610			goto out;
611		}
612		pi = &sc->port[i];
613		pi->adapter = sc;
614		pi->nqsets = port_qsets;
615		pi->first_qset = i*port_qsets;
616		pi->port_id = i;
617		pi->tx_chan = i >= ai->nports0;
618		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
619		sc->rxpkt_map[pi->txpkt_intf] = i;
620		sc->port[i].tx_chan = i >= ai->nports0;
621		sc->portdev[i] = child;
622		device_set_softc(child, pi);
623	}
624	if ((error = bus_generic_attach(dev)) != 0)
625		goto out;
626
627	/*
628	 * XXX need to poll for link status
629	 */
630	sc->params.stats_update_period = 1;
631
632	/* initialize sge private state */
633	t3_sge_init_adapter(sc);
634
635	t3_led_ready(sc);
636
637	cxgb_offload_init();
638	if (is_offload(sc)) {
639		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
640		cxgb_adapter_ofld(sc);
641        }
642	error = t3_get_fw_version(sc, &vers);
643	if (error)
644		goto out;
645
646	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
647	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
648	    G_FW_VERSION_MICRO(vers));
649
650	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
651	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
652	t3_add_attach_sysctls(sc);
653out:
654	if (error)
655		cxgb_free(sc);
656
657	return (error);
658}
659
660static int
661cxgb_controller_detach(device_t dev)
662{
663	struct adapter *sc;
664
665	sc = device_get_softc(dev);
666
667	cxgb_free(sc);
668
669	return (0);
670}
671
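/*
 * Undo cxgb_controller_attach: quiesce the adapter, release the interrupt
 * and MSI-X resources, delete the per-port child devices, free the
 * taskqueue, close the offload device if it is open, release the BARs, and
 * destroy the locks.
 */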
672static void
673cxgb_free(struct adapter *sc)
674{
675	int i;
676
677	ADAPTER_LOCK(sc);
678	sc->flags |= CXGB_SHUTDOWN;
679	ADAPTER_UNLOCK(sc);
680	cxgb_pcpu_shutdown_threads(sc);
681	ADAPTER_LOCK(sc);
682
683	/*
684	 * Note: cxgb_down_locked() returns with the adapter lock released.
685	 */
686	cxgb_down_locked(sc);
687
688#ifdef MSI_SUPPORTED
689	if (sc->flags & (USING_MSI | USING_MSIX)) {
690		device_printf(sc->dev, "releasing msi message(s)\n");
691		pci_release_msi(sc->dev);
692	} else {
693		device_printf(sc->dev, "no msi message to release\n");
694	}
695#endif
696	if (sc->msix_regs_res != NULL) {
697		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
698		    sc->msix_regs_res);
699	}
700
701	t3_sge_deinit_sw(sc);
702	/*
703	 * Wait for last callout
704	 */
705
706	DELAY(hz*100);
707
708	for (i = 0; i < (sc)->params.nports; ++i) {
709		if (sc->portdev[i] != NULL)
710			device_delete_child(sc->dev, sc->portdev[i]);
711	}
712
713	bus_generic_detach(sc->dev);
714	if (sc->tq != NULL) {
715		taskqueue_free(sc->tq);
716		sc->tq = NULL;
717	}
718
719	if (is_offload(sc)) {
720		cxgb_adapter_unofld(sc);
721		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
722			offload_close(&sc->tdev);
723		else
724			printf("cxgb_free: DEVMAP_BIT not set\n");
725	} else
726		printf("cxgb_free: offload not enabled\n");
727#ifdef notyet
728	if (sc->flags & CXGB_OFLD_INIT)
729		cxgb_offload_deactivate(sc);
730#endif
731	free(sc->filters, M_DEVBUF);
732	t3_sge_free(sc);
733
734	cxgb_offload_exit();
735
736	if (sc->udbs_res != NULL)
737		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
738		    sc->udbs_res);
739
740	if (sc->regs_res != NULL)
741		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
742		    sc->regs_res);
743
744	MTX_DESTROY(&sc->mdio_lock);
745	MTX_DESTROY(&sc->sge.reg_lock);
746	MTX_DESTROY(&sc->elmer_lock);
747	ADAPTER_LOCK_DEINIT(sc);
748}
749
750/**
751 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
752 *	@sc: the controller softc
753 *
754 *	Determines how many sets of SGE queues to use and initializes them.
755 *	We support multiple queue sets per port if we have MSI-X, otherwise
756 *	just one queue set per port.
757 */
758static int
759setup_sge_qsets(adapter_t *sc)
760{
761	int i, j, err, irq_idx = 0, qset_idx = 0;
762	u_int ntxq = SGE_TXQ_PER_SET;
763
764	if ((err = t3_sge_alloc(sc)) != 0) {
765		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
766		return (err);
767	}
768
769	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
770		irq_idx = -1;
771
772	for (i = 0; i < (sc)->params.nports; i++) {
773		struct port_info *pi = &sc->port[i];
774
775		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
776			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
777			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
778			    &sc->params.sge.qset[qset_idx], ntxq, pi);
779			if (err) {
780				t3_free_sge_resources(sc);
781				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
782				    err);
783				return (err);
784			}
785		}
786	}
787
788	return (0);
789}
790
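/*
 * Tear down and release every per-queue-set MSI-X interrupt set up by
 * cxgb_setup_msix().
 */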
791static void
792cxgb_teardown_msix(adapter_t *sc)
793{
794	int i, nqsets;
795
796	for (nqsets = i = 0; i < (sc)->params.nports; i++)
797		nqsets += sc->port[i].nqsets;
798
799	for (i = 0; i < nqsets; i++) {
800		if (sc->msix_intr_tag[i] != NULL) {
801			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
802			    sc->msix_intr_tag[i]);
803			sc->msix_intr_tag[i] = NULL;
804		}
805		if (sc->msix_irq_res[i] != NULL) {
806			bus_release_resource(sc->dev, SYS_RES_IRQ,
807			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
808			sc->msix_irq_res[i] = NULL;
809		}
810	}
811}
812
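/*
 * Hook up the MSI-X vectors: rid 1 carries link/error (async) interrupts and
 * rids 2..n are assigned one per queue set, optionally bound round-robin to
 * CPUs when multiqueue is enabled.
 */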
813static int
814cxgb_setup_msix(adapter_t *sc, int msix_count)
815{
816	int i, j, k, nqsets, rid;
817
818	/* The first message indicates link changes and error conditions */
819	sc->irq_rid = 1;
820	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
821	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
822		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
823		return (EINVAL);
824	}
825
826	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
827#ifdef INTR_FILTERS
828		NULL,
829#endif
830		cxgb_async_intr, sc, &sc->intr_tag)) {
831		device_printf(sc->dev, "Cannot set up interrupt\n");
832		return (EINVAL);
833	}
834	for (i = k = 0; i < (sc)->params.nports; i++) {
835		nqsets = sc->port[i].nqsets;
836		for (j = 0; j < nqsets; j++, k++) {
837			struct sge_qset *qs = &sc->sge.qs[k];
838
839			rid = k + 2;
840			if (cxgb_debug)
841				printf("rid=%d ", rid);
842			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
843			    sc->dev, SYS_RES_IRQ, &rid,
844			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
845				device_printf(sc->dev, "Cannot allocate "
846				    "interrupt for message %d\n", rid);
847				return (EINVAL);
848			}
849			sc->msix_irq_rid[k] = rid;
850			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
851				INTR_MPSAFE|INTR_TYPE_NET,
852#ifdef INTR_FILTERS
853				NULL,
854#endif
855				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
856				device_printf(sc->dev, "Cannot set up "
857				    "interrupt for message %d\n", rid);
858				return (EINVAL);
859			}
860#ifdef IFNET_MULTIQUEUE
861			if (singleq == 0) {
862				int vector = rman_get_start(sc->msix_irq_res[k]);
863				if (bootverbose)
864					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
865				intr_bind(vector, k % mp_ncpus);
866			}
867#endif
868		}
869	}
870
871	return (0);
872}
873
874static int
875cxgb_port_probe(device_t dev)
876{
877	struct port_info *p;
878	char buf[80];
879	const char *desc;
880
881	p = device_get_softc(dev);
882	desc = p->phy.desc;
883	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
884	device_set_desc_copy(dev, buf);
885	return (0);
886}
887
888
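/*
 * Create the per-port character device used for the driver's extension
 * ioctls.
 */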
889static int
890cxgb_makedev(struct port_info *pi)
891{
892
893	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
894	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
895
896	if (pi->port_cdev == NULL)
897		return (ENOMEM);
898
899	pi->port_cdev->si_drv1 = (void *)pi;
900
901	return (0);
902}
903
904
905#ifdef TSO_SUPPORTED
906#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
907/* Don't enable TSO6 yet */
908#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
909#else
910#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
911/* Don't enable TSO6 yet */
912#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
913#define IFCAP_TSO4 0x0
914#define IFCAP_TSO6 0x0
915#define CSUM_TSO   0x0
916#endif
917
918
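/*
 * Attach a port: allocate and initialize the ifnet, advertise the checksum,
 * TSO, VLAN, and LRO capabilities, register the supported media types, create
 * the port taskqueue and character device, and record the MAC address.
 */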
919static int
920cxgb_port_attach(device_t dev)
921{
922	struct port_info *p;
923	struct ifnet *ifp;
924	int err, media_flags;
925	struct adapter *sc;
926
927
928	p = device_get_softc(dev);
929	sc = p->adapter;
930	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
931	    device_get_unit(device_get_parent(dev)), p->port_id);
932	PORT_LOCK_INIT(p, p->lockbuf);
933
934	/* Allocate an ifnet object and set it up */
935	ifp = p->ifp = if_alloc(IFT_ETHER);
936	if (ifp == NULL) {
937		device_printf(dev, "Cannot allocate ifnet\n");
938		return (ENOMEM);
939	}
940
941	/*
942	 * Note that there is currently no watchdog timer.
943	 */
944	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
945	ifp->if_init = cxgb_init;
946	ifp->if_softc = p;
947	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
948	ifp->if_ioctl = cxgb_ioctl;
949	ifp->if_start = cxgb_start;
950
951#if 0
952#ifdef IFNET_MULTIQUEUE
953	ifp->if_flags |= IFF_MULTIQ;
954	ifp->if_mq_start = cxgb_pcpu_start;
955#endif
956#endif
957	ifp->if_timer = 0;	/* Disable ifnet watchdog */
958	ifp->if_watchdog = NULL;
959
960	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
961	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
962	IFQ_SET_READY(&ifp->if_snd);
963
964	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
965	ifp->if_capabilities |= CXGB_CAP;
966	ifp->if_capenable |= CXGB_CAP_ENABLE;
967	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
968	/*
969	 * disable TSO on 4-port - it isn't supported by the firmware yet
970	 */
971	if (p->adapter->params.nports > 2) {
972		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
973		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
974		ifp->if_hwassist &= ~CSUM_TSO;
975	}
976
977	ether_ifattach(ifp, p->hw_addr);
978	/*
979	 * Only default to jumbo frames on 10GigE
980	 */
981	if (p->adapter->params.nports <= 2)
982		ifp->if_mtu = ETHERMTU_JUMBO;
983	if ((err = cxgb_makedev(p)) != 0) {
984		printf("makedev failed %d\n", err);
985		return (err);
986	}
987	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
988	    cxgb_media_status);
989
990	if (!strcmp(p->phy.desc,	"10GBASE-CX4")) {
991		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
992	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
993		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
994	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
995		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
996	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
997		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
998		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
999			    0, NULL);
1000		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
1001			    0, NULL);
1002		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1003			    0, NULL);
1004		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1005			    0, NULL);
1006		media_flags = 0;
1007	} else {
1008	        printf("unsupported media type %s\n", p->phy.desc);
1009		return (ENXIO);
1010	}
1011	if (media_flags) {
1012		ifmedia_add(&p->media, media_flags, 0, NULL);
1013		ifmedia_set(&p->media, media_flags);
1014	} else {
1015		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1016		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
1017	}
1018
1019
1020	snprintf(p->taskqbuf, TASKQ_NAME_LEN, "cxgb_port_taskq%d", p->port_id);
1021#ifdef TASKQUEUE_CURRENT
1022	/* Create a taskqueue for handling TX without starvation */
1023	p->tq = taskqueue_create(p->taskqbuf, M_NOWAIT,
1024	    taskqueue_thread_enqueue, &p->tq);
1025#else
1026	/* Create a taskqueue for handling TX without starvation */
1027	p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT,
1028	    taskqueue_thread_enqueue, &p->tq);
1029#endif
1030	/* Get the latest mac address, User can use a LAA */
1031	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
1032	t3_sge_init_port(p);
1033#if defined(LINK_ATTACH)
1034	cxgb_link_start(p);
1035	t3_link_changed(sc, p->port_id);
1036#endif
1037	return (0);
1038}
1039
1040static int
1041cxgb_port_detach(device_t dev)
1042{
1043	struct port_info *p;
1044
1045	p = device_get_softc(dev);
1046
1047	PORT_LOCK(p);
1048	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
1049		cxgb_stop_locked(p);
1050	PORT_UNLOCK(p);
1051
1052	if (p->tq != NULL) {
1053		taskqueue_drain(p->tq, &p->start_task);
1054		taskqueue_free(p->tq);
1055		p->tq = NULL;
1056	}
1057
1058	ether_ifdetach(p->ifp);
1059	printf("waiting for callout to stop ...");
1060	DELAY(1000000);
1061	printf("done\n");
1062	/*
1063	 * the lock may be acquired in ifdetach
1064	 */
1065	PORT_LOCK_DEINIT(p);
1066	if_free(p->ifp);
1067
1068	if (p->port_cdev != NULL)
1069		destroy_dev(p->port_cdev);
1070
1071	return (0);
1072}
1073
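/*
 * Handle a fatal hardware error: stop the SGE and both MACs, mask further
 * interrupts, and log the firmware status registers.
 */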
1074void
1075t3_fatal_err(struct adapter *sc)
1076{
1077	u_int fw_status[4];
1078
1079	if (sc->flags & FULL_INIT_DONE) {
1080		t3_sge_stop(sc);
1081		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1082		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1083		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1084		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1085		t3_intr_disable(sc);
1086	}
1087	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1088	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1089		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1090		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1091}
1092
1093int
1094t3_os_find_pci_capability(adapter_t *sc, int cap)
1095{
1096	device_t dev;
1097	struct pci_devinfo *dinfo;
1098	pcicfgregs *cfg;
1099	uint32_t status;
1100	uint8_t ptr;
1101
1102	dev = sc->dev;
1103	dinfo = device_get_ivars(dev);
1104	cfg = &dinfo->cfg;
1105
1106	status = pci_read_config(dev, PCIR_STATUS, 2);
1107	if (!(status & PCIM_STATUS_CAPPRESENT))
1108		return (0);
1109
1110	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1111	case 0:
1112	case 1:
1113		ptr = PCIR_CAP_PTR;
1114		break;
1115	case 2:
1116		ptr = PCIR_CAP_PTR_2;
1117		break;
1118	default:
1119		return (0);
1120		break;
1121	}
1122	ptr = pci_read_config(dev, ptr, 1);
1123
1124	while (ptr != 0) {
1125		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1126			return (ptr);
1127		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1128	}
1129
1130	return (0);
1131}
1132
1133int
1134t3_os_pci_save_state(struct adapter *sc)
1135{
1136	device_t dev;
1137	struct pci_devinfo *dinfo;
1138
1139	dev = sc->dev;
1140	dinfo = device_get_ivars(dev);
1141
1142	pci_cfg_save(dev, dinfo, 0);
1143	return (0);
1144}
1145
1146int
1147t3_os_pci_restore_state(struct adapter *sc)
1148{
1149	device_t dev;
1150	struct pci_devinfo *dinfo;
1151
1152	dev = sc->dev;
1153	dinfo = device_get_ivars(dev);
1154
1155	pci_cfg_restore(dev, dinfo);
1156	return (0);
1157}
1158
1159/**
1160 *	t3_os_link_changed - handle link status changes
1161 *	@adapter: the adapter associated with the link change
1162 *	@port_id: the port index whose link status has changed
1163 *	@link_status: the new status of the link
1164 *	@speed: the new speed setting
1165 *	@duplex: the new duplex setting
1166 *	@fc: the new flow-control setting
1167 *
1168 *	This is the OS-dependent handler for link status changes.  The OS
1169 *	neutral handler takes care of most of the processing for these events,
1170 *	then calls this handler for any OS-specific processing.
1171 */
1172void
1173t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1174     int duplex, int fc)
1175{
1176	struct port_info *pi = &adapter->port[port_id];
1177	struct cmac *mac = &adapter->port[port_id].mac;
1178
1179	if (link_status) {
1180		DELAY(10);
1181		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1182		/* Clear errors created by MAC enable */
1183		t3_set_reg_field(adapter,
1184		    A_XGM_STAT_CTRL + pi->mac.offset,
1185		    F_CLRSTATS, 1);
1186		if_link_state_change(pi->ifp, LINK_STATE_UP);
1187
1188	} else {
1189		pi->phy.ops->power_down(&pi->phy, 1);
1190		t3_mac_disable(mac, MAC_DIRECTION_RX);
1191		t3_link_start(&pi->phy, mac, &pi->link_config);
1192		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1193		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1194	}
1195}
1196
1197/**
1198 *	t3_os_phymod_changed - handle PHY module changes
1199 *	@adap: the adapter whose PHY module changed
1200 *	@port_id: the port index whose PHY module changed
1201 *
1202 *	This is the OS-dependent handler for PHY module changes.  It is
1203 *	invoked when a PHY module is removed or inserted for any OS-specific
1204 *	processing.
1205 */
1206void t3_os_phymod_changed(struct adapter *adap, int port_id)
1207{
1208	static const char *mod_str[] = {
1209		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1210	};
1211
1212	struct port_info *pi = &adap->port[port_id];
1213
1214	if (pi->phy.modtype == phy_modtype_none)
1215		device_printf(adap->dev, "PHY module unplugged\n");
1216	else {
1217		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
1218		    ("invalid PHY module type %d", pi->phy.modtype));
1219		device_printf(adap->dev, "%s PHY module inserted\n",
1220		    mod_str[pi->phy.modtype]);
1221	}
1222}
1223
1224/*
1225 * Interrupt-context handler for external (PHY) interrupts.
1226 */
1227void
1228t3_os_ext_intr_handler(adapter_t *sc)
1229{
1230	if (cxgb_debug)
1231		printf("t3_os_ext_intr_handler\n");
1232	/*
1233	 * Schedule a task to handle external interrupts as they may be slow
1234	 * and we use a mutex to protect MDIO registers.  We disable PHY
1235	 * interrupts in the meantime and let the task reenable them when
1236	 * it's done.
1237	 */
1238	ADAPTER_LOCK(sc);
1239	if (sc->slow_intr_mask) {
1240		sc->slow_intr_mask &= ~F_T3DBG;
1241		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1242		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1243	}
1244	ADAPTER_UNLOCK(sc);
1245}
1246
1247void
1248t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1249{
1250
1251	/*
1252	 * The ifnet might not be allocated before this gets called,
1253	 * as this is called early on in attach by t3_prep_adapter
1254	 * save the address off in the port structure
1255	 */
1256	if (cxgb_debug)
1257		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1258	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1259}
1260
1261/**
1262 *	cxgb_link_start - enable a port
1263 *	@p: the port to enable
1264 *
1265 *	Performs the MAC and PHY actions needed to enable a port.
1266 */
1267static void
1268cxgb_link_start(struct port_info *p)
1269{
1270	struct ifnet *ifp;
1271	struct t3_rx_mode rm;
1272	struct cmac *mac = &p->mac;
1273	int mtu, hwtagging;
1274
1275	ifp = p->ifp;
1276
1277	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1278
1279	mtu = ifp->if_mtu;
1280	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1281		mtu += ETHER_VLAN_ENCAP_LEN;
1282
1283	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1284
1285	t3_init_rx_mode(&rm, p);
1286	if (!mac->multiport)
1287		t3_mac_reset(mac);
1288	t3_mac_set_mtu(mac, mtu);
1289	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1290	t3_mac_set_address(mac, 0, p->hw_addr);
1291	t3_mac_set_rx_mode(mac, &rm);
1292	t3_link_start(&p->phy, mac, &p->link_config);
1293	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1294}
1295
1296
1297static int
1298await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1299			      unsigned long n)
1300{
1301	int attempts = 5;
1302
1303	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1304		if (!--attempts)
1305			return (ETIMEDOUT);
1306		t3_os_sleep(10);
1307	}
1308	return 0;
1309}
1310
1311static int
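/*
 * Push dummy SMT, L2T, and routing-table writes (plus one SET_TCB_FIELD)
 * through the management queue and wait for the replies; cxgb_up() uses this
 * on rev-C parts to initialize the parity of the TP memories.
 */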
1312init_tp_parity(struct adapter *adap)
1313{
1314	int i;
1315	struct mbuf *m;
1316	struct cpl_set_tcb_field *greq;
1317	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1318
1319	t3_tp_set_offload_mode(adap, 1);
1320
1321	for (i = 0; i < 16; i++) {
1322		struct cpl_smt_write_req *req;
1323
1324		m = m_gethdr(M_WAITOK, MT_DATA);
1325		req = mtod(m, struct cpl_smt_write_req *);
1326		m->m_len = m->m_pkthdr.len = sizeof(*req);
1327		memset(req, 0, sizeof(*req));
1328		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1329		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1330		req->iff = i;
1331		t3_mgmt_tx(adap, m);
1332	}
1333
1334	for (i = 0; i < 2048; i++) {
1335		struct cpl_l2t_write_req *req;
1336
1337		m = m_gethdr(M_WAITOK, MT_DATA);
1338		req = mtod(m, struct cpl_l2t_write_req *);
1339		m->m_len = m->m_pkthdr.len = sizeof(*req);
1340		memset(req, 0, sizeof(*req));
1341		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1342		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1343		req->params = htonl(V_L2T_W_IDX(i));
1344		t3_mgmt_tx(adap, m);
1345	}
1346
1347	for (i = 0; i < 2048; i++) {
1348		struct cpl_rte_write_req *req;
1349
1350		m = m_gethdr(M_WAITOK, MT_DATA);
1351		req = mtod(m, struct cpl_rte_write_req *);
1352		m->m_len = m->m_pkthdr.len = sizeof(*req);
1353		memset(req, 0, sizeof(*req));
1354		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1355		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1356		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1357		t3_mgmt_tx(adap, m);
1358	}
1359
1360	m = m_gethdr(M_WAITOK, MT_DATA);
1361	greq = mtod(m, struct cpl_set_tcb_field *);
1362	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1363	memset(greq, 0, sizeof(*greq));
1364	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1365	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1366	greq->mask = htobe64(1);
1367	t3_mgmt_tx(adap, m);
1368
1369	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1370	t3_tp_set_offload_mode(adap, 0);
1371	return (i);
1372}
1373
1374/**
1375 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1376 *	@adap: the adapter
1377 *
1378 *	Sets up RSS to distribute packets to multiple receive queues.  We
1379 *	configure the RSS CPU lookup table to distribute to the number of HW
1380 *	receive queues, and the response queue lookup table to narrow that
1381 *	down to the response queues actually configured for each port.
1382 *	We always configure the RSS mapping for two ports since the mapping
1383 *	table has plenty of entries.
1384 */
1385static void
1386setup_rss(adapter_t *adap)
1387{
1388	int i;
1389	u_int nq[2];
1390	uint8_t cpus[SGE_QSETS + 1];
1391	uint16_t rspq_map[RSS_TABLE_SIZE];
1392
1393	for (i = 0; i < SGE_QSETS; ++i)
1394		cpus[i] = i;
1395	cpus[SGE_QSETS] = 0xff;
1396
1397	nq[0] = nq[1] = 0;
1398	for_each_port(adap, i) {
1399		const struct port_info *pi = adap2pinfo(adap, i);
1400
1401		nq[pi->tx_chan] += pi->nqsets;
1402	}
1403	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1404		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1405		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1406	}
1407	/* Calculate the reverse RSS map table */
1408	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1409		if (adap->rrss_map[rspq_map[i]] == 0xff)
1410			adap->rrss_map[rspq_map[i]] = i;
1411
1412	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1413		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1414	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1415	              cpus, rspq_map);
1416
1417}
1418
1419/*
1420 * Send an mbuf to the offload queue driver; this is a thin
1421 * wrapper around t3_offload_tx().
1422 */
1423static inline int
1424offload_tx(struct t3cdev *tdev, struct mbuf *m)
1425{
1426	int ret;
1427
1428	ret = t3_offload_tx(tdev, m);
1429	return (ret);
1430}
1431
1432static int
1433write_smt_entry(struct adapter *adapter, int idx)
1434{
1435	struct port_info *pi = &adapter->port[idx];
1436	struct cpl_smt_write_req *req;
1437	struct mbuf *m;
1438
1439	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1440		return (ENOMEM);
1441
1442	req = mtod(m, struct cpl_smt_write_req *);
1443	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1444
1445	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1446	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1447	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1448	req->iff = idx;
1449	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1450	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1451
1452	m_set_priority(m, 1);
1453
1454	offload_tx(&adapter->tdev, m);
1455
1456	return (0);
1457}
1458
1459static int
1460init_smt(struct adapter *adapter)
1461{
1462	int i;
1463
1464	for_each_port(adapter, i)
1465		write_smt_entry(adapter, i);
1466	return 0;
1467}
1468
1469static void
1470init_port_mtus(adapter_t *adapter)
1471{
1472	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1473
1474	if (adapter->port[1].ifp)
1475		mtus |= adapter->port[1].ifp->if_mtu << 16;
1476	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1477}
1478
1479static void
1480send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1481			      int hi, int port)
1482{
1483	struct mbuf *m;
1484	struct mngt_pktsched_wr *req;
1485
1486	m = m_gethdr(M_DONTWAIT, MT_DATA);
1487	if (m) {
1488		req = mtod(m, struct mngt_pktsched_wr *);
1489		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1490		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1491		req->sched = sched;
1492		req->idx = qidx;
1493		req->min = lo;
1494		req->max = hi;
1495		req->binding = port;
1496		m->m_len = m->m_pkthdr.len = sizeof(*req);
1497		t3_mgmt_tx(adap, m);
1498	}
1499}
1500
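/*
 * Start the driver's transmit threads via cxgb_pcpu_startup_threads() and
 * issue a management packet-scheduler command binding each queue set to its
 * port's TX channel.
 */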
1501static void
1502bind_qsets(adapter_t *sc)
1503{
1504	int i, j;
1505
1506	cxgb_pcpu_startup_threads(sc);
1507	for (i = 0; i < (sc)->params.nports; ++i) {
1508		const struct port_info *pi = adap2pinfo(sc, i);
1509
1510		for (j = 0; j < pi->nqsets; ++j) {
1511			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1512					  -1, pi->tx_chan);
1513
1514		}
1515	}
1516}
1517
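/*
 * If the protocol SRAM image stored in the adapter EEPROM does not match the
 * version compiled into the driver, load TPEEPROM_NAME with firmware(9),
 * validate it, and write it to the EEPROM at TP_SRAM_OFFSET.
 */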
1518static void
1519update_tpeeprom(struct adapter *adap)
1520{
1521#ifdef FIRMWARE_LATEST
1522	const struct firmware *tpeeprom;
1523#else
1524	struct firmware *tpeeprom;
1525#endif
1526
1527	uint32_t version;
1528	unsigned int major, minor;
1529	int ret, len;
1530	char rev;
1531
1532	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1533
1534	major = G_TP_VERSION_MAJOR(version);
1535	minor = G_TP_VERSION_MINOR(version);
1536	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1537		return;
1538
1539	rev = t3rev2char(adap);
1540
1541	tpeeprom = firmware_get(TPEEPROM_NAME);
1542	if (tpeeprom == NULL) {
1543		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1544		    TPEEPROM_NAME);
1545		return;
1546	}
1547
1548	len = tpeeprom->datasize - 4;
1549
1550	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1551	if (ret)
1552		goto release_tpeeprom;
1553
1554	if (len != TP_SRAM_LEN) {
1555		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", TPEEPROM_NAME, len, TP_SRAM_LEN);
1556		goto release_tpeeprom;
1557	}
1558
1559	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1560	    TP_SRAM_OFFSET);
1561
1562	if (!ret) {
1563		device_printf(adap->dev,
1564			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1565			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1566	} else
1567		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1568
1569release_tpeeprom:
1570	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1571
1572	return;
1573}
1574
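/*
 * Refresh the protocol SRAM: update the EEPROM copy first, then load
 * TPSRAM_NAME with firmware(9) and program it into the chip with
 * t3_set_proto_sram().
 */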
1575static int
1576update_tpsram(struct adapter *adap)
1577{
1578#ifdef FIRMWARE_LATEST
1579	const struct firmware *tpsram;
1580#else
1581	struct firmware *tpsram;
1582#endif
1583	int ret;
1584	char rev;
1585
1586	rev = t3rev2char(adap);
1587	if (!rev)
1588		return 0;
1589
1590	update_tpeeprom(adap);
1591
1592	tpsram = firmware_get(TPSRAM_NAME);
1593	if (tpsram == NULL) {
1594		device_printf(adap->dev, "could not load TP SRAM\n");
1595		return (EINVAL);
1596	} else
1597		device_printf(adap->dev, "updating TP SRAM\n");
1598
1599	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1600	if (ret)
1601		goto release_tpsram;
1602
1603	ret = t3_set_proto_sram(adap, tpsram->data);
1604	if (ret)
1605		device_printf(adap->dev, "loading protocol SRAM failed\n");
1606
1607release_tpsram:
1608	firmware_put(tpsram, FIRMWARE_UNLOAD);
1609
1610	return ret;
1611}
1612
1613/**
1614 *	cxgb_up - enable the adapter
1615 *	@adap: adapter being enabled
1616 *
1617 *	Called when the first port is enabled, this function performs the
1618 *	actions necessary to make an adapter operational, such as completing
1619 *	the initialization of HW modules, and enabling interrupts.
1620 *
1621 */
1622static int
1623cxgb_up(struct adapter *sc)
1624{
1625	int err = 0;
1626
1627	if ((sc->flags & FULL_INIT_DONE) == 0) {
1628
1629		if ((sc->flags & FW_UPTODATE) == 0)
1630			if ((err = upgrade_fw(sc)))
1631				goto out;
1632		if ((sc->flags & TPS_UPTODATE) == 0)
1633			if ((err = update_tpsram(sc)))
1634				goto out;
1635		err = t3_init_hw(sc, 0);
1636		if (err)
1637			goto out;
1638
1639		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1640		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1641
1642		err = setup_sge_qsets(sc);
1643		if (err)
1644			goto out;
1645
1646		setup_rss(sc);
1647		t3_add_configured_sysctls(sc);
1648		sc->flags |= FULL_INIT_DONE;
1649	}
1650
1651	t3_intr_clear(sc);
1652
1653	/* If it's MSI or INTx, allocate a single interrupt for everything */
1654	if ((sc->flags & USING_MSIX) == 0) {
1655		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1656		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1657			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1658			    sc->irq_rid);
1659			err = EINVAL;
1660			goto out;
1661		}
1662		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1663
1664		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1665#ifdef INTR_FILTERS
1666			NULL,
1667#endif
1668			sc->cxgb_intr, sc, &sc->intr_tag)) {
1669			device_printf(sc->dev, "Cannot set up interrupt\n");
1670			err = EINVAL;
1671			goto irq_err;
1672		}
1673	} else {
1674		cxgb_setup_msix(sc, sc->msi_count);
1675	}
1676
1677	t3_sge_start(sc);
1678	t3_intr_enable(sc);
1679
1680	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1681	    is_offload(sc) && init_tp_parity(sc) == 0)
1682		sc->flags |= TP_PARITY_INIT;
1683
1684	if (sc->flags & TP_PARITY_INIT) {
1685		t3_write_reg(sc, A_TP_INT_CAUSE,
1686				F_CMCACHEPERR | F_ARPLUTPERR);
1687		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1688	}
1689
1690
1691	if (!(sc->flags & QUEUES_BOUND)) {
1692		bind_qsets(sc);
1693		sc->flags |= QUEUES_BOUND;
1694	}
1695out:
1696	return (err);
1697irq_err:
1698	CH_ERR(sc, "request_irq failed, err %d\n", err);
1699	goto out;
1700}
1701
1702
1703/*
1704 * Release resources when all the ports and offloading have been stopped.
1705 */
1706static void
1707cxgb_down_locked(struct adapter *sc)
1708{
1709
1710	t3_sge_stop(sc);
1711	t3_intr_disable(sc);
1712
1713	if (sc->intr_tag != NULL) {
1714		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1715		sc->intr_tag = NULL;
1716	}
1717	if (sc->irq_res != NULL) {
1718		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1719		    sc->irq_rid, sc->irq_res);
1720		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1721		    sc->irq_res);
1722		sc->irq_res = NULL;
1723	}
1724
1725	if (sc->flags & USING_MSIX)
1726		cxgb_teardown_msix(sc);
1727
1728	callout_stop(&sc->cxgb_tick_ch);
1729	callout_stop(&sc->sge_timer_ch);
1730	callout_drain(&sc->cxgb_tick_ch);
1731	callout_drain(&sc->sge_timer_ch);
1732
1733	if (sc->tq != NULL) {
1734		printf("draining slow intr\n");
1735
1736		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1737		printf("draining ext intr\n");
1738		taskqueue_drain(sc->tq, &sc->ext_intr_task);
1739		printf("draining tick task\n");
1740		taskqueue_drain(sc->tq, &sc->tick_task);
1741	}
1742	ADAPTER_UNLOCK(sc);
1743}
1744
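/*
 * Mark the offload device open, bring the adapter up if no port has done so
 * yet, switch the TP into offload mode, program the port MTUs and SMT, and
 * notify all registered offload clients.
 */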
1745static int
1746offload_open(struct port_info *pi)
1747{
1748	struct adapter *adapter = pi->adapter;
1749	struct t3cdev *tdev = &adapter->tdev;
1750#ifdef notyet
1751	    T3CDEV(pi->ifp);
1752#endif
1753	int adap_up = adapter->open_device_map & PORT_MASK;
1754	int err = 0;
1755
1756	CTR1(KTR_CXGB, "device_map=0x%x", adapter->open_device_map);
1757	if (atomic_cmpset_int(&adapter->open_device_map,
1758		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1759		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1760		return (0);
1761
1762
1763	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1764		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n", adapter->open_device_map);
1765	ADAPTER_LOCK(pi->adapter);
1766	if (!adap_up)
1767		err = cxgb_up(adapter);
1768	ADAPTER_UNLOCK(pi->adapter);
1769	if (err)
1770		return (err);
1771
1772	t3_tp_set_offload_mode(adapter, 1);
1773	tdev->lldev = pi->ifp;
1774
1775	init_port_mtus(adapter);
1776	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1777		     adapter->params.b_wnd,
1778		     adapter->params.rev == 0 ?
1779		       adapter->port[0].ifp->if_mtu : 0xffff);
1780	init_smt(adapter);
1781
1782	/* Call back all registered clients */
1783	cxgb_add_clients(tdev);
1784
1785
1786	/* restore them in case the offload module has changed them */
1787	if (err) {
1788		t3_tp_set_offload_mode(adapter, 0);
1789		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1790		cxgb_set_dummy_ops(tdev);
1791	}
1792	return (err);
1793}
1794
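/*
 * Reverse of offload_open: detach the offload clients, take the TP out of
 * offload mode, clear the offload bit in the open device map, and shut the
 * adapter down if nothing else is using it.
 */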
1795static int
1796offload_close(struct t3cdev *tdev)
1797{
1798	struct adapter *adapter = tdev2adap(tdev);
1799
1800	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1801		return (0);
1802
1803	/* Call back all registered clients */
1804	cxgb_remove_clients(tdev);
1805
1806	tdev->lldev = NULL;
1807	cxgb_set_dummy_ops(tdev);
1808	t3_tp_set_offload_mode(adapter, 0);
1809	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1810
1811	ADAPTER_LOCK(adapter);
1812	if (!adapter->open_device_map)
1813		cxgb_down_locked(adapter);
1814	else
1815		ADAPTER_UNLOCK(adapter);
1816	return (0);
1817}
1818
1819
1820static void
1821cxgb_init(void *arg)
1822{
1823	struct port_info *p = arg;
1824
1825	PORT_LOCK(p);
1826	cxgb_init_locked(p);
1827	PORT_UNLOCK(p);
1828}
1829
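/*
 * Bring a port up: bring the adapter up on first use, open the offload
 * device when offload is enabled, start the link, and mark the interface
 * running.  Called with the port lock held.
 */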
1830static void
1831cxgb_init_locked(struct port_info *p)
1832{
1833	struct ifnet *ifp;
1834	adapter_t *sc = p->adapter;
1835	int err;
1836
1837	PORT_LOCK_ASSERT_OWNED(p);
1838	ifp = p->ifp;
1839
1840	ADAPTER_LOCK(p->adapter);
1841	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1842		ADAPTER_UNLOCK(p->adapter);
1843		cxgb_stop_locked(p);
1844		return;
1845	}
1846	if (p->adapter->open_device_map == 0) {
1847		t3_intr_clear(sc);
1848	}
1849	setbit(&p->adapter->open_device_map, p->port_id);
1850	ADAPTER_UNLOCK(p->adapter);
1851
1852	if (is_offload(sc) && !ofld_disable) {
1853		err = offload_open(p);
1854		if (err)
1855			log(LOG_WARNING,
1856			    "Could not initialize offload capabilities\n");
1857	}
1858#if !defined(LINK_ATTACH)
1859	cxgb_link_start(p);
1860	t3_link_changed(sc, p->port_id);
1861#endif
1862	ifp->if_baudrate = p->link_config.speed * 1000000;
1863
1864	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1865	t3_port_intr_enable(sc, p->port_id);
1866
1867	t3_sge_reset_adapter(sc);
1868
1869	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1870	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1871}
1872
1873static void
1874cxgb_set_rxmode(struct port_info *p)
1875{
1876	struct t3_rx_mode rm;
1877	struct cmac *mac = &p->mac;
1878
1879	t3_init_rx_mode(&rm, p);
1880	mtx_lock(&p->adapter->mdio_lock);
1881	t3_mac_set_rx_mode(mac, &rm);
1882	mtx_unlock(&p->adapter->mdio_lock);
1883}
1884
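/*
 * Bring a port down: disable its interrupts and pause frames, clear its bit
 * in the open device map (shutting the adapter down if it was the last
 * user), and quiesce the MAC.  Called with the port lock held and the
 * adapter lock not held.
 */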
1885static void
1886cxgb_stop_locked(struct port_info *pi)
1887{
1888	struct ifnet *ifp;
1889
1890	PORT_LOCK_ASSERT_OWNED(pi);
1891	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1892
1893	ifp = pi->ifp;
1894	t3_port_intr_disable(pi->adapter, pi->port_id);
1895	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1896
1897	/* disable pause frames */
1898	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1899			 F_TXPAUSEEN, 0);
1900
1901	/* Reset RX FIFO HWM */
1902        t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1903			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1904
1905
1906	ADAPTER_LOCK(pi->adapter);
1907	clrbit(&pi->adapter->open_device_map, pi->port_id);
1908
1909	if (pi->adapter->open_device_map == 0) {
1910		cxgb_down_locked(pi->adapter);
1911	} else
1912		ADAPTER_UNLOCK(pi->adapter);
1913
1914#if !defined(LINK_ATTACH)
1915	DELAY(100);
1916
1917	/* Wait for TXFIFO empty */
1918	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1919			F_TXFIFO_EMPTY, 1, 20, 5);
1920
1921	DELAY(100);
1922	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1923
1924	pi->phy.ops->power_down(&pi->phy, 1);
1925#endif
1926
1927}
1928
1929static int
1930cxgb_set_mtu(struct port_info *p, int mtu)
1931{
1932	struct ifnet *ifp = p->ifp;
1933	int error = 0;
1934
1935	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1936		error = EINVAL;
1937	else if (ifp->if_mtu != mtu) {
1938		PORT_LOCK(p);
1939		ifp->if_mtu = mtu;
1940		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1941			cxgb_stop_locked(p);
1942			cxgb_init_locked(p);
1943		}
1944		PORT_UNLOCK(p);
1945	}
1946	return (error);
1947}
1948
1949/*
1950 * Mark lro enabled or disabled in all qsets for this port
1951 */
1952static int
1953cxgb_set_lro(struct port_info *p, int enabled)
1954{
1955	int i;
1956	struct adapter *adp = p->adapter;
1957	struct sge_qset *q;
1958
1959	PORT_LOCK_ASSERT_OWNED(p);
1960	for (i = 0; i < p->nqsets; i++) {
1961		q = &adp->sge.qs[p->first_qset + i];
1962		q->lro.enabled = (enabled != 0);
1963	}
1964	return (0);
1965}
1966
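/*
 * Standard ifnet ioctl handler: MTU, address, interface flags, multicast
 * filters, media, and capability changes; anything else is passed on to
 * ether_ioctl().
 */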
1967static int
1968cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1969{
1970	struct port_info *p = ifp->if_softc;
1971	struct ifaddr *ifa = (struct ifaddr *)data;
1972	struct ifreq *ifr = (struct ifreq *)data;
1973	int flags, error = 0, reinit = 0;
1974	uint32_t mask;
1975
1976	/*
1977	 * XXX need to check that we aren't in the middle of an unload
1978	 */
1979	switch (command) {
1980	case SIOCSIFMTU:
1981		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1982		break;
1983	case SIOCSIFADDR:
1984		if (ifa->ifa_addr->sa_family == AF_INET) {
1985			ifp->if_flags |= IFF_UP;
1986			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1987				PORT_LOCK(p);
1988				cxgb_init_locked(p);
1989				PORT_UNLOCK(p);
1990			}
1991			arp_ifinit(ifp, ifa);
1992		} else
1993			error = ether_ioctl(ifp, command, data);
1994		break;
1995	case SIOCSIFFLAGS:
1996		PORT_LOCK(p);
1997		if (ifp->if_flags & IFF_UP) {
1998			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1999				flags = p->if_flags;
2000				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2001				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
2002					cxgb_set_rxmode(p);
2003			} else
2004				cxgb_init_locked(p);
2005			p->if_flags = ifp->if_flags;
2006		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2007			cxgb_stop_locked(p);
2008
2009		PORT_UNLOCK(p);
2010		break;
2011	case SIOCADDMULTI:
2012	case SIOCDELMULTI:
2013		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2014			cxgb_set_rxmode(p);
2015		}
2016		break;
2017	case SIOCSIFMEDIA:
2018	case SIOCGIFMEDIA:
2019		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2020		break;
2021	case SIOCSIFCAP:
2022		PORT_LOCK(p);
2023		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2024		if (mask & IFCAP_TXCSUM) {
2025			if (IFCAP_TXCSUM & ifp->if_capenable) {
2026				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2027				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2028				    | CSUM_IP | CSUM_TSO);
2029			} else {
2030				ifp->if_capenable |= IFCAP_TXCSUM;
2031				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2032				    | CSUM_IP);
2033			}
2034		}
2035		if (mask & IFCAP_RXCSUM) {
2036			ifp->if_capenable ^= IFCAP_RXCSUM;
2037		}
2038		if (mask & IFCAP_TSO4) {
2039			if (IFCAP_TSO4 & ifp->if_capenable) {
2040				ifp->if_capenable &= ~IFCAP_TSO4;
2041				ifp->if_hwassist &= ~CSUM_TSO;
2042			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2043				ifp->if_capenable |= IFCAP_TSO4;
2044				ifp->if_hwassist |= CSUM_TSO;
2045			} else {
2046				if (cxgb_debug)
2047					printf("cxgb requires tx checksum offload"
2048					    " to be enabled to use TSO\n");
2049				error = EINVAL;
2050			}
2051		}
2052		if (mask & IFCAP_LRO) {
2053			ifp->if_capenable ^= IFCAP_LRO;
2054
2055			/* Safe to do this even if cxgb_up has not been called yet */
2056			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2057		}
2058		if (mask & IFCAP_VLAN_HWTAGGING) {
2059			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2060			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2061		}
2062		if (mask & IFCAP_VLAN_MTU) {
2063			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2064			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2065		}
2066		if (mask & IFCAP_VLAN_HWCSUM) {
2067			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2068		}
2069		if (reinit) {
2070			cxgb_stop_locked(p);
2071			cxgb_init_locked(p);
2072		}
2073		PORT_UNLOCK(p);
2074
2075#ifdef VLAN_CAPABILITIES
2076		VLAN_CAPABILITIES(ifp);
2077#endif
2078		break;
2079	default:
2080		error = ether_ioctl(ifp, command, data);
2081		break;
2082	}
2083	return (error);
2084}
2085
2086static int
2087cxgb_media_change(struct ifnet *ifp)
2088{
2089	if_printf(ifp, "media change not supported\n");
2090	return (ENXIO);
2091}
2092
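/*
 * Report link state, speed, and duplex for an ifmedia status request.
 */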
2093static void
2094cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2095{
2096	struct port_info *p = ifp->if_softc;
2097
2098	ifmr->ifm_status = IFM_AVALID;
2099	ifmr->ifm_active = IFM_ETHER;
2100
2101	if (!p->link_config.link_ok)
2102		return;
2103
2104	ifmr->ifm_status |= IFM_ACTIVE;
2105
2106	switch (p->link_config.speed) {
2107	case 10:
2108		ifmr->ifm_active |= IFM_10_T;
2109		break;
2110	case 100:
2111		ifmr->ifm_active |= IFM_100_TX;
2112		break;
2113	case 1000:
2114		ifmr->ifm_active |= IFM_1000_T;
2115		break;
2116	}
2117
2118	if (p->link_config.duplex)
2119		ifmr->ifm_active |= IFM_FDX;
2120	else
2121		ifmr->ifm_active |= IFM_HDX;
2122}
2123
2124static void
2125cxgb_async_intr(void *data)
2126{
2127	adapter_t *sc = data;
2128
2129	if (cxgb_debug)
2130		device_printf(sc->dev, "cxgb_async_intr\n");
2131	/*
2132	 * May need to sleep - defer to taskqueue
2133	 */
2134	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2135}
2136
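/*
 * Taskqueue handler for external (PHY) interrupts: run the PHY interrupt
 * handler, then re-enable the T3DBG bit in the slow interrupt mask.
 */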
2137static void
2138cxgb_ext_intr_handler(void *arg, int count)
2139{
2140	adapter_t *sc = (adapter_t *)arg;
2141
2142	if (cxgb_debug)
2143		printf("cxgb_ext_intr_handler\n");
2144
2145	t3_phy_intr_handler(sc);
2146
2147	/* Now reenable external interrupts */
2148	ADAPTER_LOCK(sc);
2149	if (sc->slow_intr_mask) {
2150		sc->slow_intr_mask |= F_T3DBG;
2151		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2152		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2153	}
2154	ADAPTER_UNLOCK(sc);
2155}
2156
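/*
 * Poll link state on ports whose PHY cannot interrupt on link changes and
 * refresh each interface's baudrate.
 */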
2157static void
2158check_link_status(adapter_t *sc)
2159{
2160	int i;
2161
2162	for (i = 0; i < (sc)->params.nports; ++i) {
2163		struct port_info *p = &sc->port[i];
2164
2165		if (!(p->phy.caps & SUPPORTED_IRQ))
2166			t3_link_changed(sc, i);
2167		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2168	}
2169}
2170
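/*
 * T3B2 MAC watchdog: for each running port, ask the MAC watchdog whether
 * the MAC was toggled or needs a full reset, and in the latter case
 * reprogram the MTU, MAC address, rx mode, and link before re-enabling it.
 */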
2171static void
2172check_t3b2_mac(struct adapter *adapter)
2173{
2174	int i;
2175
2176	if (adapter->flags & CXGB_SHUTDOWN)
2177		return;
2178
2179	for_each_port(adapter, i) {
2180		struct port_info *p = &adapter->port[i];
2181		struct ifnet *ifp = p->ifp;
2182		int status;
2183
2184		if (adapter->flags & CXGB_SHUTDOWN)
2185			return;
2186
2187		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2188			continue;
2189
2190		status = 0;
2191		PORT_LOCK(p);
2192		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2193			status = t3b2_mac_watchdog_task(&p->mac);
2194		if (status == 1)
2195			p->mac.stats.num_toggled++;
2196		else if (status == 2) {
2197			struct cmac *mac = &p->mac;
2198			int mtu = ifp->if_mtu;
2199
2200			if (ifp->if_capenable & IFCAP_VLAN_MTU)
2201				mtu += ETHER_VLAN_ENCAP_LEN;
2202			t3_mac_set_mtu(mac, mtu);
2203			t3_mac_set_address(mac, 0, p->hw_addr);
2204			cxgb_set_rxmode(p);
2205			t3_link_start(&p->phy, mac, &p->link_config);
2206			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2207			t3_port_intr_enable(adapter, p->port_id);
2208			p->mac.stats.num_resets++;
2209		}
2210		PORT_UNLOCK(p);
2211	}
2212}
2213
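/*
 * Periodic callout: defer the real work to the tick task and reschedule
 * ourselves for one second later.
 */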
2214static void
2215cxgb_tick(void *arg)
2216{
2217	adapter_t *sc = (adapter_t *)arg;
2218
2219	if (sc->flags & CXGB_SHUTDOWN)
2220		return;
2221
2222	taskqueue_enqueue(sc->tq, &sc->tick_task);
2223	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2224}
2225
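/*
 * Tick task: poll link state when link polling is enabled, and run the
 * T3B2 MAC watchdog on affected adapters that have open ports.
 */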
2226static void
2227cxgb_tick_handler(void *arg, int count)
2228{
2229	adapter_t *sc = (adapter_t *)arg;
2230	const struct adapter_params *p = &sc->params;
2231
2232	if (sc->flags & CXGB_SHUTDOWN)
2233		return;
2234
2235	ADAPTER_LOCK(sc);
2236	if (p->linkpoll_period)
2237		check_link_status(sc);
2238
2239	/*
2240	 * The adapter lock can currently only be acquired after the
2241	 * port lock, so drop it before check_t3b2_mac() takes the port locks.
2242	 */
2243	ADAPTER_UNLOCK(sc);
2244
2245	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2246		check_t3b2_mac(sc);
2247}
2248
2249static void
2250touch_bars(device_t dev)
2251{
2252	/*
2253	 * Don't enable yet: the code below still uses the Linux PCI config API.
2254	 */
2255#if !defined(__LP64__) && 0
2256	u32 v;
2257
2258	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2259	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2260	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2261	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2262	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2263	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2264#endif
2265}
2266
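/*
 * Write a byte range to the serial EEPROM.  The EEPROM is accessed in
 * 32-bit words, so unaligned writes first read back the partial words at
 * the edges and merge in the new bytes; write protection is lifted for
 * the duration of the update.
 */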
2267static int
2268set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2269{
2270	uint8_t *buf;
2271	int err = 0;
2272	u32 aligned_offset, aligned_len, *p;
2273	struct adapter *adapter = pi->adapter;
2274
2275
2276	aligned_offset = offset & ~3;
2277	aligned_len = (len + (offset & 3) + 3) & ~3;
2278
2279	if (aligned_offset != offset || aligned_len != len) {
2280		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2281		if (!buf)
2282			return (ENOMEM);
2283		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2284		if (!err && aligned_len > 4)
2285			err = t3_seeprom_read(adapter,
2286					      aligned_offset + aligned_len - 4,
2287					      (u32 *)&buf[aligned_len - 4]);
2288		if (err)
2289			goto out;
2290		memcpy(buf + (offset & 3), data, len);
2291	} else
2292		buf = (uint8_t *)(uintptr_t)data;
2293
2294	err = t3_seeprom_wp(adapter, 0);
2295	if (err)
2296		goto out;
2297
2298	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2299		err = t3_seeprom_write(adapter, aligned_offset, *p);
2300		aligned_offset += 4;
2301	}
2302
2303	if (!err)
2304		err = t3_seeprom_wp(adapter, 1);
2305out:
2306	if (buf != data)
2307		free(buf, M_DEVBUF);
2308	return (err);
2309}
2310
2311
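/*
 * Range-check helper for ioctl parameters; negative values mean
 * "not specified" and always pass.
 */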
2312static int
2313in_range(int val, int lo, int hi)
2314{
2315	return (val < 0 || (val <= hi && val >= lo));
2316}
2317
2318static int
2319cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2320{
2321	return (0);
2322}
2323
2324static int
2325cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2326{
2327	return (0);
2328}
2329
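/*
 * Privileged ioctls on the cxgb control device: MDIO access, register and
 * SGE context reads, queue set and MTU table configuration, memory and
 * TCAM dumps, trace filters, scheduler setup, and register dumps.
 *
 * Illustrative userland use (a sketch only; 'fd' is assumed to be an open
 * control device node for the adapter and 'reg_offset' a 4-byte aligned
 * MMIO offset):
 *
 *	struct ch_reg r;
 *
 *	r.addr = reg_offset;
 *	if (ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("0x%x: 0x%x\n", r.addr, r.val);
 */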
2330static int
2331cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2332    int fflag, struct thread *td)
2333{
2334	int mmd, error = 0;
2335	struct port_info *pi = dev->si_drv1;
2336	adapter_t *sc = pi->adapter;
2337
2338#ifdef PRIV_SUPPORTED
2339	if (priv_check(td, PRIV_DRIVER)) {
2340		if (cxgb_debug)
2341			printf("user does not have access to privileged ioctls\n");
2342		return (EPERM);
2343	}
2344#else
2345	if (suser(td)) {
2346		if (cxgb_debug)
2347			printf("user does not have access to privileged ioctls\n");
2348		return (EPERM);
2349	}
2350#endif
2351
2352	switch (cmd) {
2353	case SIOCGMIIREG: {
2354		uint32_t val;
2355		struct cphy *phy = &pi->phy;
2356		struct mii_data *mid = (struct mii_data *)data;
2357
2358		if (!phy->mdio_read)
2359			return (EOPNOTSUPP);
2360		if (is_10G(sc)) {
2361			mmd = mid->phy_id >> 8;
2362			if (!mmd)
2363				mmd = MDIO_DEV_PCS;
2364			else if (mmd > MDIO_DEV_XGXS)
2365				return (EINVAL);
2366
2367			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2368					     mid->reg_num, &val);
2369		} else
2370			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2371					     mid->reg_num & 0x1f, &val);
2372		if (error == 0)
2373			mid->val_out = val;
2374		break;
2375	}
2376	case SIOCSMIIREG: {
2377		struct cphy *phy = &pi->phy;
2378		struct mii_data *mid = (struct mii_data *)data;
2379
2380		if (!phy->mdio_write)
2381			return (EOPNOTSUPP);
2382		if (is_10G(sc)) {
2383			mmd = mid->phy_id >> 8;
2384			if (!mmd)
2385				mmd = MDIO_DEV_PCS;
2386			else if (mmd > MDIO_DEV_XGXS)
2387				return (EINVAL);
2388
2389			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2390					      mmd, mid->reg_num, mid->val_in);
2391		} else
2392			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2393					      mid->reg_num & 0x1f,
2394					      mid->val_in);
2395		break;
2396	}
2397	case CHELSIO_SETREG: {
2398		struct ch_reg *edata = (struct ch_reg *)data;
2399		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2400			return (EFAULT);
2401		t3_write_reg(sc, edata->addr, edata->val);
2402		break;
2403	}
2404	case CHELSIO_GETREG: {
2405		struct ch_reg *edata = (struct ch_reg *)data;
2406		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2407			return (EFAULT);
2408		edata->val = t3_read_reg(sc, edata->addr);
2409		break;
2410	}
2411	case CHELSIO_GET_SGE_CONTEXT: {
2412		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2413		mtx_lock_spin(&sc->sge.reg_lock);
2414		switch (ecntxt->cntxt_type) {
2415		case CNTXT_TYPE_EGRESS:
2416			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2417			    ecntxt->data);
2418			break;
2419		case CNTXT_TYPE_FL:
2420			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
2421			    ecntxt->data);
2422			break;
2423		case CNTXT_TYPE_RSP:
2424			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2425			    ecntxt->data);
2426			break;
2427		case CNTXT_TYPE_CQ:
2428			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
2429			    ecntxt->data);
2430			break;
2431		default:
2432			error = EINVAL;
2433			break;
2434		}
2435		mtx_unlock_spin(&sc->sge.reg_lock);
2436		break;
2437	}
2438	case CHELSIO_GET_SGE_DESC: {
2439		struct ch_desc *edesc = (struct ch_desc *)data;
2440		int ret;
2441		if (edesc->queue_num >= SGE_QSETS * 6)
2442			return (EINVAL);
2443		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2444		    edesc->queue_num % 6, edesc->idx, edesc->data);
2445		if (ret < 0)
2446			return (EINVAL);
2447		edesc->size = ret;
2448		break;
2449	}
2450	case CHELSIO_SET_QSET_PARAMS: {
2451		struct qset_params *q;
2452		struct ch_qset_params *t = (struct ch_qset_params *)data;
2453		int i;
2454
2455		if (t->qset_idx >= SGE_QSETS)
2456			return (EINVAL);
2457		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
2458		    !in_range(t->cong_thres, 0, 255) ||
2459		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
2460			      MAX_TXQ_ENTRIES) ||
2461		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
2462			      MAX_TXQ_ENTRIES) ||
2463		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
2464			      MAX_CTRL_TXQ_ENTRIES) ||
2465		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
2466		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
2467			      MAX_RX_JUMBO_BUFFERS) ||
2468		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
2469			return (EINVAL);
2470
2471		if ((sc->flags & FULL_INIT_DONE) && t->lro > 0)
2472			for_each_port(sc, i) {
2473				pi = adap2pinfo(sc, i);
2474				if (t->qset_idx >= pi->first_qset &&
2475				    t->qset_idx < pi->first_qset + pi->nqsets
2476#if 0
2477					&& !pi->rx_csum_offload
2478#endif
2479					)
2480					return (EINVAL);
2481			}
2482		if ((sc->flags & FULL_INIT_DONE) &&
2483		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
2484		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
2485		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
2486		     t->polling >= 0 || t->cong_thres >= 0))
2487			return (EBUSY);
2488
2489		q = &sc->params.sge.qset[t->qset_idx];
2490
2491		if (t->rspq_size >= 0)
2492			q->rspq_size = t->rspq_size;
2493		if (t->fl_size[0] >= 0)
2494			q->fl_size = t->fl_size[0];
2495		if (t->fl_size[1] >= 0)
2496			q->jumbo_size = t->fl_size[1];
2497		if (t->txq_size[0] >= 0)
2498			q->txq_size[0] = t->txq_size[0];
2499		if (t->txq_size[1] >= 0)
2500			q->txq_size[1] = t->txq_size[1];
2501		if (t->txq_size[2] >= 0)
2502			q->txq_size[2] = t->txq_size[2];
2503		if (t->cong_thres >= 0)
2504			q->cong_thres = t->cong_thres;
2505		if (t->intr_lat >= 0) {
2506			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];
2507
2508			q->coalesce_usecs = t->intr_lat;
2509			t3_update_qset_coalesce(qs, q);
2510		}
2511		break;
2512	}
2513	case CHELSIO_GET_QSET_PARAMS: {
2514		struct qset_params *q;
2515		struct ch_qset_params *t = (struct ch_qset_params *)data;
2516
2517		if (t->qset_idx >= SGE_QSETS)
2518			return (EINVAL);
2519
2520		q = &(sc)->params.sge.qset[t->qset_idx];
2521		t->rspq_size   = q->rspq_size;
2522		t->txq_size[0] = q->txq_size[0];
2523		t->txq_size[1] = q->txq_size[1];
2524		t->txq_size[2] = q->txq_size[2];
2525		t->fl_size[0]  = q->fl_size;
2526		t->fl_size[1]  = q->jumbo_size;
2527		t->polling     = q->polling;
2528		t->intr_lat    = q->coalesce_usecs;
2529		t->cong_thres  = q->cong_thres;
2530		break;
2531	}
2532	case CHELSIO_SET_QSET_NUM: {
2533		struct ch_reg *edata = (struct ch_reg *)data;
2534		unsigned int port_idx = pi->port_id;
2535
2536		if (sc->flags & FULL_INIT_DONE)
2537			return (EBUSY);
2538		if (edata->val < 1 ||
2539		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
2540			return (EINVAL);
2541		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
2542			return (EINVAL);
2543		sc->port[port_idx].nqsets = edata->val;
2544		sc->port[0].first_qset = 0;
2545		/*
2546		 * XXX hardcode ourselves to 2 ports just like Linux
2547		 */
2548		sc->port[1].first_qset = sc->port[0].nqsets;
2549		break;
2550	}
2551	case CHELSIO_GET_QSET_NUM: {
2552		struct ch_reg *edata = (struct ch_reg *)data;
2553		edata->val = pi->nqsets;
2554		break;
2555	}
2556#ifdef notyet
2557	case CHELSIO_LOAD_FW:
2558	case CHELSIO_GET_PM:
2559	case CHELSIO_SET_PM:
2560		return (EOPNOTSUPP);
2561		break;
2562#endif
2563	case CHELSIO_SETMTUTAB: {
2564		struct ch_mtus *m = (struct ch_mtus *)data;
2565		int i;
2566
2567		if (!is_offload(sc))
2568			return (EOPNOTSUPP);
2569		if (offload_running(sc))
2570			return (EBUSY);
2571		if (m->nmtus != NMTUS)
2572			return (EINVAL);
2573		if (m->mtus[0] < 81)         /* accommodate SACK */
2574			return (EINVAL);
2575
2576		/*
2577		 * MTUs must be in ascending order
2578		 */
2579		for (i = 1; i < NMTUS; ++i)
2580			if (m->mtus[i] < m->mtus[i - 1])
2581				return (EINVAL);
2582
2583		memcpy(sc->params.mtus, m->mtus,
2584		       sizeof(sc->params.mtus));
2585		break;
2586	}
2587	case CHELSIO_GETMTUTAB: {
2588		struct ch_mtus *m = (struct ch_mtus *)data;
2589
2590		if (!is_offload(sc))
2591			return (EOPNOTSUPP);
2592
2593		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2594		m->nmtus = NMTUS;
2595		break;
2596	}
2597	case CHELSIO_DEVUP:
2598		if (!is_offload(sc))
2599			return (EOPNOTSUPP);
2600		return (offload_open(pi));
2601		break;
2602	case CHELSIO_GET_MEM: {
2603		struct ch_mem_range *t = (struct ch_mem_range *)data;
2604		struct mc7 *mem;
2605		uint8_t *useraddr;
2606		u64 buf[32];
2607
2608		if (!is_offload(sc))
2609			return (EOPNOTSUPP);
2610		if (!(sc->flags & FULL_INIT_DONE))
2611			return (EIO);         /* need the memory controllers */
2612		if ((t->addr & 0x7) || (t->len & 0x7))
2613			return (EINVAL);
2614		if (t->mem_id == MEM_CM)
2615			mem = &sc->cm;
2616		else if (t->mem_id == MEM_PMRX)
2617			mem = &sc->pmrx;
2618		else if (t->mem_id == MEM_PMTX)
2619			mem = &sc->pmtx;
2620		else
2621			return (EINVAL);
2622
2623		/*
2624		 * Version scheme:
2625		 * bits 0..9: chip version
2626		 * bits 10..15: chip revision
2627		 */
2628		t->version = 3 | (sc->params.rev << 10);
2629
2630		/*
2631		 * Read 256 bytes at a time as len can be large and we don't
2632		 * want to use huge intermediate buffers.
2633		 */
2634		useraddr = (uint8_t *)t->buf;
2635		while (t->len) {
2636			unsigned int chunk = min(t->len, sizeof(buf));
2637
2638			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
2639			if (error)
2640				return (-error);
2641			if (copyout(buf, useraddr, chunk))
2642				return (EFAULT);
2643			useraddr += chunk;
2644			t->addr += chunk;
2645			t->len -= chunk;
2646		}
2647		break;
2648	}
2649	case CHELSIO_READ_TCAM_WORD: {
2650		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2651
2652		if (!is_offload(sc))
2653			return (EOPNOTSUPP);
2654		if (!(sc->flags & FULL_INIT_DONE))
2655			return (EIO);         /* need MC5 */
2656		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2657		break;
2658	}
2659	case CHELSIO_SET_TRACE_FILTER: {
2660		struct ch_trace *t = (struct ch_trace *)data;
2661		const struct trace_params *tp;
2662
2663		tp = (const struct trace_params *)&t->sip;
2664		if (t->config_tx)
2665			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2666					       t->trace_tx);
2667		if (t->config_rx)
2668			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2669					       t->trace_rx);
2670		break;
2671	}
2672	case CHELSIO_SET_PKTSCHED: {
2673		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2674		if (sc->open_device_map == 0)
2675			return (EAGAIN);
2676		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2677		    p->binding);
2678		break;
2679	}
2680	case CHELSIO_IFCONF_GETREGS: {
2681		struct ifconf_regs *regs = (struct ifconf_regs *)data;
2682		int reglen = cxgb_get_regs_len();
2683		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);
2684		if (buf == NULL)
2685			return (ENOMEM);
2686		if (regs->len > reglen)
2687			regs->len = reglen;
2688		else if (regs->len < reglen) {
2689			error = E2BIG;
2690			goto done;
2691		}
2692		cxgb_get_regs(sc, regs, buf);
2693		error = copyout(buf, regs->data, reglen);
2694
2695		done:
2696		free(buf, M_DEVBUF);
2697
2698		break;
2699	}
2700	case CHELSIO_SET_HW_SCHED: {
2701		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2702		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2703
2704		if ((sc->flags & FULL_INIT_DONE) == 0)
2705			return (EAGAIN);       /* need TP to be initialized */
2706		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2707		    !in_range(t->channel, 0, 1) ||
2708		    !in_range(t->kbps, 0, 10000000) ||
2709		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2710		    !in_range(t->flow_ipg, 0,
2711			      dack_ticks_to_usec(sc, 0x7ff)))
2712			return (EINVAL);
2713
2714		if (t->kbps >= 0) {
2715			error = t3_config_sched(sc, t->kbps, t->sched);
2716			if (error < 0)
2717				return (-error);
2718		}
2719		if (t->class_ipg >= 0)
2720			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2721		if (t->flow_ipg >= 0) {
2722			t->flow_ipg *= 1000;     /* us -> ns */
2723			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2724		}
2725		if (t->mode >= 0) {
2726			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2727
2728			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2729					 bit, t->mode ? bit : 0);
2730		}
2731		if (t->channel >= 0)
2732			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2733					 1 << t->sched, t->channel << t->sched);
2734		break;
2735	}
2736	default:
2737		return (EOPNOTSUPP);
2738		break;
2739	}
2740
2741	return (error);
2742}
2743
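/*
 * Copy the contiguous register block [start, end] into the dump buffer at
 * the byte offset corresponding to 'start'.
 */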
2744static __inline void
2745reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2746    unsigned int end)
2747{
2748	uint32_t *p = (uint32_t *)(buf + start);
2749
2750	for ( ; start <= end; start += sizeof(uint32_t))
2751		*p++ = t3_read_reg(ap, start);
2752}
2753
2754#define T3_REGMAP_SIZE (3 * 1024)
2755static int
2756cxgb_get_regs_len(void)
2757{
2758	return (T3_REGMAP_SIZE);
2759}
2760#undef T3_REGMAP_SIZE
2761
2762static void
2763cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
2764{
2765
2766	/*
2767	 * Version scheme:
2768	 * bits 0..9: chip version
2769	 * bits 10..15: chip revision
2770	 * bit 31: set for PCIe cards
2771	 */
2772	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2773
2774	/*
2775	 * We skip the MAC statistics registers because they are clear-on-read.
2776	 * Also reading multi-register stats would need to synchronize with the
2777	 * periodic mac stats accumulation.  Hard to justify the complexity.
2778	 */
2779	memset(buf, 0, REGDUMP_SIZE);
2780	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2781	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2782	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2783	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2784	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2785	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2786		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2787	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2788		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2789}
2790
2791
2792MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
2793