/**************************************************************************

Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 185157 2008-11-21 19:22:25Z gnn $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ktr.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

#ifdef IFNET_MULTIQUEUE
#include <machine/intr_machdep.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_teardown_msix(adapter_t *);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick_handler(void *, int);
static void cxgb_down_locked(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_link_start(struct port_info *p);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
       .d_version =    D_VERSION,
       .d_flags =      0,
       .d_open =       cxgb_extension_open,
       .d_close =      cxgb_extension_close,
       .d_ioctl =      cxgb_extension_ioctl,
       .d_name =       "cxgb",
};

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

#define SGE_MSIX_COUNT (SGE_QSETS + 1)
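/* One MSI-X message per queue set, plus one for slow-path/async events. */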

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, use singleq = 1.
 */
static int singleq = 0;
TUNABLE_INT("hw.cxgb.singleq", &singleq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
    "use a single queue-set per port");


/*
 * The driver uses the firmware already on the card by default.
 * To update the firmware even when it is current, use force_fw_update = 1.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = 1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");

/*
 * Tune the size of the output queue.
 */
int cxgb_snd_queue_len = IFQ_MAXLEN;
TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size");


enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32,
	MIN_FL_JUMBO_ENTRIES = 32
};

struct filter_info {
	u32 sip;
	u32 sip_mask;
	u32 dip;
	u16 sport;
	u16 dport;
	u32 vlan:12;
	u32 vlan_prio:3;
	u32 mac_hit:1;
	u32 mac_idx:4;
	u32 mac_vld:1;
	u32 pkt_type:2;
	u32 report_filter_id:1;
	u32 pass:1;
	u32 rss:1;
	u32 qset:3;
	u32 locked:1;
	u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };

#define EEPROM_MAGIC 0x38E2F10C

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
	{0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);


static __inline char
t3rev2char(struct adapter *adapter)
{
	char rev = 'z';

	switch(adapter->params.rev) {
	case T3_REV_A:
		rev = 'a';
		break;
	case T3_REV_B:
	case T3_REV_B2:
		rev = 'b';
		break;
	case T3_REV_C:
		rev = 'c';
		break;
	}
	return rev;
}

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];
	int nports;
	struct adapter *sc = device_get_softc(dev);

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	nports = ai->nports0 + ai->nports1;
	if (nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s %sNIC, rev: %d nports: %d %s",
		 ai->desc, is_offload(sc) ? "R" : "",
		 sc->params.rev, nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "t3b_tp_eeprom"
#define TPSRAM_NAME "t3b_protocol_sram"

static int
upgrade_fw(adapter_t *sc)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
		return (ENOENT);
	} else
		device_printf(sc->dev, "updating firmware on card\n");
	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

static int
cxgb_controller_attach(device_t dev)
{
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, error = 0;
	uint32_t vers;
	int port_qsets = 1;
#ifdef MSI_SUPPORTED
	int msi_needed, reg;
#endif
	int must_load = 0;
	char buf[80];

	sc = device_get_softc(dev);
	sc->dev = dev;
	sc->msi_count = 0;
	ai = cxgb_get_adapter_info(dev);

	/*
	 * XXX not really related but a recent addition
	 */
#ifdef MSI_SUPPORTED
	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		uint16_t lnk, pectl;
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
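		/*
		 * Bits 14:12 of the PCIe device control register hold the
		 * max read request size; encoding 5 selects 4096 bytes.
		 */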
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	if (sc->link_width != 0 && sc->link_width <= 4 &&
	    (ai->nports0 + ai->nports1) <= 2) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
	}
#endif
	touch_bars(dev);
	pci_enable_busmaster(dev);
	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR region 0\n");
		return (ENXIO);
	}
	sc->udbs_rid = PCIR_BAR(2);
	if ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->udbs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR region 2\n");
		error = ENXIO;
		goto out;
	}

	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
	    device_get_unit(dev));
	ADAPTER_LOCK_INIT(sc, sc->lockbuf);

	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
	    device_get_unit(dev));
	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
	    device_get_unit(dev));
	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
	    device_get_unit(dev));

	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	if (t3_prep_adapter(sc, ai, 1) < 0) {
		printf("prep adapter failed\n");
		error = ENODEV;
		goto out;
	}
	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED

	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_needed = sc->msi_count = SGE_MSIX_COUNT;

		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
		    (sc->msi_count != msi_needed)) {
			device_printf(dev, "msix allocation failed - msi_count = %d"
			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
			    msi_needed, error);
			sc->msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			sc->cxgb_intr = t3_intr_msix;
		}
	}

	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
		sc->msi_count = 1;
		if (pci_alloc_msi(dev, &sc->msi_count)) {
			device_printf(dev, "alloc msi failed - will try INTx\n");
			sc->msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			sc->cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (sc->msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->irq_rid = 0;
		sc->cxgb_intr = t3b_intr;
	}

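	/*
	 * With MSI-X and multiple queue sets allowed, split the queue sets
	 * evenly across the ports, capped at one queue set per CPU.
	 */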
	if ((sc->flags & USING_MSIX) && !singleq)
		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);


	/* Create a periodic callout for checking adapter status */
	callout_init(&sc->cxgb_tick_ch, TRUE);

	if ((t3_check_fw_version(sc, &must_load) != 0 && must_load) || force_fw_update) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_check_tpsram_version(sc, &must_load) != 0 && must_load) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
		sc->flags &= ~TPS_UPTODATE;
	} else {
		sc->flags |= TPS_UPTODATE;
	}

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; i++) {
		struct port_info *pi;

		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		pi = &sc->port[i];
		pi->adapter = sc;
		pi->nqsets = port_qsets;
		pi->first_qset = i*port_qsets;
		pi->port_id = i;
		pi->tx_chan = i >= ai->nports0;
		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
		sc->rxpkt_map[pi->txpkt_intf] = i;
		sc->port[i].tx_chan = i >= ai->nports0;
		sc->portdev[i] = child;
		device_set_softc(child, pi);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	/* initialize sge private state */
	t3_sge_init_adapter(sc);

	t3_led_ready(sc);

	cxgb_offload_init();
	if (is_offload(sc)) {
		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_adapter_ofld(sc);
	}
	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
	    G_FW_VERSION_MICRO(vers));

	snprintf(buf, sizeof(buf), "%s\t E/C: %s S/N: %s",
		 ai->desc,
		 sc->params.vpd.ec, sc->params.vpd.sn);
	device_set_desc_copy(dev, buf);

	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
	t3_add_attach_sysctls(sc);
out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	ADAPTER_LOCK(sc);
	sc->flags |= CXGB_SHUTDOWN;
	ADAPTER_UNLOCK(sc);
	cxgb_pcpu_shutdown_threads(sc);
	ADAPTER_LOCK(sc);

	/*
	 * drops the lock
	 */
	cxgb_down_locked(sc);

#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	} else {
		device_printf(sc->dev, "no msi message to release\n");
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	t3_sge_deinit_sw(sc);
	/*
	 * Wait for last callout
	 */

	DELAY(hz*100);

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	bus_generic_detach(sc->dev);
	if (sc->tq != NULL) {
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}

	if (is_offload(sc)) {
		cxgb_adapter_unofld(sc);
		if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT))
			offload_close(&sc->tdev);
		else
			printf("cxgb_free: DEVMAP_BIT not set\n");
	} else
		printf("not offloading set\n");
#ifdef notyet
	if (sc->flags & CXGB_OFLD_INIT)
		cxgb_offload_deactivate(sc);
#endif
	free(sc->filters, M_DEVBUF);
	t3_sge_free(sc);

	cxgb_offload_exit();

	if (sc->udbs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	MTX_DESTROY(&sc->mdio_lock);
	MTX_DESTROY(&sc->sge.reg_lock);
	MTX_DESTROY(&sc->elmer_lock);
	ADAPTER_LOCK_DEINIT(sc);
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx = 0, qset_idx = 0;
	u_int ntxq = SGE_TXQ_PER_SET;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;

	for (i = 0; i < (sc)->params.nports; i++) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
				    err);
				return (err);
			}
		}
	}

	return (0);
}

static void
cxgb_teardown_msix(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
			sc->msix_intr_tag[i] = NULL;
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
			sc->msix_irq_res[i] = NULL;
		}
	}
}

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}

	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
		NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}
	for (i = k = 0; i < (sc)->params.nports; i++) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; j++, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
				INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
				NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
#ifdef IFNET_MULTIQUEUE
			if (singleq == 0) {
				int vector = rman_get_start(sc->msix_irq_res[k]);
				if (bootverbose)
					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
				intr_bind(vector, k % mp_ncpus);
			}
#endif
		}
	}

	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];
	const char *desc;

	p = device_get_softc(dev);
	desc = p->phy.desc;
	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

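/*
 * Create the per-port character device (named after the interface, e.g.
 * /dev/cxgb0) that backs the extension ioctls.
 */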
static int
cxgb_makedev(struct port_info *pi)
{

	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifndef LRO_SUPPORTED
#ifdef IFCAP_LRO
#undef IFCAP_LRO
#endif
#define IFCAP_LRO 0x0
#endif

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define IFCAP_TSO6 0x0
#define CSUM_TSO   0x0
#endif


static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int err, media_flags;
	struct adapter *sc;


	p = device_get_softc(dev);
	sc = p->adapter;
	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
	    device_get_unit(device_get_parent(dev)), p->port_id);
	PORT_LOCK_INIT(p, p->lockbuf);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;

#if 0
#ifdef IFNET_MULTIQUEUE
	ifp->if_flags |= IFF_MULTIQ;
	ifp->if_mq_start = cxgb_pcpu_start;
#endif
#endif
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	/*
	 * disable TSO on 4-port - it isn't supported by the firmware yet
	 */
	if (p->adapter->params.nports > 2) {
		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
		ifp->if_hwassist &= ~CSUM_TSO;
	}

	ether_ifattach(ifp, p->hw_addr);
	/*
	 * Only default to jumbo frames on 10GigE
	 */
	if (p->adapter->params.nports <= 2)
		ifp->if_mtu = ETHERMTU_JUMBO;
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->phy.desc, "10GBASE-CX4")) {
		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
			    0, NULL);
		media_flags = 0;
	} else if (!strcmp(p->phy.desc, "1000BASE-X")) {
		/*
		 * XXX: This is not very accurate.  Fix when common code
		 * returns more specific value - eg 1000BASE-SX, LX, etc.
		 */
		media_flags = IFM_ETHER | IFM_1000_SX | IFM_FDX;
	} else {
		printf("unsupported media type %s\n", p->phy.desc);
		return (ENXIO);
	}
	if (media_flags) {
		ifmedia_add(&p->media, media_flags, 0, NULL);
		ifmedia_set(&p->media, media_flags);
	} else {
		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
	}

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
	t3_sge_init_port(p);
#if defined(LINK_ATTACH)
	cxgb_link_start(p);
	t3_link_changed(sc, p->port_id);
#endif
	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);

	PORT_LOCK(p);
	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
		cxgb_stop_locked(p);
	PORT_UNLOCK(p);

	ether_ifdetach(p->ifp);
	printf("waiting for callout to stop ...");
	DELAY(1000000);
	printf("done\n");
	/*
	 * the lock may be acquired in ifdetach
	 */
	PORT_LOCK_DEINIT(p);
	if_free(p->ifp);

	if (p->port_cdev != NULL)
		destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	if (sc->flags & FULL_INIT_DONE) {
		t3_sge_stop(sc);
		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
		t3_intr_disable(sc);
	}
	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
		break;
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];
	struct cmac *mac = &adapter->port[port_id].mac;

	if (link_status) {
		DELAY(10);
		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
		/* Clear errors created by MAC enable */
		t3_set_reg_field(adapter,
		    A_XGM_STAT_CTRL + pi->mac.offset,
		    F_CLRSTATS, 1);
		if_link_state_change(pi->ifp, LINK_STATE_UP);

	} else {
		pi->phy.ops->power_down(&pi->phy, 1);
		t3_mac_disable(mac, MAC_DIRECTION_RX);
		t3_link_start(&pi->phy, mac, &pi->link_config);
		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
	}
}

/**
 *	t3_os_phymod_changed - handle PHY module changes
 *	@adap: the adapter whose PHY reported the module change
 *	@port_id: the index of the port whose PHY module changed
 *
 *	This is the OS-dependent handler for PHY module changes.  It is
 *	invoked when a PHY module is removed or inserted for any OS-specific
 *	processing.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
	static const char *mod_str[] = {
		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
	};

	struct port_info *pi = &adap->port[port_id];

	if (pi->phy.modtype == phy_modtype_none)
		device_printf(adap->dev, "PHY module unplugged\n");
	else {
		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
		    ("invalid PHY module type %d", pi->phy.modtype));
		device_printf(adap->dev, "%s PHY module inserted\n",
		    mod_str[pi->phy.modtype]);
	}
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
	ADAPTER_UNLOCK(sc);
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated before this gets called,
	 * as this is called early on in attach by t3_prep_adapter;
	 * save the address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;
	int mtu, hwtagging;

	ifp = p->ifp;

	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

	mtu = ifp->if_mtu;
	if (ifp->if_capenable & IFCAP_VLAN_MTU)
		mtu += ETHER_VLAN_ENCAP_LEN;

	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

	t3_init_rx_mode(&rm, p);
	if (!mac->multiport)
		t3_mac_reset(mac);
	t3_mac_set_mtu(mac, mtu);
	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

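/*
 * Poll response queue 0's offload-packet count until it has advanced by n
 * replies, sleeping 10ms between each of the five attempts.
 */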
static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
			      unsigned long n)
{
	int attempts = 5;

	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
		if (!--attempts)
			return (ETIMEDOUT);
		t3_os_sleep(10);
	}
	return 0;
}

static int
init_tp_parity(struct adapter *adap)
{
	int i;
	struct mbuf *m;
	struct cpl_set_tcb_field *greq;
	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

	t3_tp_set_offload_mode(adap, 1);

	for (i = 0; i < 16; i++) {
		struct cpl_smt_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_smt_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
		req->iff = i;
		t3_mgmt_tx(adap, m);
	}

	for (i = 0; i < 2048; i++) {
		struct cpl_l2t_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_l2t_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
		req->params = htonl(V_L2T_W_IDX(i));
		t3_mgmt_tx(adap, m);
	}

	for (i = 0; i < 2048; i++) {
		struct cpl_rte_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_rte_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
		req->l2t_idx = htonl(V_L2T_W_IDX(i));
		t3_mgmt_tx(adap, m);
	}

	m = m_gethdr(M_WAITOK, MT_DATA);
	greq = mtod(m, struct cpl_set_tcb_field *);
	m->m_len = m->m_pkthdr.len = sizeof(*greq);
	memset(greq, 0, sizeof(*greq));
	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
	greq->mask = htobe64(1);
	t3_mgmt_tx(adap, m);

	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
	t3_tp_set_offload_mode(adap, 0);
	return (i);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq[2];
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	nq[0] = nq[1] = 0;
	for_each_port(adap, i) {
		const struct port_info *pi = adap2pinfo(adap, i);

		nq[pi->tx_chan] += pi->nqsets;
	}
	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = nq[0] ? i % nq[0] : 0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
	}
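	/*
	 * Example: with two queue sets on each channel (nq[0] = nq[1] = 2),
	 * the first half of the table alternates between qsets 0 and 1 and
	 * the second half between qsets 2 and 3.
	 */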
	/* Calculate the reverse RSS map table */
	for (i = 0; i < RSS_TABLE_SIZE; ++i)
		if (adap->rrss_map[rspq_map[i]] == 0xff)
			adap->rrss_map[rspq_map[i]] = i;

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
		      F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
		      cpus, rspq_map);

}

/*
 * Sends an mbuf to an offload queue driver
 * after dealing with any active network taps.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
	int ret;

	ret = t3_offload_tx(tdev, m);
	return (ret);
}

static int
write_smt_entry(struct adapter *adapter, int idx)
{
	struct port_info *pi = &adapter->port[idx];
	struct cpl_smt_write_req *req;
	struct mbuf *m;

	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
		return (ENOMEM);

	req = mtod(m, struct cpl_smt_write_req *);
	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
	req->iff = idx;
	memset(req->src_mac1, 0, sizeof(req->src_mac1));
	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

	m_set_priority(m, 1);

	offload_tx(&adapter->tdev, m);

	return (0);
}

static int
init_smt(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i)
		write_smt_entry(adapter, i);
	return 0;
}

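/*
 * A_TP_MTU_PORT_TABLE packs port 0's MTU into its low 16 bits and port 1's
 * into its high 16 bits.
 */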
static void
init_port_mtus(adapter_t *adapter)
{
	unsigned int mtus = adapter->port[0].ifp->if_mtu;

	if (adapter->port[1].ifp)
		mtus |= adapter->port[1].ifp->if_mtu << 16;
	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
			      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
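	/* If the allocation fails the scheduler update is silently dropped. */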
	if (m) {
		req = mtod(m, struct mngt_pktsched_wr *);
		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
		req->sched = sched;
		req->idx = qidx;
		req->min = lo;
		req->max = hi;
		req->binding = port;
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		t3_mgmt_tx(adap, m);
	}
}

static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	cxgb_pcpu_startup_threads(sc);
	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j) {
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, pi->tx_chan);

		}
	}
}

static void
update_tpeeprom(struct adapter *adap)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *tpeeprom;
#else
	struct firmware *tpeeprom;
#endif

	uint32_t version;
	unsigned int major, minor;
	int ret, len;
	char rev;

	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);

	major = G_TP_VERSION_MAJOR(version);
	minor = G_TP_VERSION_MINOR(version);
	if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
		return;

	rev = t3rev2char(adap);

	tpeeprom = firmware_get(TPEEPROM_NAME);
	if (tpeeprom == NULL) {
		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
		    TPEEPROM_NAME);
		return;
	}

	len = tpeeprom->datasize - 4;

	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
	if (ret)
		goto release_tpeeprom;

	if (len != TP_SRAM_LEN) {
		device_printf(adap->dev,
		    "%s length is wrong len=%d expected=%d\n", TPEEPROM_NAME,
		    len, TP_SRAM_LEN);
		goto release_tpeeprom;	/* don't leak the firmware reference */
	}

	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
	    TP_SRAM_OFFSET);

	if (!ret) {
		device_printf(adap->dev,
			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
	} else
		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");

release_tpeeprom:
	firmware_put(tpeeprom, FIRMWARE_UNLOAD);

	return;
}

static int
update_tpsram(struct adapter *adap)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *tpsram;
#else
	struct firmware *tpsram;
#endif
	int ret;
	char rev;

	rev = t3rev2char(adap);
	if (!rev)
		return 0;

	update_tpeeprom(adap);

	tpsram = firmware_get(TPSRAM_NAME);
	if (tpsram == NULL) {
		device_printf(adap->dev, "could not load TP SRAM\n");
		return (EINVAL);
	} else
		device_printf(adap->dev, "updating TP SRAM\n");

	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
	if (ret)
		goto release_tpsram;

	ret = t3_set_proto_sram(adap, tpsram->data);
	if (ret)
		device_printf(adap->dev, "loading protocol SRAM failed\n");

release_tpsram:
	firmware_put(tpsram, FIRMWARE_UNLOAD);

	return ret;
}

/**
 *	cxgb_up - enable the adapter
 *	@adap: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 *
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		if ((sc->flags & FW_UPTODATE) == 0)
			if ((err = upgrade_fw(sc)))
				goto out;
		if ((sc->flags & TPS_UPTODATE) == 0)
			if ((err = update_tpsram(sc)))
				goto out;
		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		setup_rss(sc);
		t3_add_configured_sysctls(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
			    sc->irq_rid);
			err = EINVAL;
			goto out;
		}
		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			sc->cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(sc->dev, "Cannot set up interrupt\n");
			err = EINVAL;
			goto irq_err;
		}
	} else {
		cxgb_setup_msix(sc, sc->msi_count);
	}

	t3_sge_start(sc);
	t3_intr_enable(sc);

	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
	    is_offload(sc) && init_tp_parity(sc) == 0)
		sc->flags |= TP_PARITY_INIT;

	if (sc->flags & TP_PARITY_INIT) {
		t3_write_reg(sc, A_TP_INT_CAUSE,
				F_CMCACHEPERR | F_ARPLUTPERR);
		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
	}


	if (!(sc->flags & QUEUES_BOUND)) {
		bind_qsets(sc);
		sc->flags |= QUEUES_BOUND;
	}
out:
	return (err);
irq_err:
	CH_ERR(sc, "request_irq failed, err %d\n", err);
	goto out;
}


/*
 * Release resources when all the ports and offloading have been stopped.
 */
static void
cxgb_down_locked(struct adapter *sc)
{

	t3_sge_stop(sc);
	t3_intr_disable(sc);

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
		sc->intr_tag = NULL;
	}
	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
		sc->irq_res = NULL;
	}

	if (sc->flags & USING_MSIX)
		cxgb_teardown_msix(sc);

	callout_stop(&sc->cxgb_tick_ch);
	callout_stop(&sc->sge_timer_ch);
	callout_drain(&sc->cxgb_tick_ch);
	callout_drain(&sc->sge_timer_ch);

	if (sc->tq != NULL) {
		printf("draining slow intr\n");

		taskqueue_drain(sc->tq, &sc->slow_intr_task);
		printf("draining ext intr\n");
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		printf("draining tick task\n");
		taskqueue_drain(sc->tq, &sc->tick_task);
	}
	ADAPTER_UNLOCK(sc);
}

static int
offload_open(struct port_info *pi)
{
	struct adapter *adapter = pi->adapter;
	struct t3cdev *tdev = &adapter->tdev;

	int adap_up = adapter->open_device_map & PORT_MASK;
	int err = 0;

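	/*
	 * Atomically mark the offload device as open; if the bit was already
	 * set the device is open and there is nothing more to do.
	 */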
1736	if (atomic_cmpset_int(&adapter->open_device_map,
1737		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1738		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1739		return (0);
1740
1741	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1742		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n",
1743		    adapter->open_device_map);
1744	ADAPTER_LOCK(pi->adapter);
1745	if (!adap_up)
1746		err = cxgb_up(adapter);
1747	ADAPTER_UNLOCK(pi->adapter);
1748	if (err)
1749		return (err);
1750
1751	t3_tp_set_offload_mode(adapter, 1);
1752	tdev->lldev = pi->ifp;
1753
1754	init_port_mtus(adapter);
1755	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1756		     adapter->params.b_wnd,
1757		     adapter->params.rev == 0 ?
1758		       adapter->port[0].ifp->if_mtu : 0xffff);
1759	init_smt(adapter);
1760	/* Call back all registered clients */
1761	cxgb_add_clients(tdev);
1762
1763	/* restore them in case the offload module has changed them */
1764	if (err) {
1765		t3_tp_set_offload_mode(adapter, 0);
1766		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1767		cxgb_set_dummy_ops(tdev);
1768	}
1769	return (err);
1770}
1771
1772static int
1773offload_close(struct t3cdev *tdev)
1774{
1775	struct adapter *adapter = tdev2adap(tdev);
1776
1777	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1778		return (0);
1779
1780	/* Call back all registered clients */
1781	cxgb_remove_clients(tdev);
1782
1783	tdev->lldev = NULL;
1784	cxgb_set_dummy_ops(tdev);
1785	t3_tp_set_offload_mode(adapter, 0);
1786	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1787
1788	ADAPTER_LOCK(adapter);
1789	if (!adapter->open_device_map)
1790		cxgb_down_locked(adapter);
1791	else
1792		ADAPTER_UNLOCK(adapter);
1793	return (0);
1794}
1795
1796
1797static void
1798cxgb_init(void *arg)
1799{
1800	struct port_info *p = arg;
1801
1802	PORT_LOCK(p);
1803	cxgb_init_locked(p);
1804	PORT_UNLOCK(p);
1805}
1806
1807static void
1808cxgb_init_locked(struct port_info *p)
1809{
1810	struct ifnet *ifp;
1811	adapter_t *sc = p->adapter;
1812	int err;
1813
1814	PORT_LOCK_ASSERT_OWNED(p);
1815	ifp = p->ifp;
1816
1817	ADAPTER_LOCK(p->adapter);
1818	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1819		ADAPTER_UNLOCK(p->adapter);
1820		cxgb_stop_locked(p);
1821		return;
1822	}
1823	if (p->adapter->open_device_map == 0) {
1824		t3_intr_clear(sc);
1825	}
1826	setbit(&p->adapter->open_device_map, p->port_id);
1827	ADAPTER_UNLOCK(p->adapter);
1828
1829	if (is_offload(sc) && !ofld_disable) {
1830		err = offload_open(p);
1831		if (err)
1832			log(LOG_WARNING,
1833			    "Could not initialize offload capabilities\n");
1834	}
1835#if !defined(LINK_ATTACH)
1836	cxgb_link_start(p);
1837	t3_link_changed(sc, p->port_id);
1838#endif
1839	ifp->if_baudrate = p->link_config.speed * 1000000;
1840
1841	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1842	t3_port_intr_enable(sc, p->port_id);
1843
1844	t3_sge_reset_adapter(sc);
1845
1846	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1847	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1848}
1849
1850static void
1851cxgb_set_rxmode(struct port_info *p)
1852{
1853	struct t3_rx_mode rm;
1854	struct cmac *mac = &p->mac;
1855
1856	t3_init_rx_mode(&rm, p);
1857	mtx_lock(&p->adapter->mdio_lock);
1858	t3_mac_set_rx_mode(mac, &rm);
1859	mtx_unlock(&p->adapter->mdio_lock);
1860}
1861
1862static void
1863cxgb_stop_locked(struct port_info *pi)
1864{
1865	struct ifnet *ifp;
1866
1867	PORT_LOCK_ASSERT_OWNED(pi);
1868	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1869
1870	ifp = pi->ifp;
1871	t3_port_intr_disable(pi->adapter, pi->port_id);
1872	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1873
1874	/* disable pause frames */
1875	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1876			 F_TXPAUSEEN, 0);
1877
1878	/* Reset RX FIFO HWM */
1879        t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1880			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1881
1882
1883	ADAPTER_LOCK(pi->adapter);
1884	clrbit(&pi->adapter->open_device_map, pi->port_id);
1885
1886	if (pi->adapter->open_device_map == 0) {
1887		cxgb_down_locked(pi->adapter);
1888	} else
1889		ADAPTER_UNLOCK(pi->adapter);
1890
1891#if !defined(LINK_ATTACH)
1892	DELAY(100);
1893
1894	/* Wait for TXFIFO empty */
1895	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1896			F_TXFIFO_EMPTY, 1, 20, 5);
1897
1898	DELAY(100);
1899	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1900
1901	pi->phy.ops->power_down(&pi->phy, 1);
1902#endif
1903
1904}
1905
1906static int
1907cxgb_set_mtu(struct port_info *p, int mtu)
1908{
1909	struct ifnet *ifp = p->ifp;
1910	int error = 0;
1911
1912	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1913		error = EINVAL;
1914	else if (ifp->if_mtu != mtu) {
1915		PORT_LOCK(p);
1916		ifp->if_mtu = mtu;
1917		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1918			cxgb_stop_locked(p);
1919			cxgb_init_locked(p);
1920		}
1921		PORT_UNLOCK(p);
1922	}
1923	return (error);
1924}
1925
#ifdef LRO_SUPPORTED
/*
 * Mark LRO enabled or disabled in all the qsets for this port
 */
static int
cxgb_set_lro(struct port_info *p, int enabled)
{
	int i;
	struct adapter *adp = p->adapter;
	struct sge_qset *q;

	PORT_LOCK_ASSERT_OWNED(p);
	for (i = 0; i < p->nqsets; i++) {
		q = &adp->sge.qs[p->first_qset + i];
		q->lro.enabled = (enabled != 0);
	}
	return (0);
}
#endif

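/*
 * Interface ioctl handler: MTU, address, flag, multicast, media, and
 * capability changes all funnel through here.
 */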
static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0, reinit = 0;
	uint32_t mask;

	/*
	 * XXX need to check that we aren't in the middle of an unload
	 */
	switch (command) {
	case SIOCSIFMTU:
		error = cxgb_set_mtu(p, ifr->ifr_mtu);
		break;
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				PORT_LOCK(p);
				cxgb_init_locked(p);
				PORT_UNLOCK(p);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		PORT_LOCK(p);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);
			} else
				cxgb_init_locked(p);
			p->if_flags = ifp->if_flags;
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			cxgb_stop_locked(p);

		PORT_UNLOCK(p);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			cxgb_set_rxmode(p);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_IP | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
				    | CSUM_IP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
#ifdef LRO_SUPPORTED
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;

			/* Safe to do this even if cxgb_up not called yet */
			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
		}
		if (mask & IFCAP_VLAN_MTU) {
			ifp->if_capenable ^= IFCAP_VLAN_MTU;
			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
		}
		if (mask & IFCAP_VLAN_HWCSUM) {
			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
		}
		if (reinit) {
			cxgb_stop_locked(p);
			cxgb_init_locked(p);
		}
		PORT_UNLOCK(p);

#ifdef VLAN_CAPABILITIES
		VLAN_CAPABILITIES(ifp);
#endif
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return (error);
}

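/*
 * Manual media changes are not supported; the media is fixed by the
 * card.
 */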
static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

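/*
 * Report link state, speed, and duplex from the last PHY update.
 */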
static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (p->link_config.speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;
	case 100:
		ifmr->ifm_active |= IFM_100_TX;
		break;
	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	}

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

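/*
 * Slow-path interrupt entry point.
 */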
static void
cxgb_async_intr(void *data)
{
	adapter_t *sc = data;

	if (cxgb_debug)
		device_printf(sc->dev, "cxgb_async_intr\n");
	/*
	 * May need to sleep - defer to taskqueue
	 */
	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
}

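/*
 * Taskqueue handler for external (PHY) interrupts: service the PHY,
 * then re-enable the external interrupt cause.
 */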
static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
	ADAPTER_UNLOCK(sc);
}

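/*
 * Poll link state on ports whose PHYs cannot signal link changes with
 * an interrupt.
 */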
static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < sc->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->phy.caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
		p->ifp->if_baudrate = p->link_config.speed * 1000000;
	}
}

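/*
 * Babysit the T3B2 MACs: a watchdog status of 1 means the MAC was
 * toggled, 2 means it had to be reset and must be reprogrammed with
 * its MTU, address, rx mode, and link settings.
 */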
static void
check_t3b2_mac(struct adapter *adapter)
{
	int i;

	if (adapter->flags & CXGB_SHUTDOWN)
		return;

	for_each_port(adapter, i) {
		struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;
		int status;

		if (adapter->flags & CXGB_SHUTDOWN)
			return;

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		status = 0;
		PORT_LOCK(p);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			status = t3b2_mac_watchdog_task(&p->mac);
		if (status == 1)
			p->mac.stats.num_toggled++;
		else if (status == 2) {
			struct cmac *mac = &p->mac;
			int mtu = ifp->if_mtu;

			if (ifp->if_capenable & IFCAP_VLAN_MTU)
				mtu += ETHER_VLAN_ENCAP_LEN;
			t3_mac_set_mtu(mac, mtu);
			t3_mac_set_address(mac, 0, p->hw_addr);
			cxgb_set_rxmode(p);
			t3_link_start(&p->phy, mac, &p->link_config);
			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
			t3_port_intr_enable(adapter, p->port_id);
			p->mac.stats.num_resets++;
		}
		PORT_UNLOCK(p);
	}
}

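/*
 * Periodic callout: kick off the tick task and reschedule ourselves.
 */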
static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;

	if (sc->flags & CXGB_SHUTDOWN)
		return;

	taskqueue_enqueue(sc->tq, &sc->tick_task);
	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
}

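/*
 * Tick task: poll link state if the PHYs need it, run the T3B2 MAC
 * watchdog, and accumulate MAC statistics roughly once every
 * stats_update_period.
 */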
static void
cxgb_tick_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;
	int i;

	if (sc->flags & CXGB_SHUTDOWN)
		return;

	ADAPTER_LOCK(sc);
	if (p->linkpoll_period)
		check_link_status(sc);

	sc->check_task_cnt++;

	/*
	 * The adapter lock can currently only be acquired after the
	 * port lock, so drop it before taking the port locks below.
	 */
	ADAPTER_UNLOCK(sc);

	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
		check_t3b2_mac(sc);

	/* Update MAC stats if it's time to do so */
	if (!p->linkpoll_period ||
	    (sc->check_task_cnt * p->linkpoll_period) / 10 >=
	    p->stats_update_period) {
		for_each_port(sc, i) {
			struct port_info *port = &sc->port[i];
			PORT_LOCK(port);
			t3_mac_update_stats(&port->mac);
			PORT_UNLOCK(port);
		}
		sc->check_task_cnt = 0;
	}
}

static void
touch_bars(device_t dev)
{
	/*
	 * Don't enable yet; this is a placeholder for rewriting the
	 * upper halves of the 64-bit BARs on 32-bit systems.
	 */
#if !defined(__LP64__) && 0
	u32 v;

	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
#endif
}

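/*
 * Write 'len' bytes at 'offset' into the EEPROM.  The serial EEPROM is
 * accessed a 32-bit word at a time, so a partial first or last word is
 * handled by reading the existing word and merging the new bytes into
 * it.  For example, offset = 6 and len = 3 yields aligned_offset = 4
 * and aligned_len = 8, i.e. a two-word read-modify-write.
 */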
static int
set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
{
	uint8_t *buf;
	int err = 0;
	u32 aligned_offset, aligned_len, *p;
	struct adapter *adapter = pi->adapter;

	aligned_offset = offset & ~3;
	aligned_len = (len + (offset & 3) + 3) & ~3;

	if (aligned_offset != offset || aligned_len != len) {
		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
		if (!buf)
			return (ENOMEM);
		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
		if (!err && aligned_len > 4)
			err = t3_seeprom_read(adapter,
					      aligned_offset + aligned_len - 4,
					      (u32 *)&buf[aligned_len - 4]);
		if (err)
			goto out;
		memcpy(buf + (offset & 3), data, len);
	} else
		buf = (uint8_t *)(uintptr_t)data;

	err = t3_seeprom_wp(adapter, 0);
	if (err)
		goto out;

	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
		err = t3_seeprom_write(adapter, aligned_offset, *p);
		aligned_offset += 4;
	}

	if (!err)
		err = t3_seeprom_wp(adapter, 1);
out:
	if (buf != data)
		free(buf, M_DEVBUF);
	return (err);
}

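/*
 * Range check for optional ioctl parameters: negative values mean
 * "leave unchanged" and are always accepted.
 */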
static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

static int
cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
{
	return (0);
}

static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
{
	return (0);
}

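/*
 * Privileged ioctls on the per-port control device: register and MII
 * access, firmware and bootcode loading, offload memory tuning,
 * tracing, scheduling, EEPROM access, and statistics.
 */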
static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case CHELSIO_GET_MIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct ch_mii_data *mid = (struct ch_mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					       mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					       mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case CHELSIO_SET_MIIREG: {
		struct cphy *phy = &pi->phy;
		struct ch_mii_data *mid = (struct ch_mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
						mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
						mid->reg_num & 0x1f,
						mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock_spin(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock_spin(&sc->sge.reg_lock);
		break;
	}
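	/*
	 * Each qset is backed by six hardware rings, so queue_num selects
	 * the qset (queue_num / 6) and the ring within it (queue_num % 6).
	 */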
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;
		int q1 = pi->first_qset;
		int nqsets = pi->nqsets;
		int i;

		if (t->qset_idx >= nqsets)
			return (EINVAL);

		i = q1 + t->qset_idx;
		q = &sc->params.sge.qset[i];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->lro         = q->lro;
		t->intr_lat    = q->coalesce_usecs;
		t->cong_thres  = q->cong_thres;
		t->qnum        = i;

		if (sc->flags & USING_MSIX)
			t->vector = rman_get_start(sc->msix_irq_res[i]);
		else
			t->vector = rman_get_start(sc->irq_res);

		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
	case CHELSIO_LOAD_FW: {
		uint8_t *fw_data;
		uint32_t vers;
		struct ch_mem_range *t = (struct ch_mem_range *)data;

		/*
		 * You're allowed to load a firmware only before FULL_INIT_DONE
		 *
		 * FW_UPTODATE is also set so the rest of the initialization
		 * will not overwrite what was loaded here.  This gives you the
		 * flexibility to load any firmware (and maybe shoot yourself in
		 * the foot).
		 */

		ADAPTER_LOCK(sc);
		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
			ADAPTER_UNLOCK(sc);
			return (EBUSY);
		}

		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
		if (!fw_data)
			error = ENOMEM;
		else
			error = copyin(t->buf, fw_data, t->len);

		if (!error)
			error = -t3_load_fw(sc, fw_data, t->len);

		if (t3_get_fw_version(sc, &vers) == 0) {
			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
		}

		if (!error)
			sc->flags |= FW_UPTODATE;

		free(fw_data, M_DEVBUF);
		ADAPTER_UNLOCK(sc);
		break;
	}
	case CHELSIO_LOAD_BOOT: {
		uint8_t *boot_data;
		struct ch_mem_range *t = (struct ch_mem_range *)data;

		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
		if (!boot_data)
			return (ENOMEM);

		error = copyin(t->buf, boot_data, t->len);
		if (!error)
			error = -t3_load_boot(sc, boot_data, t->len);

		free(boot_data, M_DEVBUF);
		break;
	}
	case CHELSIO_GET_PM: {
		struct ch_pm *m = (struct ch_pm *)data;
		struct tp_params *p = &sc->params.tp;

		if (!is_offload(sc))
			return (EOPNOTSUPP);

		m->tx_pg_sz = p->tx_pg_size;
		m->tx_num_pg = p->tx_num_pgs;
		m->rx_pg_sz  = p->rx_pg_size;
		m->rx_num_pg = p->rx_num_pgs;
		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;

		break;
	}
	case CHELSIO_SET_PM: {
		struct ch_pm *m = (struct ch_pm *)data;
		struct tp_params *p = &sc->params.tp;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);

		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
			return (EINVAL);	/* not power of 2 */
		if (!(m->rx_pg_sz & 0x14000))
			return (EINVAL);	/* not 16KB or 64KB */
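		/* tx page size must be 16KB, 64KB, 256KB, 1MB, 4MB, or 16MB */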
		if (!(m->tx_pg_sz & 0x1554000))
			return (EINVAL);
		if (m->tx_num_pg == -1)
			m->tx_num_pg = p->tx_num_pgs;
		if (m->rx_num_pg == -1)
			m->rx_num_pg = p->rx_num_pgs;
		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
			return (EINVAL);
		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
			return (EINVAL);

		p->rx_pg_size = m->rx_pg_sz;
		p->tx_pg_size = m->tx_pg_sz;
		p->rx_num_pgs = m->rx_num_pg;
		p->tx_num_pgs = m->tx_num_pg;
		break;
	}
	case CHELSIO_SETMTUTAB: {
		struct ch_mtus *m = (struct ch_mtus *)data;
		int i;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (offload_running(sc))
			return (EBUSY);
		if (m->nmtus != NMTUS)
			return (EINVAL);
		if (m->mtus[0] < 81)         /* accommodate SACK */
			return (EINVAL);

		/*
		 * MTUs must be in ascending order
		 */
		for (i = 1; i < NMTUS; ++i)
			if (m->mtus[i] < m->mtus[i - 1])
				return (EINVAL);

		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
		break;
	}
	case CHELSIO_GETMTUTAB: {
		struct ch_mtus *m = (struct ch_mtus *)data;

		if (!is_offload(sc))
			return (EOPNOTSUPP);

		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
		m->nmtus = NMTUS;
		break;
	}
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		/*
		 * Use these to avoid modifying len/addr in the return
		 * struct
		 */
		uint32_t len = t->len, addr = t->addr;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((addr & 0x7) || (len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)t->buf;
		while (len) {
			unsigned int chunk = min(len, sizeof(buf));

			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			addr += chunk;
			len -= chunk;
		}
		break;
	}
	case CHELSIO_READ_TCAM_WORD: {
		struct ch_tcam_word *t = (struct ch_tcam_word *)data;

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need MC5 */
		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
		if (buf == NULL) {
			return (ENOMEM);
		}
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen)
			error = E2BIG;

		if (!error) {
			cxgb_get_regs(sc, regs, buf);
			error = copyout(buf, regs->data, reglen);
		}
		free(buf, M_DEVBUF);

		break;
	}
	case CHELSIO_SET_HW_SCHED: {
		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
		unsigned int ticks_per_usec = core_ticks_per_usec(sc);

		if ((sc->flags & FULL_INIT_DONE) == 0)
			return (EAGAIN);       /* need TP to be initialized */
		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
		    !in_range(t->channel, 0, 1) ||
		    !in_range(t->kbps, 0, 10000000) ||
		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
		    !in_range(t->flow_ipg, 0,
			      dack_ticks_to_usec(sc, 0x7ff)))
			return (EINVAL);

		if (t->kbps >= 0) {
			error = t3_config_sched(sc, t->kbps, t->sched);
			if (error < 0)
				return (-error);
		}
		if (t->class_ipg >= 0)
			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
		if (t->flow_ipg >= 0) {
			t->flow_ipg *= 1000;     /* us -> ns */
			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
		}
		if (t->mode >= 0) {
			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);

			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
					 bit, t->mode ? bit : 0);
		}
		if (t->channel >= 0)
			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
					 1 << t->sched, t->channel << t->sched);
		break;
	}
	case CHELSIO_GET_EEPROM: {
		int i;
		struct ch_eeprom *e = (struct ch_eeprom *)data;
		uint8_t *buf;

		/*
		 * Bounds-check the user-supplied window; the read loop
		 * below would otherwise run past the end of 'buf'.
		 */
		if (e->offset > EEPROMSIZE || e->len > EEPROMSIZE - e->offset)
			return (EINVAL);

		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
		if (buf == NULL) {
			return (ENOMEM);
		}
		e->magic = EEPROM_MAGIC;
		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);

		if (!error)
			error = copyout(buf + e->offset, e->data, e->len);

		free(buf, M_DEVBUF);
		break;
	}
	case CHELSIO_CLEAR_STATS: {
		if (!(sc->flags & FULL_INIT_DONE))
			return (EAGAIN);

		PORT_LOCK(pi);
		t3_mac_update_stats(&pi->mac);
		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
		PORT_UNLOCK(pi);
		break;
	}
	default:
		return (EOPNOTSUPP);
	}

	return (error);
}

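/*
 * A minimal userland sketch (not compiled here) of driving the
 * CHELSIO_GETREG handler above.  The /dev/cxgb0 node name is an
 * assumption and A_PL_REV is just an example register:
 *
 *	struct ch_reg r = { .addr = A_PL_REV };
 *	int fd = open("/dev/cxgb0", O_RDWR);
 *
 *	if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("reg 0x%x = 0x%x\n", r.addr, r.val);
 *
 * The handler validates alignment and range against mmio_len before
 * touching the register file.
 */
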
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return (T3_REGMAP_SIZE);
}

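/*
 * Dump the register blocks that are safe to read into 'buf' for the
 * CHELSIO_IFCONF_GETREGS ioctl.
 */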
static void
cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, cxgb_get_regs_len());
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}

MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);