1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include "opt_inet.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/module.h>
40#include <sys/pciio.h>
41#include <sys/conf.h>
42#include <machine/bus.h>
43#include <machine/resource.h>
44#include <sys/bus_dma.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_arp.h>
64#include <net/if_dl.h>
65#include <net/if_media.h>
66#include <net/if_types.h>
67#include <net/if_vlan_var.h>
68
69#include <netinet/in_systm.h>
70#include <netinet/in.h>
71#include <netinet/if_ether.h>
#include <netinet/ip.h>
74#include <netinet/tcp.h>
75#include <netinet/udp.h>
76
77#include <dev/pci/pcireg.h>
78#include <dev/pci/pcivar.h>
79#include <dev/pci/pci_private.h>
80
81#include <cxgb_include.h>
82
83#ifdef PRIV_SUPPORTED
84#include <sys/priv.h>
85#endif
86
87static int cxgb_setup_interrupts(adapter_t *);
88static void cxgb_teardown_interrupts(adapter_t *);
89static void cxgb_init(void *);
90static int cxgb_init_locked(struct port_info *);
91static int cxgb_uninit_locked(struct port_info *);
92static int cxgb_uninit_synchronized(struct port_info *);
93static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
94static int cxgb_media_change(struct ifnet *);
95static int cxgb_ifm_type(int);
96static void cxgb_build_medialist(struct port_info *);
97static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
98static int setup_sge_qsets(adapter_t *);
99static void cxgb_async_intr(void *);
100static void cxgb_tick_handler(void *, int);
101static void cxgb_tick(void *);
102static void link_check_callout(void *);
103static void check_link_status(void *, int);
104static void setup_rss(adapter_t *sc);
105static int alloc_filters(struct adapter *);
106static int setup_hw_filters(struct adapter *);
107static int set_filter(struct adapter *, int, const struct filter_info *);
108static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
109    unsigned int, u64, u64);
110static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
111    unsigned int, u64, u64);
112#ifdef TCP_OFFLOAD
113static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
114#endif
115
/*
 * Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
119static int cxgb_controller_probe(device_t);
120static int cxgb_controller_attach(device_t);
121static int cxgb_controller_detach(device_t);
122static void cxgb_free(struct adapter *);
123static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
124    unsigned int end);
125static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
126static int cxgb_get_regs_len(void);
127static void touch_bars(device_t dev);
128static void cxgb_update_mac_settings(struct port_info *p);
129#ifdef TCP_OFFLOAD
130static int toe_capability(struct port_info *, int);
131#endif
132
133static device_method_t cxgb_controller_methods[] = {
134	DEVMETHOD(device_probe,		cxgb_controller_probe),
135	DEVMETHOD(device_attach,	cxgb_controller_attach),
136	DEVMETHOD(device_detach,	cxgb_controller_detach),
137
138	DEVMETHOD_END
139};
140
141static driver_t cxgb_controller_driver = {
142	"cxgbc",
143	cxgb_controller_methods,
144	sizeof(struct adapter)
145};
146
147static int cxgbc_mod_event(module_t, int, void *);
148static devclass_t	cxgb_controller_devclass;
149DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
150    cxgbc_mod_event, 0);
151MODULE_VERSION(cxgbc, 1);
152MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
153
154/*
155 * Attachment glue for the ports.  Attachment is done directly to the
156 * controller device.
157 */
158static int cxgb_port_probe(device_t);
159static int cxgb_port_attach(device_t);
160static int cxgb_port_detach(device_t);
161
162static device_method_t cxgb_port_methods[] = {
163	DEVMETHOD(device_probe,		cxgb_port_probe),
164	DEVMETHOD(device_attach,	cxgb_port_attach),
165	DEVMETHOD(device_detach,	cxgb_port_detach),
	DEVMETHOD_END
167};
168
169static driver_t cxgb_port_driver = {
170	"cxgb",
171	cxgb_port_methods,
172	0
173};
174
175static d_ioctl_t cxgb_extension_ioctl;
176static d_open_t cxgb_extension_open;
177static d_close_t cxgb_extension_close;
178
179static struct cdevsw cxgb_cdevsw = {
180       .d_version =    D_VERSION,
181       .d_flags =      0,
182       .d_open =       cxgb_extension_open,
183       .d_close =      cxgb_extension_close,
184       .d_ioctl =      cxgb_extension_ioctl,
185       .d_name =       "cxgb",
186};
187
188static devclass_t	cxgb_port_devclass;
189DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
190MODULE_VERSION(cxgb, 1);
191
192static struct mtx t3_list_lock;
193static SLIST_HEAD(, adapter) t3_list;
194#ifdef TCP_OFFLOAD
195static struct mtx t3_uld_list_lock;
196static SLIST_HEAD(, uld_info) t3_uld_list;
197#endif
198
199/*
200 * The driver uses the best interrupt scheme available on a platform in the
201 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
202 * of these schemes the driver may consider as follows:
203 *
204 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
206 * msi = 0: force pin interrupts
207 */
208static int msi_allowed = 2;
209
210TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
211SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
212SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
213    "MSI-X, MSI, INTx selector");
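
/*
 * All of the hw.cxgb.* knobs below are boot-time tunables (TUNABLE_INT) as
 * well as read-only sysctls.  For example, adding
 *
 *	hw.cxgb.msi_allowed="1"
 *
 * to /boot/loader.conf restricts the driver to MSI or INTx interrupts.
 */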
214
215/*
216 * The driver uses an auto-queue algorithm by default.
217 * To disable it and force a single queue-set per port, use multiq = 0
218 */
219static int multiq = 1;
220TUNABLE_INT("hw.cxgb.multiq", &multiq);
221SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
222    "use min(ncpus/ports, 8) queue-sets per port");
223
224/*
225 * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
228 */
229static int force_fw_update = 0;
230TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
231SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
232    "update firmware even if up to date");
233
234int cxgb_use_16k_clusters = -1;
235TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
236SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
238
239static int nfilters = -1;
240TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
241SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
242    &nfilters, 0, "max number of entries in the filter table");
243
244enum {
245	MAX_TXQ_ENTRIES      = 16384,
246	MAX_CTRL_TXQ_ENTRIES = 1024,
247	MAX_RSPQ_ENTRIES     = 16384,
248	MAX_RX_BUFFERS       = 16384,
249	MAX_RX_JUMBO_BUFFERS = 16384,
250	MIN_TXQ_ENTRIES      = 4,
251	MIN_CTRL_TXQ_ENTRIES = 4,
252	MIN_RSPQ_ENTRIES     = 32,
253	MIN_FL_ENTRIES       = 32,
254	MIN_FL_JUMBO_ENTRIES = 32
255};
256
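/*
 * Software shadow of one hardware packet-filter entry; the match fields and
 * action bits below are programmed into the adapter by set_filter() (see
 * alloc_filters()/setup_hw_filters()).
 */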
257struct filter_info {
258	u32 sip;
259	u32 sip_mask;
260	u32 dip;
261	u16 sport;
262	u16 dport;
263	u32 vlan:12;
264	u32 vlan_prio:3;
265	u32 mac_hit:1;
266	u32 mac_idx:4;
267	u32 mac_vld:1;
268	u32 pkt_type:2;
269	u32 report_filter_id:1;
270	u32 pass:1;
271	u32 rss:1;
272	u32 qset:3;
273	u32 locked:1;
274	u32 valid:1;
275};
276
277enum { FILTER_NO_VLAN_PRI = 7 };
278
279#define EEPROM_MAGIC 0x38E2F10C
280
281#define PORT_MASK ((1 << MAX_NPORTS) - 1)
282
283/* Table for probing the cards.  The desc field isn't actually used */
284struct cxgb_ident {
285	uint16_t	vendor;
286	uint16_t	device;
287	int		index;
288	char		*desc;
289} cxgb_identifiers[] = {
290	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
291	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
300	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
301	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
302	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
303	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
304	{0, 0, 0, NULL}
305};
306
307static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
308
309
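/*
 * Map the adapter's hardware revision to the single character used in the
 * TP EEPROM/SRAM firmware image names (see TPEEPROM_NAME and TPSRAM_NAME)
 * and in version messages.
 */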
310static __inline char
311t3rev2char(struct adapter *adapter)
312{
313	char rev = 'z';
314
315	switch(adapter->params.rev) {
316	case T3_REV_A:
317		rev = 'a';
318		break;
319	case T3_REV_B:
320	case T3_REV_B2:
321		rev = 'b';
322		break;
323	case T3_REV_C:
324		rev = 'c';
325		break;
326	}
327	return rev;
328}
329
330static struct cxgb_ident *
331cxgb_get_ident(device_t dev)
332{
333	struct cxgb_ident *id;
334
335	for (id = cxgb_identifiers; id->desc != NULL; id++) {
336		if ((id->vendor == pci_get_vendor(dev)) &&
337		    (id->device == pci_get_device(dev))) {
338			return (id);
339		}
340	}
341	return (NULL);
342}
343
344static const struct adapter_info *
345cxgb_get_adapter_info(device_t dev)
346{
347	struct cxgb_ident *id;
348	const struct adapter_info *ai;
349
350	id = cxgb_get_ident(dev);
351	if (id == NULL)
352		return (NULL);
353
354	ai = t3_get_adapter_info(id->index);
355
356	return (ai);
357}
358
359static int
360cxgb_controller_probe(device_t dev)
361{
362	const struct adapter_info *ai;
363	char *ports, buf[80];
364	int nports;
365
366	ai = cxgb_get_adapter_info(dev);
367	if (ai == NULL)
368		return (ENXIO);
369
370	nports = ai->nports0 + ai->nports1;
371	if (nports == 1)
372		ports = "port";
373	else
374		ports = "ports";
375
376	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
377	device_set_desc_copy(dev, buf);
378	return (BUS_PROBE_DEFAULT);
379}
380
381#define FW_FNAME "cxgb_t3fw"
382#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
383#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
384
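/*
 * Load the cxgb_t3fw firmware module and write its image to the adapter.
 * On success, record the version that is now running in sc->fw_version.
 */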
385static int
386upgrade_fw(adapter_t *sc)
387{
388	const struct firmware *fw;
389	int status;
390	u32 vers;
391
392	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
393		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
394		return (ENOENT);
395	} else
396		device_printf(sc->dev, "installing firmware on card\n");
397	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
398
399	if (status != 0) {
400		device_printf(sc->dev, "failed to install firmware: %d\n",
401		    status);
402	} else {
403		t3_get_fw_version(sc, &vers);
404		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
405		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
406		    G_FW_VERSION_MICRO(vers));
407	}
408
409	firmware_put(fw, FIRMWARE_UNLOAD);
410
411	return (status);
412}
413
414/*
415 * The cxgb_controller_attach function is responsible for the initial
416 * bringup of the device.  Its responsibilities include:
417 *
418 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Set up the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Set up offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
435 *
436 * NOTE: Any modification or deviation from this list MUST be reflected in
437 * the above comment.  Failure to do so will result in problems on various
438 * error conditions including link flapping.
439 */
440static int
441cxgb_controller_attach(device_t dev)
442{
443	device_t child;
444	const struct adapter_info *ai;
445	struct adapter *sc;
446	int i, error = 0;
447	uint32_t vers;
448	int port_qsets = 1;
449	int msi_needed, reg;
450	char buf[80];
451
452	sc = device_get_softc(dev);
453	sc->dev = dev;
454	sc->msi_count = 0;
455	ai = cxgb_get_adapter_info(dev);
456
457	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
458	    device_get_unit(dev));
459	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
460
461	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
462	    device_get_unit(dev));
463	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
464	    device_get_unit(dev));
465	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
466	    device_get_unit(dev));
467
468	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
469	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
470	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
471
472	mtx_lock(&t3_list_lock);
473	SLIST_INSERT_HEAD(&t3_list, sc, link);
474	mtx_unlock(&t3_list_lock);
475
	/* find the PCIe link width and set max read request to 4KB */
477	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
478		uint16_t lnk;
479
480		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
481		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
482		if (sc->link_width < 8 &&
483		    (ai->caps & SUPPORTED_10000baseT_Full)) {
484			device_printf(sc->dev,
485			    "PCIe x%d Link, expect reduced performance\n",
486			    sc->link_width);
487		}
488
489		pci_set_max_read_req(dev, 4096);
490	}
491
492	touch_bars(dev);
493	pci_enable_busmaster(dev);
494	/*
495	 * Allocate the registers and make them available to the driver.
496	 * The registers that we care about for NIC mode are in BAR 0
497	 */
498	sc->regs_rid = PCIR_BAR(0);
499	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
500	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
501		device_printf(dev, "Cannot allocate BAR region 0\n");
502		error = ENXIO;
503		goto out;
504	}
505
506	sc->bt = rman_get_bustag(sc->regs_res);
507	sc->bh = rman_get_bushandle(sc->regs_res);
508	sc->mmio_len = rman_get_size(sc->regs_res);
509
510	for (i = 0; i < MAX_NPORTS; i++)
511		sc->port[i].adapter = sc;
512
513	if (t3_prep_adapter(sc, ai, 1) < 0) {
514		printf("prep adapter failed\n");
515		error = ENODEV;
516		goto out;
517	}
518
519	sc->udbs_rid = PCIR_BAR(2);
520	sc->udbs_res = NULL;
521	if (is_offload(sc) &&
522	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
524		device_printf(dev, "Cannot allocate BAR region 1\n");
525		error = ENXIO;
526		goto out;
527	}
528
	/*
	 * Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
534	sc->msix_regs_rid = 0x20;
535	if ((msi_allowed >= 2) &&
536	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
537	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
538
539		if (multiq)
540			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
541		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
542
543		if (pci_msix_count(dev) == 0 ||
544		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
545		    sc->msi_count != msi_needed) {
546			device_printf(dev, "alloc msix failed - "
547				      "msi_count=%d, msi_needed=%d, err=%d; "
548				      "will try MSI\n", sc->msi_count,
549				      msi_needed, error);
550			sc->msi_count = 0;
551			port_qsets = 1;
552			pci_release_msi(dev);
553			bus_release_resource(dev, SYS_RES_MEMORY,
554			    sc->msix_regs_rid, sc->msix_regs_res);
555			sc->msix_regs_res = NULL;
556		} else {
557			sc->flags |= USING_MSIX;
558			sc->cxgb_intr = cxgb_async_intr;
559			device_printf(dev,
560				      "using MSI-X interrupts (%u vectors)\n",
561				      sc->msi_count);
562		}
563	}
564
565	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
566		sc->msi_count = 1;
567		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
568			device_printf(dev, "alloc msi failed - "
569				      "err=%d; will try INTx\n", error);
570			sc->msi_count = 0;
571			port_qsets = 1;
572			pci_release_msi(dev);
573		} else {
574			sc->flags |= USING_MSI;
575			sc->cxgb_intr = t3_intr_msi;
576			device_printf(dev, "using MSI interrupts\n");
577		}
578	}
579	if (sc->msi_count == 0) {
580		device_printf(dev, "using line interrupts\n");
581		sc->cxgb_intr = t3b_intr;
582	}
583
584	/* Create a private taskqueue thread for handling driver events */
585	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
586	    taskqueue_thread_enqueue, &sc->tq);
587	if (sc->tq == NULL) {
588		device_printf(dev, "failed to allocate controller task queue\n");
589		goto out;
590	}
591
592	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
593	    device_get_nameunit(dev));
594	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
595
596
597	/* Create a periodic callout for checking adapter status */
598	callout_init(&sc->cxgb_tick_ch, TRUE);
599
600	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
601		/*
602		 * Warn user that a firmware update will be attempted in init.
603		 */
604		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
605		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
606		sc->flags &= ~FW_UPTODATE;
607	} else {
608		sc->flags |= FW_UPTODATE;
609	}
610
611	if (t3_check_tpsram_version(sc) < 0) {
612		/*
		 * Warn user that an SRAM update will be attempted in init.
614		 */
615		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
616		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
617		sc->flags &= ~TPS_UPTODATE;
618	} else {
619		sc->flags |= TPS_UPTODATE;
620	}
621
622	/*
623	 * Create a child device for each MAC.  The ethernet attachment
624	 * will be done in these children.
625	 */
626	for (i = 0; i < (sc)->params.nports; i++) {
627		struct port_info *pi;
628
629		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
630			device_printf(dev, "failed to add child port\n");
631			error = EINVAL;
632			goto out;
633		}
634		pi = &sc->port[i];
635		pi->adapter = sc;
636		pi->nqsets = port_qsets;
637		pi->first_qset = i*port_qsets;
638		pi->port_id = i;
639		pi->tx_chan = i >= ai->nports0;
640		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
641		sc->rxpkt_map[pi->txpkt_intf] = i;
642		sc->port[i].tx_chan = i >= ai->nports0;
643		sc->portdev[i] = child;
644		device_set_softc(child, pi);
645	}
646	if ((error = bus_generic_attach(dev)) != 0)
647		goto out;
648
649	/* initialize sge private state */
650	t3_sge_init_adapter(sc);
651
652	t3_led_ready(sc);
653
654	error = t3_get_fw_version(sc, &vers);
655	if (error)
656		goto out;
657
658	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
659	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
660	    G_FW_VERSION_MICRO(vers));
661
662	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
663		 ai->desc, is_offload(sc) ? "R" : "",
664		 sc->params.vpd.ec, sc->params.vpd.sn);
665	device_set_desc_copy(dev, buf);
666
667	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
668		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
669		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
670
671	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
672	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
673	t3_add_attach_sysctls(sc);
674
675#ifdef TCP_OFFLOAD
676	for (i = 0; i < NUM_CPL_HANDLERS; i++)
677		sc->cpl_handler[i] = cpl_not_handled;
678#endif
679
680	t3_intr_clear(sc);
681	error = cxgb_setup_interrupts(sc);
682out:
683	if (error)
684		cxgb_free(sc);
685
686	return (error);
687}
688
689/*
 * The cxgb_controller_detach routine is called when the device is
691 * unloaded from the system.
692 */
693
694static int
695cxgb_controller_detach(device_t dev)
696{
697	struct adapter *sc;
698
699	sc = device_get_softc(dev);
700
701	cxgb_free(sc);
702
703	return (0);
704}
705
706/*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(), and
 * should be the final piece of work done when fully unloading the driver.
 * Its tasks include:
 *
713 *  1. Shutting down the threads started by the cxgb_controller_attach()
714 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down()).
716 *  3. Detaching all of the port devices created during the
717 *     cxgb_controller_attach() routine.
718 *  4. Removing the device children created via cxgb_controller_attach().
719 *  5. Releasing PCI resources associated with the device.
720 *  6. Turning off the offload support, iff it was turned on.
721 *  7. Destroying the mutexes created in cxgb_controller_attach().
722 *
723 */
724static void
725cxgb_free(struct adapter *sc)
726{
727	int i, nqsets = 0;
728
729	ADAPTER_LOCK(sc);
730	sc->flags |= CXGB_SHUTDOWN;
731	ADAPTER_UNLOCK(sc);
732
733	/*
734	 * Make sure all child devices are gone.
735	 */
736	bus_generic_detach(sc->dev);
737	for (i = 0; i < (sc)->params.nports; i++) {
738		if (sc->portdev[i] &&
739		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
740			device_printf(sc->dev, "failed to delete child port\n");
741		nqsets += sc->port[i].nqsets;
742	}
743
744	/*
745	 * At this point, it is as if cxgb_port_detach has run on all ports, and
746	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
747	 * all open devices have been closed.
748	 */
749	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
750					   __func__, sc->open_device_map));
751	for (i = 0; i < sc->params.nports; i++) {
752		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
753						  __func__, i));
754	}
755
756	/*
757	 * Finish off the adapter's callouts.
758	 */
759	callout_drain(&sc->cxgb_tick_ch);
760	callout_drain(&sc->sge_timer_ch);
761
762	/*
763	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
764	 * sysctls are cleaned up by the kernel linker.
765	 */
766	if (sc->flags & FULL_INIT_DONE) {
767 		t3_free_sge_resources(sc, nqsets);
768 		sc->flags &= ~FULL_INIT_DONE;
769 	}
770
771	/*
772	 * Release all interrupt resources.
773	 */
774	cxgb_teardown_interrupts(sc);
775	if (sc->flags & (USING_MSI | USING_MSIX)) {
776		device_printf(sc->dev, "releasing msi message(s)\n");
777		pci_release_msi(sc->dev);
778	} else {
779		device_printf(sc->dev, "no msi message to release\n");
780	}
781
782	if (sc->msix_regs_res != NULL) {
783		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
784		    sc->msix_regs_res);
785	}
786
787	/*
788	 * Free the adapter's taskqueue.
789	 */
790	if (sc->tq != NULL) {
791		taskqueue_free(sc->tq);
792		sc->tq = NULL;
793	}
794
795	free(sc->filters, M_DEVBUF);
796	t3_sge_free(sc);
797
798	if (sc->udbs_res != NULL)
799		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
800		    sc->udbs_res);
801
802	if (sc->regs_res != NULL)
803		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
804		    sc->regs_res);
805
806	MTX_DESTROY(&sc->mdio_lock);
807	MTX_DESTROY(&sc->sge.reg_lock);
808	MTX_DESTROY(&sc->elmer_lock);
809	mtx_lock(&t3_list_lock);
810	SLIST_REMOVE(&t3_list, sc, adapter, link);
811	mtx_unlock(&t3_list_lock);
812	ADAPTER_LOCK_DEINIT(sc);
813}
814
815/**
816 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
817 *	@sc: the controller softc
818 *
819 *	Determines how many sets of SGE queues to use and initializes them.
820 *	We support multiple queue sets per port if we have MSI-X, otherwise
821 *	just one queue set per port.
822 */
823static int
824setup_sge_qsets(adapter_t *sc)
825{
826	int i, j, err, irq_idx = 0, qset_idx = 0;
827	u_int ntxq = SGE_TXQ_PER_SET;
828
829	if ((err = t3_sge_alloc(sc)) != 0) {
830		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
831		return (err);
832	}
833
834	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
835		irq_idx = -1;
836
837	for (i = 0; i < (sc)->params.nports; i++) {
838		struct port_info *pi = &sc->port[i];
839
840		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
841			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
842			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
843			    &sc->params.sge.qset[qset_idx], ntxq, pi);
844			if (err) {
845				t3_free_sge_resources(sc, qset_idx);
846				device_printf(sc->dev,
847				    "t3_sge_alloc_qset failed with %d\n", err);
848				return (err);
849			}
850		}
851	}
852
853	return (0);
854}
855
856static void
857cxgb_teardown_interrupts(adapter_t *sc)
858{
859	int i;
860
861	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] == NULL) {
			/* Should have been set up fully or not at all */
865			KASSERT(sc->msix_irq_res[i] == NULL &&
866				sc->msix_irq_rid[i] == 0,
867				("%s: half-done interrupt (%d).", __func__, i));
868
869			continue;
870		}
871
872		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
873				  sc->msix_intr_tag[i]);
874		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
875				     sc->msix_irq_res[i]);
876
877		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
878		sc->msix_irq_rid[i] = 0;
879	}
880
881	if (sc->intr_tag) {
882		KASSERT(sc->irq_res != NULL,
883			("%s: half-done interrupt.", __func__));
884
885		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
886		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
887				     sc->irq_res);
888
889		sc->irq_res = sc->intr_tag = NULL;
890		sc->irq_rid = 0;
891	}
892}
893
894static int
895cxgb_setup_interrupts(adapter_t *sc)
896{
897	struct resource *res;
898	void *tag;
899	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
900
901	sc->irq_rid = intr_flag ? 1 : 0;
902	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
903					     RF_SHAREABLE | RF_ACTIVE);
904	if (sc->irq_res == NULL) {
905		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
906			      intr_flag, sc->irq_rid);
907		err = EINVAL;
908		sc->irq_rid = 0;
909	} else {
910		err = bus_setup_intr(sc->dev, sc->irq_res,
911		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
912		    sc->cxgb_intr, sc, &sc->intr_tag);
913
914		if (err) {
915			device_printf(sc->dev,
916				      "Cannot set up interrupt (%x, %u, %d)\n",
917				      intr_flag, sc->irq_rid, err);
918			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
919					     sc->irq_res);
920			sc->irq_res = sc->intr_tag = NULL;
921			sc->irq_rid = 0;
922		}
923	}
924
925	/* That's all for INTx or MSI */
926	if (!(intr_flag & USING_MSIX) || err)
927		return (err);
928
929	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
930	for (i = 0; i < sc->msi_count - 1; i++) {
931		rid = i + 2;
932		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
933					     RF_SHAREABLE | RF_ACTIVE);
934		if (res == NULL) {
935			device_printf(sc->dev, "Cannot allocate interrupt "
936				      "for message %d\n", rid);
937			err = EINVAL;
938			break;
939		}
940
941		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
942				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
943		if (err) {
944			device_printf(sc->dev, "Cannot set up interrupt "
945				      "for message %d (%d)\n", rid, err);
946			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
947			break;
948		}
949
950		sc->msix_irq_rid[i] = rid;
951		sc->msix_irq_res[i] = res;
952		sc->msix_intr_tag[i] = tag;
953		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
954	}
955
956	if (err)
957		cxgb_teardown_interrupts(sc);
958
959	return (err);
960}
961
962
963static int
964cxgb_port_probe(device_t dev)
965{
966	struct port_info *p;
967	char buf[80];
968	const char *desc;
969
970	p = device_get_softc(dev);
971	desc = p->phy.desc;
972	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
973	device_set_desc_copy(dev, buf);
974	return (0);
975}
976
977
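/*
 * Create the per-port character device (/dev/<ifname>) that exports the
 * cxgb_extension_ioctl() management interface.
 */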
978static int
979cxgb_makedev(struct port_info *pi)
980{
981
982	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
983	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
984
985	if (pi->port_cdev == NULL)
986		return (ENOMEM);
987
988	pi->port_cdev->si_drv1 = (void *)pi;
989
990	return (0);
991}
992
993#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
994    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
995    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
996#define CXGB_CAP_ENABLE CXGB_CAP
997
998static int
999cxgb_port_attach(device_t dev)
1000{
1001	struct port_info *p;
1002	struct ifnet *ifp;
1003	int err;
1004	struct adapter *sc;
1005
1006	p = device_get_softc(dev);
1007	sc = p->adapter;
1008	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1009	    device_get_unit(device_get_parent(dev)), p->port_id);
1010	PORT_LOCK_INIT(p, p->lockbuf);
1011
1012	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1013	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1014
1015	/* Allocate an ifnet object and set it up */
1016	ifp = p->ifp = if_alloc(IFT_ETHER);
1017	if (ifp == NULL) {
1018		device_printf(dev, "Cannot allocate ifnet\n");
1019		return (ENOMEM);
1020	}
1021
1022	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1023	ifp->if_init = cxgb_init;
1024	ifp->if_softc = p;
1025	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1026	ifp->if_ioctl = cxgb_ioctl;
1027	ifp->if_transmit = cxgb_transmit;
1028	ifp->if_qflush = cxgb_qflush;
1029
1030	ifp->if_capabilities = CXGB_CAP;
1031#ifdef TCP_OFFLOAD
1032	if (is_offload(sc))
1033		ifp->if_capabilities |= IFCAP_TOE4;
1034#endif
1035	ifp->if_capenable = CXGB_CAP_ENABLE;
1036	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1037	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1038
1039	/*
1040	 * Disable TSO on 4-port - it isn't supported by the firmware.
1041	 */
1042	if (sc->params.nports > 2) {
1043		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1044		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1045		ifp->if_hwassist &= ~CSUM_TSO;
1046	}
1047
1048	ether_ifattach(ifp, p->hw_addr);
1049
1050#ifdef DEFAULT_JUMBO
1051	if (sc->params.nports <= 2)
1052		ifp->if_mtu = ETHERMTU_JUMBO;
1053#endif
1054	if ((err = cxgb_makedev(p)) != 0) {
1055		printf("makedev failed %d\n", err);
1056		return (err);
1057	}
1058
1059	/* Create a list of media supported by this port */
1060	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1061	    cxgb_media_status);
1062	cxgb_build_medialist(p);
1063
1064	t3_sge_init_port(p);
1065
1066	return (err);
1067}
1068
1069/*
 * cxgb_port_detach() is called via the device_detach method when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
1072 * removing the device from the view of the kernel, i.e. from all
1073 * interfaces lists etc.  This routine is only called when the driver is
1074 * being unloaded, not when the link goes down.
1075 */
1076static int
1077cxgb_port_detach(device_t dev)
1078{
1079	struct port_info *p;
1080	struct adapter *sc;
1081	int i;
1082
1083	p = device_get_softc(dev);
1084	sc = p->adapter;
1085
1086	/* Tell cxgb_ioctl and if_init that the port is going away */
1087	ADAPTER_LOCK(sc);
1088	SET_DOOMED(p);
1089	wakeup(&sc->flags);
1090	while (IS_BUSY(sc))
1091		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1092	SET_BUSY(sc);
1093	ADAPTER_UNLOCK(sc);
1094
1095	if (p->port_cdev != NULL)
1096		destroy_dev(p->port_cdev);
1097
1098	cxgb_uninit_synchronized(p);
1099	ether_ifdetach(p->ifp);
1100
1101	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1102		struct sge_qset *qs = &sc->sge.qs[i];
1103		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1104
1105		callout_drain(&txq->txq_watchdog);
1106		callout_drain(&txq->txq_timer);
1107	}
1108
1109	PORT_LOCK_DEINIT(p);
1110	if_free(p->ifp);
1111	p->ifp = NULL;
1112
1113	ADAPTER_LOCK(sc);
1114	CLR_BUSY(sc);
1115	wakeup_one(&sc->flags);
1116	ADAPTER_UNLOCK(sc);
1117	return (0);
1118}
1119
1120void
1121t3_fatal_err(struct adapter *sc)
1122{
1123	u_int fw_status[4];
1124
1125	if (sc->flags & FULL_INIT_DONE) {
1126		t3_sge_stop(sc);
1127		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1128		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1129		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1130		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1131		t3_intr_disable(sc);
1132	}
1133	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1134	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1135		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1136		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1137}
1138
1139int
1140t3_os_find_pci_capability(adapter_t *sc, int cap)
1141{
1142	device_t dev;
1143	struct pci_devinfo *dinfo;
1144	pcicfgregs *cfg;
1145	uint32_t status;
1146	uint8_t ptr;
1147
1148	dev = sc->dev;
1149	dinfo = device_get_ivars(dev);
1150	cfg = &dinfo->cfg;
1151
1152	status = pci_read_config(dev, PCIR_STATUS, 2);
1153	if (!(status & PCIM_STATUS_CAPPRESENT))
1154		return (0);
1155
1156	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1157	case 0:
1158	case 1:
1159		ptr = PCIR_CAP_PTR;
1160		break;
1161	case 2:
1162		ptr = PCIR_CAP_PTR_2;
1163		break;
1164	default:
1165		return (0);
1166		break;
1167	}
1168	ptr = pci_read_config(dev, ptr, 1);
1169
1170	while (ptr != 0) {
1171		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1172			return (ptr);
1173		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1174	}
1175
1176	return (0);
1177}
1178
1179int
1180t3_os_pci_save_state(struct adapter *sc)
1181{
1182	device_t dev;
1183	struct pci_devinfo *dinfo;
1184
1185	dev = sc->dev;
1186	dinfo = device_get_ivars(dev);
1187
1188	pci_cfg_save(dev, dinfo, 0);
1189	return (0);
1190}
1191
1192int
1193t3_os_pci_restore_state(struct adapter *sc)
1194{
1195	device_t dev;
1196	struct pci_devinfo *dinfo;
1197
1198	dev = sc->dev;
1199	dinfo = device_get_ivars(dev);
1200
1201	pci_cfg_restore(dev, dinfo);
1202	return (0);
1203}
1204
1205/**
1206 *	t3_os_link_changed - handle link status changes
1207 *	@sc: the adapter associated with the link change
1208 *	@port_id: the port index whose link status has changed
1209 *	@link_status: the new status of the link
1210 *	@speed: the new speed setting
1211 *	@duplex: the new duplex setting
1212 *	@fc: the new flow-control setting
1213 *
1214 *	This is the OS-dependent handler for link status changes.  The OS
1215 *	neutral handler takes care of most of the processing for these events,
1216 *	then calls this handler for any OS-specific processing.
1217 */
1218void
1219t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1220     int duplex, int fc, int mac_was_reset)
1221{
1222	struct port_info *pi = &adapter->port[port_id];
1223	struct ifnet *ifp = pi->ifp;
1224
1225	/* no race with detach, so ifp should always be good */
1226	KASSERT(ifp, ("%s: if detached.", __func__));
1227
1228	/* Reapply mac settings if they were lost due to a reset */
1229	if (mac_was_reset) {
1230		PORT_LOCK(pi);
1231		cxgb_update_mac_settings(pi);
1232		PORT_UNLOCK(pi);
1233	}
1234
1235	if (link_status) {
1236		ifp->if_baudrate = IF_Mbps(speed);
1237		if_link_state_change(ifp, LINK_STATE_UP);
1238	} else
1239		if_link_state_change(ifp, LINK_STATE_DOWN);
1240}
1241
1242/**
1243 *	t3_os_phymod_changed - handle PHY module changes
 *	@adap: the adapter whose PHY module changed
 *	@port_id: the index of the port whose PHY module changed
1246 *
1247 *	This is the OS-dependent handler for PHY module changes.  It is
1248 *	invoked when a PHY module is removed or inserted for any OS-specific
1249 *	processing.
1250 */
void
t3_os_phymod_changed(struct adapter *adap, int port_id)
1252{
1253	static const char *mod_str[] = {
1254		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1255	};
1256	struct port_info *pi = &adap->port[port_id];
1257	int mod = pi->phy.modtype;
1258
1259	if (mod != pi->media.ifm_cur->ifm_data)
1260		cxgb_build_medialist(pi);
1261
1262	if (mod == phy_modtype_none)
1263		if_printf(pi->ifp, "PHY module unplugged\n");
1264	else {
1265		KASSERT(mod < ARRAY_SIZE(mod_str),
1266			("invalid PHY module type %d", mod));
1267		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1268	}
1269}
1270
1271void
1272t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1273{
1274
1275	/*
	 * The ifnet might not be allocated when this gets called, as this
	 * is called early on in attach by t3_prep_adapter, so just save
	 * the address off in the port structure.
1279	 */
1280	if (cxgb_debug)
1281		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1282	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1283}
1284
1285/*
1286 * Programs the XGMAC based on the settings in the ifnet.  These settings
1287 * include MTU, MAC address, mcast addresses, etc.
1288 */
1289static void
1290cxgb_update_mac_settings(struct port_info *p)
1291{
1292	struct ifnet *ifp = p->ifp;
1293	struct t3_rx_mode rm;
1294	struct cmac *mac = &p->mac;
1295	int mtu, hwtagging;
1296
1297	PORT_LOCK_ASSERT_OWNED(p);
1298
1299	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1300
1301	mtu = ifp->if_mtu;
1302	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1303		mtu += ETHER_VLAN_ENCAP_LEN;
1304
1305	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1306
1307	t3_mac_set_mtu(mac, mtu);
1308	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1309	t3_mac_set_address(mac, 0, p->hw_addr);
1310	t3_init_rx_mode(&rm, p);
1311	t3_mac_set_rx_mode(mac, &rm);
1312}
1313
1314
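/*
 * Wait for at least n management-queue replies (beyond init_cnt) to show up
 * on response queue 0, polling every 10ms; returns ETIMEDOUT if they don't
 * arrive within 5 attempts.
 */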
1315static int
1316await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1317			      unsigned long n)
1318{
1319	int attempts = 5;
1320
1321	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1322		if (!--attempts)
1323			return (ETIMEDOUT);
1324		t3_os_sleep(10);
1325	}
	return (0);
1327}
1328
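/*
 * Prime the TP's parity-protected memories by writing every SMT, L2T, and
 * routing-table entry (plus one TCB field) through the management queue,
 * then wait for all of the replies.
 */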
1329static int
1330init_tp_parity(struct adapter *adap)
1331{
1332	int i;
1333	struct mbuf *m;
1334	struct cpl_set_tcb_field *greq;
1335	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1336
1337	t3_tp_set_offload_mode(adap, 1);
1338
1339	for (i = 0; i < 16; i++) {
1340		struct cpl_smt_write_req *req;
1341
1342		m = m_gethdr(M_WAITOK, MT_DATA);
1343		req = mtod(m, struct cpl_smt_write_req *);
1344		m->m_len = m->m_pkthdr.len = sizeof(*req);
1345		memset(req, 0, sizeof(*req));
1346		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1347		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1348		req->iff = i;
1349		t3_mgmt_tx(adap, m);
1350	}
1351
1352	for (i = 0; i < 2048; i++) {
1353		struct cpl_l2t_write_req *req;
1354
1355		m = m_gethdr(M_WAITOK, MT_DATA);
1356		req = mtod(m, struct cpl_l2t_write_req *);
1357		m->m_len = m->m_pkthdr.len = sizeof(*req);
1358		memset(req, 0, sizeof(*req));
1359		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1360		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1361		req->params = htonl(V_L2T_W_IDX(i));
1362		t3_mgmt_tx(adap, m);
1363	}
1364
1365	for (i = 0; i < 2048; i++) {
1366		struct cpl_rte_write_req *req;
1367
1368		m = m_gethdr(M_WAITOK, MT_DATA);
1369		req = mtod(m, struct cpl_rte_write_req *);
1370		m->m_len = m->m_pkthdr.len = sizeof(*req);
1371		memset(req, 0, sizeof(*req));
1372		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1373		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1374		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1375		t3_mgmt_tx(adap, m);
1376	}
1377
1378	m = m_gethdr(M_WAITOK, MT_DATA);
1379	greq = mtod(m, struct cpl_set_tcb_field *);
1380	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1381	memset(greq, 0, sizeof(*greq));
1382	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1383	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1384	greq->mask = htobe64(1);
1385	t3_mgmt_tx(adap, m);
1386
1387	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1388	t3_tp_set_offload_mode(adap, 0);
1389	return (i);
1390}
1391
1392/**
1393 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1394 *	@adap: the adapter
1395 *
1396 *	Sets up RSS to distribute packets to multiple receive queues.  We
1397 *	configure the RSS CPU lookup table to distribute to the number of HW
1398 *	receive queues, and the response queue lookup table to narrow that
1399 *	down to the response queues actually configured for each port.
1400 *	We always configure the RSS mapping for two ports since the mapping
1401 *	table has plenty of entries.
1402 */
1403static void
1404setup_rss(adapter_t *adap)
1405{
1406	int i;
1407	u_int nq[2];
1408	uint8_t cpus[SGE_QSETS + 1];
1409	uint16_t rspq_map[RSS_TABLE_SIZE];
1410
1411	for (i = 0; i < SGE_QSETS; ++i)
1412		cpus[i] = i;
1413	cpus[SGE_QSETS] = 0xff;
1414
1415	nq[0] = nq[1] = 0;
1416	for_each_port(adap, i) {
1417		const struct port_info *pi = adap2pinfo(adap, i);
1418
1419		nq[pi->tx_chan] += pi->nqsets;
1420	}
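
	/*
	 * The first half of the lookup table steers to channel 0's queue
	 * sets and the second half to channel 1's (channel 1's queue sets
	 * start right after channel 0's, hence the nq[0] offset).
	 */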
1421	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1422		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1423		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1424	}
1425
1426	/* Calculate the reverse RSS map table */
1427	for (i = 0; i < SGE_QSETS; ++i)
1428		adap->rrss_map[i] = 0xff;
1429	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1430		if (adap->rrss_map[rspq_map[i]] == 0xff)
1431			adap->rrss_map[rspq_map[i]] = i;
1432
1433	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1434		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1435	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1436	              cpus, rspq_map);
1437
1438}
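
/*
 * Send a firmware management work request that programs one packet-scheduler
 * entry: the scheduler and index to modify, its min/max parameters, and the
 * port binding.
 */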
1439static void
1440send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1441			      int hi, int port)
1442{
1443	struct mbuf *m;
1444	struct mngt_pktsched_wr *req;
1445
1446	m = m_gethdr(M_NOWAIT, MT_DATA);
1447	if (m) {
1448		req = mtod(m, struct mngt_pktsched_wr *);
1449		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1450		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1451		req->sched = sched;
1452		req->idx = qidx;
1453		req->min = lo;
1454		req->max = hi;
1455		req->binding = port;
1456		m->m_len = m->m_pkthdr.len = sizeof(*req);
1457		t3_mgmt_tx(adap, m);
1458	}
1459}
1460
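/*
 * Bind every port's queue sets to that port's TX channel using pktsched
 * management commands.
 */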
1461static void
1462bind_qsets(adapter_t *sc)
1463{
1464	int i, j;
1465
1466	for (i = 0; i < (sc)->params.nports; ++i) {
1467		const struct port_info *pi = adap2pinfo(sc, i);
1468
1469		for (j = 0; j < pi->nqsets; ++j) {
1470			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1471					  -1, pi->tx_chan);
1472
1473		}
1474	}
1475}
1476
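/*
 * If the protocol SRAM image stored in the adapter's EEPROM is older than
 * what this driver was built against, rewrite it from the
 * cxgb_t3<rev>_tp_eeprom firmware module.
 */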
1477static void
1478update_tpeeprom(struct adapter *adap)
1479{
1480	const struct firmware *tpeeprom;
1481
1482	uint32_t version;
1483	unsigned int major, minor;
1484	int ret, len;
1485	char rev, name[32];
1486
1487	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1488
1489	major = G_TP_VERSION_MAJOR(version);
1490	minor = G_TP_VERSION_MINOR(version);
1491	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1492		return;
1493
1494	rev = t3rev2char(adap);
1495	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1496
1497	tpeeprom = firmware_get(name);
1498	if (tpeeprom == NULL) {
1499		device_printf(adap->dev,
1500			      "could not load TP EEPROM: unable to load %s\n",
1501			      name);
1502		return;
1503	}
1504
1505	len = tpeeprom->datasize - 4;
1506
1507	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1508	if (ret)
1509		goto release_tpeeprom;
1510
	if (len != TP_SRAM_LEN) {
		device_printf(adap->dev,
			      "%s length is wrong len=%d expected=%d\n", name,
			      len, TP_SRAM_LEN);
		/* Don't leak the firmware reference on this error path. */
		goto release_tpeeprom;
	}
1517
1518	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1519	    TP_SRAM_OFFSET);
1520
1521	if (!ret) {
1522		device_printf(adap->dev,
1523			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1524			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1525	} else
1526		device_printf(adap->dev,
1527			      "Protocol SRAM image update in EEPROM failed\n");
1528
1529release_tpeeprom:
1530	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1531
1532	return;
1533}
1534
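/*
 * Bring the TP's protocol SRAM up to the version this driver expects:
 * refresh the copy kept in the EEPROM if necessary, then load the image
 * from the cxgb_t3<rev>_protocol_sram firmware module into the SRAM itself.
 */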
1535static int
1536update_tpsram(struct adapter *adap)
1537{
1538	const struct firmware *tpsram;
1539	int ret;
1540	char rev, name[32];
1541
1542	rev = t3rev2char(adap);
1543	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1544
1545	update_tpeeprom(adap);
1546
1547	tpsram = firmware_get(name);
	if (tpsram == NULL) {
1549		device_printf(adap->dev, "could not load TP SRAM\n");
1550		return (EINVAL);
1551	} else
1552		device_printf(adap->dev, "updating TP SRAM\n");
1553
1554	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1555	if (ret)
1556		goto release_tpsram;
1557
1558	ret = t3_set_proto_sram(adap, tpsram->data);
1559	if (ret)
1560		device_printf(adap->dev, "loading protocol SRAM failed\n");
1561
1562release_tpsram:
1563	firmware_put(tpsram, FIRMWARE_UNLOAD);
1564
	return (ret);
1566}
1567
1568/**
1569 *	cxgb_up - enable the adapter
1570 *	@adap: adapter being enabled
1571 *
1572 *	Called when the first port is enabled, this function performs the
1573 *	actions necessary to make an adapter operational, such as completing
1574 *	the initialization of HW modules, and enabling interrupts.
1575 */
1576static int
1577cxgb_up(struct adapter *sc)
1578{
1579	int err = 0;
1580	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1581
1582	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1583					   __func__, sc->open_device_map));
1584
1585	if ((sc->flags & FULL_INIT_DONE) == 0) {
1586
1587		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1588
1589		if ((sc->flags & FW_UPTODATE) == 0)
1590			if ((err = upgrade_fw(sc)))
1591				goto out;
1592
1593		if ((sc->flags & TPS_UPTODATE) == 0)
1594			if ((err = update_tpsram(sc)))
1595				goto out;
1596
1597		if (is_offload(sc) && nfilters != 0) {
1598			sc->params.mc5.nservers = 0;
1599
1600			if (nfilters < 0)
1601				sc->params.mc5.nfilters = mxf;
1602			else
1603				sc->params.mc5.nfilters = min(nfilters, mxf);
1604		}
1605
1606		err = t3_init_hw(sc, 0);
1607		if (err)
1608			goto out;
1609
1610		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1611		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1612
1613		err = setup_sge_qsets(sc);
1614		if (err)
1615			goto out;
1616
1617		alloc_filters(sc);
1618		setup_rss(sc);
1619
1620		t3_add_configured_sysctls(sc);
1621		sc->flags |= FULL_INIT_DONE;
1622	}
1623
1624	t3_intr_clear(sc);
1625	t3_sge_start(sc);
1626	t3_intr_enable(sc);
1627
1628	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1629	    is_offload(sc) && init_tp_parity(sc) == 0)
1630		sc->flags |= TP_PARITY_INIT;
1631
1632	if (sc->flags & TP_PARITY_INIT) {
1633		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1634		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1635	}
1636
1637	if (!(sc->flags & QUEUES_BOUND)) {
1638		bind_qsets(sc);
1639		setup_hw_filters(sc);
1640		sc->flags |= QUEUES_BOUND;
1641	}
1642
1643	t3_sge_reset_adapter(sc);
1644out:
1645	return (err);
1646}
1647
1648/*
1649 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1650 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1651 * during controller_detach, not here.
1652 */
1653static void
1654cxgb_down(struct adapter *sc)
1655{
1656	t3_sge_stop(sc);
1657	t3_intr_disable(sc);
1658}
1659
1660/*
1661 * if_init for cxgb ports.
1662 */
1663static void
1664cxgb_init(void *arg)
1665{
1666	struct port_info *p = arg;
1667	struct adapter *sc = p->adapter;
1668
1669	ADAPTER_LOCK(sc);
1670	cxgb_init_locked(p); /* releases adapter lock */
1671	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1672}
1673
1674static int
1675cxgb_init_locked(struct port_info *p)
1676{
1677	struct adapter *sc = p->adapter;
1678	struct ifnet *ifp = p->ifp;
1679	struct cmac *mac = &p->mac;
1680	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1681
1682	ADAPTER_LOCK_ASSERT_OWNED(sc);
1683
1684	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1685		gave_up_lock = 1;
1686		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1687			rc = EINTR;
1688			goto done;
1689		}
1690	}
1691	if (IS_DOOMED(p)) {
1692		rc = ENXIO;
1693		goto done;
1694	}
1695	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1696
1697	/*
1698	 * The code that runs during one-time adapter initialization can sleep
1699	 * so it's important not to hold any locks across it.
1700	 */
1701	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1702
1703	if (may_sleep) {
1704		SET_BUSY(sc);
1705		gave_up_lock = 1;
1706		ADAPTER_UNLOCK(sc);
1707	}
1708
1709	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
		goto done;
1711
1712	PORT_LOCK(p);
1713	if (isset(&sc->open_device_map, p->port_id) &&
1714	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1715		PORT_UNLOCK(p);
1716		goto done;
1717	}
1718	t3_port_intr_enable(sc, p->port_id);
1719	if (!mac->multiport)
1720		t3_mac_init(mac);
1721	cxgb_update_mac_settings(p);
1722	t3_link_start(&p->phy, mac, &p->link_config);
1723	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1724	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1725	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1726	PORT_UNLOCK(p);
1727
1728	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1729		struct sge_qset *qs = &sc->sge.qs[i];
1730		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1731
1732		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1733				 txq->txq_watchdog.c_cpu);
1734	}
1735
1736	/* all ok */
1737	setbit(&sc->open_device_map, p->port_id);
1738	callout_reset(&p->link_check_ch,
1739	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1740	    link_check_callout, p);
1741
1742done:
1743	if (may_sleep) {
1744		ADAPTER_LOCK(sc);
1745		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1746		CLR_BUSY(sc);
1747	}
1748	if (gave_up_lock)
1749		wakeup_one(&sc->flags);
1750	ADAPTER_UNLOCK(sc);
1751	return (rc);
1752}
1753
1754static int
1755cxgb_uninit_locked(struct port_info *p)
1756{
1757	struct adapter *sc = p->adapter;
1758	int rc;
1759
1760	ADAPTER_LOCK_ASSERT_OWNED(sc);
1761
1762	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1763		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1764			rc = EINTR;
1765			goto done;
1766		}
1767	}
1768	if (IS_DOOMED(p)) {
1769		rc = ENXIO;
1770		goto done;
1771	}
1772	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1773	SET_BUSY(sc);
1774	ADAPTER_UNLOCK(sc);
1775
1776	rc = cxgb_uninit_synchronized(p);
1777
1778	ADAPTER_LOCK(sc);
1779	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1780	CLR_BUSY(sc);
1781	wakeup_one(&sc->flags);
1782done:
1783	ADAPTER_UNLOCK(sc);
1784	return (rc);
1785}
1786
1787/*
1788 * Called on "ifconfig down", and from port_detach
1789 */
1790static int
1791cxgb_uninit_synchronized(struct port_info *pi)
1792{
1793	struct adapter *sc = pi->adapter;
1794	struct ifnet *ifp = pi->ifp;
1795
1796	/*
1797	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1798	 */
1799	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1800
1801	/*
1802	 * Clear this port's bit from the open device map, and then drain all
1803	 * the tasks that can access/manipulate this port's port_info or ifp.
	 * We disable this port's interrupts here, so the slow/ext
1805	 * interrupt tasks won't be enqueued.  The tick task will continue to
1806	 * be enqueued every second but the runs after this drain will not see
1807	 * this port in the open device map.
1808	 *
1809	 * A well behaved task must take open_device_map into account and ignore
1810	 * ports that are not open.
1811	 */
1812	clrbit(&sc->open_device_map, pi->port_id);
1813	t3_port_intr_disable(sc, pi->port_id);
1814	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1815	taskqueue_drain(sc->tq, &sc->tick_task);
1816
1817	callout_drain(&pi->link_check_ch);
1818	taskqueue_drain(sc->tq, &pi->link_check_task);
1819
1820	PORT_LOCK(pi);
1821	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1822
1823	/* disable pause frames */
1824	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1825
1826	/* Reset RX FIFO HWM */
1827	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1828			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1829
1830	DELAY(100 * 1000);
1831
1832	/* Wait for TXFIFO empty */
1833	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1834			F_TXFIFO_EMPTY, 1, 20, 5);
1835
1836	DELAY(100 * 1000);
1837	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1838
1839	pi->phy.ops->power_down(&pi->phy, 1);
1840
1841	PORT_UNLOCK(pi);
1842
1843	pi->link_config.link_ok = 0;
1844	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1845
1846	if (sc->open_device_map == 0)
1847		cxgb_down(pi->adapter);
1848
1849	return (0);
1850}
1851
1852/*
1853 * Mark lro enabled or disabled in all qsets for this port
1854 */
1855static int
1856cxgb_set_lro(struct port_info *p, int enabled)
1857{
1858	int i;
1859	struct adapter *adp = p->adapter;
1860	struct sge_qset *q;
1861
1862	for (i = 0; i < p->nqsets; i++) {
1863		q = &adp->sge.qs[p->first_qset + i];
1864		q->lro.enabled = (enabled != 0);
1865	}
1866	return (0);
1867}
1868
1869static int
1870cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1871{
1872	struct port_info *p = ifp->if_softc;
1873	struct adapter *sc = p->adapter;
1874	struct ifreq *ifr = (struct ifreq *)data;
1875	int flags, error = 0, mtu;
1876	uint32_t mask;
1877
1878	switch (command) {
1879	case SIOCSIFMTU:
1880		ADAPTER_LOCK(sc);
1881		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1882		if (error) {
1883fail:
1884			ADAPTER_UNLOCK(sc);
1885			return (error);
1886		}
1887
1888		mtu = ifr->ifr_mtu;
1889		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1890			error = EINVAL;
1891		} else {
1892			ifp->if_mtu = mtu;
1893			PORT_LOCK(p);
1894			cxgb_update_mac_settings(p);
1895			PORT_UNLOCK(p);
1896		}
1897		ADAPTER_UNLOCK(sc);
1898		break;
1899	case SIOCSIFFLAGS:
1900		ADAPTER_LOCK(sc);
1901		if (IS_DOOMED(p)) {
1902			error = ENXIO;
1903			goto fail;
1904		}
1905		if (ifp->if_flags & IFF_UP) {
1906			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1907				flags = p->if_flags;
1908				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1909				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1910					if (IS_BUSY(sc)) {
1911						error = EBUSY;
1912						goto fail;
1913					}
1914					PORT_LOCK(p);
1915					cxgb_update_mac_settings(p);
1916					PORT_UNLOCK(p);
1917				}
1918				ADAPTER_UNLOCK(sc);
1919			} else
1920				error = cxgb_init_locked(p);
1921			p->if_flags = ifp->if_flags;
1922		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1923			error = cxgb_uninit_locked(p);
1924		else
1925			ADAPTER_UNLOCK(sc);
1926
1927		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1928		break;
1929	case SIOCADDMULTI:
1930	case SIOCDELMULTI:
1931		ADAPTER_LOCK(sc);
1932		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1933		if (error)
1934			goto fail;
1935
1936		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1937			PORT_LOCK(p);
1938			cxgb_update_mac_settings(p);
1939			PORT_UNLOCK(p);
1940		}
1941		ADAPTER_UNLOCK(sc);
1942
1943		break;
1944	case SIOCSIFCAP:
1945		ADAPTER_LOCK(sc);
1946		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1947		if (error)
1948			goto fail;
1949
1950		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1951		if (mask & IFCAP_TXCSUM) {
1952			ifp->if_capenable ^= IFCAP_TXCSUM;
1953			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1954
1955			if (IFCAP_TSO4 & ifp->if_capenable &&
1956			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1957				ifp->if_capenable &= ~IFCAP_TSO4;
1958				if_printf(ifp,
1959				    "tso4 disabled due to -txcsum.\n");
1960			}
1961		}
1962		if (mask & IFCAP_TXCSUM_IPV6) {
1963			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1964			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1965
1966			if (IFCAP_TSO6 & ifp->if_capenable &&
1967			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1968				ifp->if_capenable &= ~IFCAP_TSO6;
1969				if_printf(ifp,
1970				    "tso6 disabled due to -txcsum6.\n");
1971			}
1972		}
1973		if (mask & IFCAP_RXCSUM)
1974			ifp->if_capenable ^= IFCAP_RXCSUM;
1975		if (mask & IFCAP_RXCSUM_IPV6)
1976			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1977
1978		/*
1979		 * Note that we leave CSUM_TSO alone (it is always set).  The
1980		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1981		 * sending a TSO request our way, so it's sufficient to toggle
1982		 * IFCAP_TSOx only.
1983		 */
1984		if (mask & IFCAP_TSO4) {
1985			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1986			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1987				if_printf(ifp, "enable txcsum first.\n");
1988				error = EAGAIN;
1989				goto fail;
1990			}
1991			ifp->if_capenable ^= IFCAP_TSO4;
1992		}
1993		if (mask & IFCAP_TSO6) {
1994			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1995			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1996				if_printf(ifp, "enable txcsum6 first.\n");
1997				error = EAGAIN;
1998				goto fail;
1999			}
2000			ifp->if_capenable ^= IFCAP_TSO6;
2001		}
2002		if (mask & IFCAP_LRO) {
2003			ifp->if_capenable ^= IFCAP_LRO;
2004
			/* Safe to do this even if cxgb_up hasn't been called yet */
2006			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2007		}
2008#ifdef TCP_OFFLOAD
2009		if (mask & IFCAP_TOE4) {
2010			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2011
2012			error = toe_capability(p, enable);
2013			if (error == 0)
2014				ifp->if_capenable ^= mask;
2015		}
2016#endif
2017		if (mask & IFCAP_VLAN_HWTAGGING) {
2018			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2019			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2020				PORT_LOCK(p);
2021				cxgb_update_mac_settings(p);
2022				PORT_UNLOCK(p);
2023			}
2024		}
2025		if (mask & IFCAP_VLAN_MTU) {
2026			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2027			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2028				PORT_LOCK(p);
2029				cxgb_update_mac_settings(p);
2030				PORT_UNLOCK(p);
2031			}
2032		}
2033		if (mask & IFCAP_VLAN_HWTSO)
2034			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2035		if (mask & IFCAP_VLAN_HWCSUM)
2036			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2037
2038#ifdef VLAN_CAPABILITIES
2039		VLAN_CAPABILITIES(ifp);
2040#endif
2041		ADAPTER_UNLOCK(sc);
2042		break;
2043	case SIOCSIFMEDIA:
2044	case SIOCGIFMEDIA:
2045		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2046		break;
2047	default:
2048		error = ether_ioctl(ifp, command, data);
2049	}
2050
2051	return (error);
2052}
2053
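/*
 * ifmedia change callback.  The media list reflects the PHY's capabilities
 * and the installed module; manual media selection is not supported.
 */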
2054static int
2055cxgb_media_change(struct ifnet *ifp)
2056{
2057	return (EOPNOTSUPP);
2058}
2059
2060/*
2061 * Translates phy->modtype to the correct Ethernet media subtype.
2062 */
2063static int
2064cxgb_ifm_type(int mod)
2065{
2066	switch (mod) {
2067	case phy_modtype_sr:
2068		return (IFM_10G_SR);
2069	case phy_modtype_lr:
2070		return (IFM_10G_LR);
2071	case phy_modtype_lrm:
2072		return (IFM_10G_LRM);
2073	case phy_modtype_twinax:
2074		return (IFM_10G_TWINAX);
2075	case phy_modtype_twinax_long:
2076		return (IFM_10G_TWINAX_LONG);
2077	case phy_modtype_none:
2078		return (IFM_NONE);
2079	case phy_modtype_unknown:
2080		return (IFM_UNKNOWN);
2081	}
2082
2083	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2084	return (IFM_UNKNOWN);
2085}
2086
2087/*
2088 * Rebuilds the ifmedia list for this port, and sets the current media.
2089 */
2090static void
2091cxgb_build_medialist(struct port_info *p)
2092{
2093	struct cphy *phy = &p->phy;
2094	struct ifmedia *media = &p->media;
2095	int mod = phy->modtype;
2096	int m = IFM_ETHER | IFM_FDX;
2097
2098	PORT_LOCK(p);
2099
2100	ifmedia_removeall(media);
2101	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2102		/* Copper (RJ45) */
2103
2104		if (phy->caps & SUPPORTED_10000baseT_Full)
2105			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2106
2107		if (phy->caps & SUPPORTED_1000baseT_Full)
2108			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2109
2110		if (phy->caps & SUPPORTED_100baseT_Full)
2111			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2112
2113		if (phy->caps & SUPPORTED_10baseT_Full)
2114			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2115
2116		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2117		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2118
2119	} else if (phy->caps & SUPPORTED_TP) {
2120		/* Copper (CX4) */
2121
2122		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2123			("%s: unexpected cap 0x%x", __func__, phy->caps));
2124
2125		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2126		ifmedia_set(media, m | IFM_10G_CX4);
2127
2128	} else if (phy->caps & SUPPORTED_FIBRE &&
2129		   phy->caps & SUPPORTED_10000baseT_Full) {
2130		/* 10G optical (but includes SFP+ twinax) */
2131
2132		m |= cxgb_ifm_type(mod);
2133		if (IFM_SUBTYPE(m) == IFM_NONE)
2134			m &= ~IFM_FDX;
2135
2136		ifmedia_add(media, m, mod, NULL);
2137		ifmedia_set(media, m);
2138
2139	} else if (phy->caps & SUPPORTED_FIBRE &&
2140		   phy->caps & SUPPORTED_1000baseT_Full) {
2141		/* 1G optical */
2142
2143		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2144		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2145		ifmedia_set(media, m | IFM_1000_SX);
2146
2147	} else {
2148		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2149			    phy->caps));
2150	}
2151
2152	PORT_UNLOCK(p);
2153}
2154
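/*
 * ifmedia status callback.  Rebuilds the media list if the module type has
 * changed, then reports link state and, for autoselect media (copper RJ45),
 * the speed that was negotiated.
 */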
2155static void
2156cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2157{
2158	struct port_info *p = ifp->if_softc;
2159	struct ifmedia_entry *cur = p->media.ifm_cur;
2160	int speed = p->link_config.speed;
2161
2162	if (cur->ifm_data != p->phy.modtype) {
2163		cxgb_build_medialist(p);
2164		cur = p->media.ifm_cur;
2165	}
2166
2167	ifmr->ifm_status = IFM_AVALID;
2168	if (!p->link_config.link_ok)
2169		return;
2170
2171	ifmr->ifm_status |= IFM_ACTIVE;
2172
2173	/*
2174	 * active and current will differ iff current media is autoselect.  That
2175	 * can happen only for copper RJ45.
2176	 */
2177	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2178		return;
2179	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2180		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2181
2182	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2183	if (speed == SPEED_10000)
2184		ifmr->ifm_active |= IFM_10G_T;
2185	else if (speed == SPEED_1000)
2186		ifmr->ifm_active |= IFM_1000_T;
2187	else if (speed == SPEED_100)
2188		ifmr->ifm_active |= IFM_100_TX;
2189	else if (speed == SPEED_10)
2190		ifmr->ifm_active |= IFM_10_T;
2191	else
2192		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2193			    speed));
2194}
2195
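/*
 * Interrupt handler for slow-path (async) events.  PL interrupts are masked
 * here and the actual work is deferred to the slow interrupt task.
 */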
2196static void
2197cxgb_async_intr(void *data)
2198{
2199	adapter_t *sc = data;
2200
2201	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2202	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2203	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2204}
2205
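/*
 * Callout that schedules a link check, but only while the port is in the
 * open device map.
 */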
2206static void
2207link_check_callout(void *arg)
2208{
2209	struct port_info *pi = arg;
2210	struct adapter *sc = pi->adapter;
2211
2212	if (!isset(&sc->open_device_map, pi->port_id))
2213		return;
2214
2215	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2216}
2217
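/*
 * Task that checks the port's link state.  It keeps rescheduling itself via
 * the callout if the PHY cannot interrupt on link changes or while a link
 * fault is outstanding.
 */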
2218static void
2219check_link_status(void *arg, int pending)
2220{
2221	struct port_info *pi = arg;
2222	struct adapter *sc = pi->adapter;
2223
2224	if (!isset(&sc->open_device_map, pi->port_id))
2225		return;
2226
2227	t3_link_changed(sc, pi->port_id);
2228
2229	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2230		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2231}
2232
2233void
2234t3_os_link_intr(struct port_info *pi)
2235{
2236	/*
2237	 * Schedule a link check in the near future.  If the link is flapping
2238	 * rapidly we'll keep resetting the callout and delaying the check until
2239	 * things stabilize a bit.
2240	 */
2241	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2242}
2243
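/*
 * Periodic MAC watchdog for T3B2 adapters.  For every open port with a good
 * link the MAC is checked and, depending on the watchdog's verdict, either
 * toggled or reset and the link restarted.
 */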
2244static void
2245check_t3b2_mac(struct adapter *sc)
2246{
2247	int i;
2248
2249	if (sc->flags & CXGB_SHUTDOWN)
2250		return;
2251
2252	for_each_port(sc, i) {
2253		struct port_info *p = &sc->port[i];
2254		int status;
2255#ifdef INVARIANTS
2256		struct ifnet *ifp = p->ifp;
2257#endif
2258
2259		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2260		    !p->link_config.link_ok)
2261			continue;
2262
2263		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2264			("%s: state mismatch (drv_flags %x, device_map %x)",
2265			 __func__, ifp->if_drv_flags, sc->open_device_map));
2266
2267		PORT_LOCK(p);
2268		status = t3b2_mac_watchdog_task(&p->mac);
2269		if (status == 1)
2270			p->mac.stats.num_toggled++;
2271		else if (status == 2) {
2272			struct cmac *mac = &p->mac;
2273
2274			cxgb_update_mac_settings(p);
2275			t3_link_start(&p->phy, mac, &p->link_config);
2276			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2277			t3_port_intr_enable(sc, p->port_id);
2278			p->mac.stats.num_resets++;
2279		}
2280		PORT_UNLOCK(p);
2281	}
2282}
2283
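/*
 * Once-a-second callout: queue the tick task and reschedule.
 */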
2284static void
2285cxgb_tick(void *arg)
2286{
2287	adapter_t *sc = (adapter_t *)arg;
2288
2289	if (sc->flags & CXGB_SHUTDOWN)
2290		return;
2291
2292	taskqueue_enqueue(sc->tq, &sc->tick_task);
2293	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2294}
2295
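/*
 * Tick task.  Runs the T3B2 MAC watchdog when applicable, records response
 * queue starvation and free list empty events, refreshes the per-port
 * interface statistics from the MAC, and counts RX FIFO overflows.
 */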
2296static void
2297cxgb_tick_handler(void *arg, int count)
2298{
2299	adapter_t *sc = (adapter_t *)arg;
2300	const struct adapter_params *p = &sc->params;
2301	int i;
2302	uint32_t cause, reset;
2303
2304	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2305		return;
2306
2307	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2308		check_t3b2_mac(sc);
2309
2310	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2311	if (cause) {
2312		struct sge_qset *qs = &sc->sge.qs[0];
2313		uint32_t mask, v;
2314
2315		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2316
2317		mask = 1;
2318		for (i = 0; i < SGE_QSETS; i++) {
2319			if (v & mask)
2320				qs[i].rspq.starved++;
2321			mask <<= 1;
2322		}
2323
2324		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2325
2326		for (i = 0; i < SGE_QSETS * 2; i++) {
2327			if (v & mask) {
2328				qs[i / 2].fl[i % 2].empty++;
2329			}
2330			mask <<= 1;
2331		}
2332
2333		/* clear */
2334		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2335		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2336	}
2337
2338	for (i = 0; i < sc->params.nports; i++) {
2339		struct port_info *pi = &sc->port[i];
2340		struct ifnet *ifp = pi->ifp;
2341		struct cmac *mac = &pi->mac;
2342		struct mac_stats *mstats = &mac->stats;
2343		int drops, j;
2344
2345		if (!isset(&sc->open_device_map, pi->port_id))
2346			continue;
2347
2348		PORT_LOCK(pi);
2349		t3_mac_update_stats(mac);
2350		PORT_UNLOCK(pi);
2351
2352		ifp->if_opackets = mstats->tx_frames;
2353		ifp->if_ipackets = mstats->rx_frames;
2354		ifp->if_obytes = mstats->tx_octets;
2355		ifp->if_ibytes = mstats->rx_octets;
2356		ifp->if_omcasts = mstats->tx_mcast_frames;
2357		ifp->if_imcasts = mstats->rx_mcast_frames;
2358		ifp->if_collisions = mstats->tx_total_collisions;
2359		ifp->if_iqdrops = mstats->rx_cong_drops;
2360
2361		drops = 0;
2362		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2363			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2364		ifp->if_snd.ifq_drops = drops;
2365
2366		ifp->if_oerrors =
2367		    mstats->tx_excess_collisions +
2368		    mstats->tx_underrun +
2369		    mstats->tx_len_errs +
2370		    mstats->tx_mac_internal_errs +
2371		    mstats->tx_excess_deferral +
2372		    mstats->tx_fcs_errs;
2373		ifp->if_ierrors =
2374		    mstats->rx_jabber +
2375		    mstats->rx_data_errs +
2376		    mstats->rx_sequence_errs +
2377		    mstats->rx_runt +
2378		    mstats->rx_too_long +
2379		    mstats->rx_mac_internal_errs +
2380		    mstats->rx_short +
2381		    mstats->rx_fcs_errs;
2382
2383		if (mac->multiport)
2384			continue;
2385
2386		/* Count rx fifo overflows, once per second */
2387		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2388		reset = 0;
2389		if (cause & F_RXFIFO_OVERFLOW) {
2390			mac->stats.rx_fifo_ovfl++;
2391			reset |= F_RXFIFO_OVERFLOW;
2392		}
2393		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2394	}
2395}
2396
2397static void
2398touch_bars(device_t dev)
2399{
2400	/*
2401	 * Don't enable yet
2402	 */
2403#if !defined(__LP64__) && 0
2404	u32 v;
2405
2406	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2407	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2408	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2409	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2410	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2411	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2412#endif
2413}
2414
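/*
 * Write len bytes at offset into the serial EEPROM.  The EEPROM is accessed
 * in 4-byte words, so an unaligned request turns into a read-modify-write of
 * the surrounding aligned region.
 */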
2415static int
2416set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2417{
2418	uint8_t *buf;
2419	int err = 0;
2420	u32 aligned_offset, aligned_len, *p;
2421	struct adapter *adapter = pi->adapter;
2422
2423
2424	aligned_offset = offset & ~3;
2425	aligned_len = (len + (offset & 3) + 3) & ~3;
2426
2427	if (aligned_offset != offset || aligned_len != len) {
2428		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2429		if (!buf)
2430			return (ENOMEM);
2431		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2432		if (!err && aligned_len > 4)
2433			err = t3_seeprom_read(adapter,
2434					      aligned_offset + aligned_len - 4,
2435					      (u32 *)&buf[aligned_len - 4]);
2436		if (err)
2437			goto out;
2438		memcpy(buf + (offset & 3), data, len);
2439	} else
2440		buf = (uint8_t *)(uintptr_t)data;
2441
2442	err = t3_seeprom_wp(adapter, 0);
2443	if (err)
2444		goto out;
2445
2446	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2447		err = t3_seeprom_write(adapter, aligned_offset, *p);
2448		aligned_offset += 4;
2449	}
2450
2451	if (!err)
2452		err = t3_seeprom_wp(adapter, 1);
2453out:
2454	if (buf != data)
2455		free(buf, M_DEVBUF);
	return (err);
2457}
2458
2459
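/*
 * A negative value means "don't care" and is treated as in range.
 */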
2460static int
2461in_range(int val, int lo, int hi)
2462{
	return (val < 0 || (val <= hi && val >= lo));
2464}
2465
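/*
 * The extension character device exists only to carry ioctls; open and close
 * are no-ops.
 */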
2466static int
2467cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2468{
	return (0);
2470}
2471
2472static int
2473cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2474{
	return (0);
2476}
2477
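/*
 * ioctl handler for the extension device.  All of these commands are
 * privileged (PRIV_DRIVER).
 */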
2478static int
2479cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2480    int fflag, struct thread *td)
2481{
2482	int mmd, error = 0;
2483	struct port_info *pi = dev->si_drv1;
2484	adapter_t *sc = pi->adapter;
2485
2486#ifdef PRIV_SUPPORTED
2487	if (priv_check(td, PRIV_DRIVER)) {
2488		if (cxgb_debug)
2489			printf("user does not have access to privileged ioctls\n");
2490		return (EPERM);
2491	}
2492#else
2493	if (suser(td)) {
2494		if (cxgb_debug)
2495			printf("user does not have access to privileged ioctls\n");
2496		return (EPERM);
2497	}
2498#endif
2499
2500	switch (cmd) {
2501	case CHELSIO_GET_MIIREG: {
2502		uint32_t val;
2503		struct cphy *phy = &pi->phy;
2504		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2505
2506		if (!phy->mdio_read)
2507			return (EOPNOTSUPP);
2508		if (is_10G(sc)) {
2509			mmd = mid->phy_id >> 8;
2510			if (!mmd)
2511				mmd = MDIO_DEV_PCS;
2512			else if (mmd > MDIO_DEV_VEND2)
2513				return (EINVAL);
2514
2515			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2516					     mid->reg_num, &val);
2517		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2519					     mid->reg_num & 0x1f, &val);
2520		if (error == 0)
2521			mid->val_out = val;
2522		break;
2523	}
2524	case CHELSIO_SET_MIIREG: {
2525		struct cphy *phy = &pi->phy;
2526		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2527
2528		if (!phy->mdio_write)
2529			return (EOPNOTSUPP);
2530		if (is_10G(sc)) {
2531			mmd = mid->phy_id >> 8;
2532			if (!mmd)
2533				mmd = MDIO_DEV_PCS;
2534			else if (mmd > MDIO_DEV_VEND2)
2535				return (EINVAL);
2536
2537			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2538					      mmd, mid->reg_num, mid->val_in);
2539		} else
2540			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2541					      mid->reg_num & 0x1f,
2542					      mid->val_in);
2543		break;
2544	}
2545	case CHELSIO_SETREG: {
2546		struct ch_reg *edata = (struct ch_reg *)data;
2547		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2548			return (EFAULT);
2549		t3_write_reg(sc, edata->addr, edata->val);
2550		break;
2551	}
2552	case CHELSIO_GETREG: {
2553		struct ch_reg *edata = (struct ch_reg *)data;
2554		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2555			return (EFAULT);
2556		edata->val = t3_read_reg(sc, edata->addr);
2557		break;
2558	}
2559	case CHELSIO_GET_SGE_CONTEXT: {
2560		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2561		mtx_lock_spin(&sc->sge.reg_lock);
2562		switch (ecntxt->cntxt_type) {
2563		case CNTXT_TYPE_EGRESS:
2564			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2565			    ecntxt->data);
2566			break;
2567		case CNTXT_TYPE_FL:
2568			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2569			    ecntxt->data);
2570			break;
2571		case CNTXT_TYPE_RSP:
2572			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2573			    ecntxt->data);
2574			break;
2575		case CNTXT_TYPE_CQ:
2576			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2577			    ecntxt->data);
2578			break;
2579		default:
2580			error = EINVAL;
2581			break;
2582		}
2583		mtx_unlock_spin(&sc->sge.reg_lock);
2584		break;
2585	}
2586	case CHELSIO_GET_SGE_DESC: {
2587		struct ch_desc *edesc = (struct ch_desc *)data;
2588		int ret;
2589		if (edesc->queue_num >= SGE_QSETS * 6)
2590			return (EINVAL);
2591		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2592		    edesc->queue_num % 6, edesc->idx, edesc->data);
2593		if (ret < 0)
2594			return (EINVAL);
2595		edesc->size = ret;
2596		break;
2597	}
2598	case CHELSIO_GET_QSET_PARAMS: {
2599		struct qset_params *q;
2600		struct ch_qset_params *t = (struct ch_qset_params *)data;
2601		int q1 = pi->first_qset;
2602		int nqsets = pi->nqsets;
2603		int i;
2604
2605		if (t->qset_idx >= nqsets)
			return (EINVAL);
2607
2608		i = q1 + t->qset_idx;
2609		q = &sc->params.sge.qset[i];
2610		t->rspq_size   = q->rspq_size;
2611		t->txq_size[0] = q->txq_size[0];
2612		t->txq_size[1] = q->txq_size[1];
2613		t->txq_size[2] = q->txq_size[2];
2614		t->fl_size[0]  = q->fl_size;
2615		t->fl_size[1]  = q->jumbo_size;
2616		t->polling     = q->polling;
2617		t->lro         = q->lro;
2618		t->intr_lat    = q->coalesce_usecs;
2619		t->cong_thres  = q->cong_thres;
2620		t->qnum        = i;
2621
2622		if ((sc->flags & FULL_INIT_DONE) == 0)
2623			t->vector = 0;
2624		else if (sc->flags & USING_MSIX)
2625			t->vector = rman_get_start(sc->msix_irq_res[i]);
2626		else
2627			t->vector = rman_get_start(sc->irq_res);
2628
2629		break;
2630	}
2631	case CHELSIO_GET_QSET_NUM: {
2632		struct ch_reg *edata = (struct ch_reg *)data;
2633		edata->val = pi->nqsets;
2634		break;
2635	}
2636	case CHELSIO_LOAD_FW: {
2637		uint8_t *fw_data;
2638		uint32_t vers;
2639		struct ch_mem_range *t = (struct ch_mem_range *)data;
2640
		/*
		 * Firmware may be loaded only before FULL_INIT_DONE is set.
		 *
		 * FW_UPTODATE is also set so that the rest of the initialization
		 * will not overwrite what was loaded here.  This gives you the
		 * flexibility to load any firmware (and maybe shoot yourself in
		 * the foot).
		 */
2649
2650		ADAPTER_LOCK(sc);
2651		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2652			ADAPTER_UNLOCK(sc);
2653			return (EBUSY);
2654		}
2655
2656		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2657		if (!fw_data)
2658			error = ENOMEM;
2659		else
2660			error = copyin(t->buf, fw_data, t->len);
2661
2662		if (!error)
2663			error = -t3_load_fw(sc, fw_data, t->len);
2664
2665		if (t3_get_fw_version(sc, &vers) == 0) {
2666			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2667			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2668			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2669		}
2670
2671		if (!error)
2672			sc->flags |= FW_UPTODATE;
2673
2674		free(fw_data, M_DEVBUF);
2675		ADAPTER_UNLOCK(sc);
2676		break;
2677	}
2678	case CHELSIO_LOAD_BOOT: {
2679		uint8_t *boot_data;
2680		struct ch_mem_range *t = (struct ch_mem_range *)data;
2681
2682		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2683		if (!boot_data)
			return (ENOMEM);
2685
2686		error = copyin(t->buf, boot_data, t->len);
2687		if (!error)
2688			error = -t3_load_boot(sc, boot_data, t->len);
2689
2690		free(boot_data, M_DEVBUF);
2691		break;
2692	}
2693	case CHELSIO_GET_PM: {
2694		struct ch_pm *m = (struct ch_pm *)data;
2695		struct tp_params *p = &sc->params.tp;
2696
2697		if (!is_offload(sc))
2698			return (EOPNOTSUPP);
2699
2700		m->tx_pg_sz = p->tx_pg_size;
2701		m->tx_num_pg = p->tx_num_pgs;
2702		m->rx_pg_sz  = p->rx_pg_size;
2703		m->rx_num_pg = p->rx_num_pgs;
2704		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2705
2706		break;
2707	}
2708	case CHELSIO_SET_PM: {
2709		struct ch_pm *m = (struct ch_pm *)data;
2710		struct tp_params *p = &sc->params.tp;
2711
2712		if (!is_offload(sc))
2713			return (EOPNOTSUPP);
2714		if (sc->flags & FULL_INIT_DONE)
2715			return (EBUSY);
2716
2717		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2718		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2719			return (EINVAL);	/* not power of 2 */
2720		if (!(m->rx_pg_sz & 0x14000))
2721			return (EINVAL);	/* not 16KB or 64KB */
2722		if (!(m->tx_pg_sz & 0x1554000))
			return (EINVAL);	/* not a power of 4 between 16KB and 16MB */
2724		if (m->tx_num_pg == -1)
2725			m->tx_num_pg = p->tx_num_pgs;
2726		if (m->rx_num_pg == -1)
2727			m->rx_num_pg = p->rx_num_pgs;
2728		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2729			return (EINVAL);
2730		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2731		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2732			return (EINVAL);
2733
2734		p->rx_pg_size = m->rx_pg_sz;
2735		p->tx_pg_size = m->tx_pg_sz;
2736		p->rx_num_pgs = m->rx_num_pg;
2737		p->tx_num_pgs = m->tx_num_pg;
2738		break;
2739	}
2740	case CHELSIO_SETMTUTAB: {
2741		struct ch_mtus *m = (struct ch_mtus *)data;
2742		int i;
2743
2744		if (!is_offload(sc))
2745			return (EOPNOTSUPP);
2746		if (offload_running(sc))
2747			return (EBUSY);
2748		if (m->nmtus != NMTUS)
2749			return (EINVAL);
2750		if (m->mtus[0] < 81)         /* accommodate SACK */
2751			return (EINVAL);
2752
2753		/*
2754		 * MTUs must be in ascending order
2755		 */
2756		for (i = 1; i < NMTUS; ++i)
2757			if (m->mtus[i] < m->mtus[i - 1])
2758				return (EINVAL);
2759
2760		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2761		break;
2762	}
2763	case CHELSIO_GETMTUTAB: {
2764		struct ch_mtus *m = (struct ch_mtus *)data;
2765
2766		if (!is_offload(sc))
2767			return (EOPNOTSUPP);
2768
2769		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2770		m->nmtus = NMTUS;
2771		break;
2772	}
2773	case CHELSIO_GET_MEM: {
2774		struct ch_mem_range *t = (struct ch_mem_range *)data;
2775		struct mc7 *mem;
2776		uint8_t *useraddr;
2777		u64 buf[32];
2778
2779		/*
2780		 * Use these to avoid modifying len/addr in the return
2781		 * struct
2782		 */
2783		uint32_t len = t->len, addr = t->addr;
2784
2785		if (!is_offload(sc))
2786			return (EOPNOTSUPP);
2787		if (!(sc->flags & FULL_INIT_DONE))
2788			return (EIO);         /* need the memory controllers */
2789		if ((addr & 0x7) || (len & 0x7))
2790			return (EINVAL);
2791		if (t->mem_id == MEM_CM)
2792			mem = &sc->cm;
2793		else if (t->mem_id == MEM_PMRX)
2794			mem = &sc->pmrx;
2795		else if (t->mem_id == MEM_PMTX)
2796			mem = &sc->pmtx;
2797		else
2798			return (EINVAL);
2799
2800		/*
2801		 * Version scheme:
2802		 * bits 0..9: chip version
2803		 * bits 10..15: chip revision
2804		 */
2805		t->version = 3 | (sc->params.rev << 10);
2806
2807		/*
2808		 * Read 256 bytes at a time as len can be large and we don't
2809		 * want to use huge intermediate buffers.
2810		 */
2811		useraddr = (uint8_t *)t->buf;
2812		while (len) {
2813			unsigned int chunk = min(len, sizeof(buf));
2814
2815			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2816			if (error)
2817				return (-error);
2818			if (copyout(buf, useraddr, chunk))
2819				return (EFAULT);
2820			useraddr += chunk;
2821			addr += chunk;
2822			len -= chunk;
2823		}
2824		break;
2825	}
2826	case CHELSIO_READ_TCAM_WORD: {
2827		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2828
2829		if (!is_offload(sc))
2830			return (EOPNOTSUPP);
2831		if (!(sc->flags & FULL_INIT_DONE))
2832			return (EIO);         /* need MC5 */
		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2835	}
2836	case CHELSIO_SET_TRACE_FILTER: {
2837		struct ch_trace *t = (struct ch_trace *)data;
2838		const struct trace_params *tp;
2839
2840		tp = (const struct trace_params *)&t->sip;
2841		if (t->config_tx)
2842			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2843					       t->trace_tx);
2844		if (t->config_rx)
2845			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2846					       t->trace_rx);
2847		break;
2848	}
2849	case CHELSIO_SET_PKTSCHED: {
2850		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2851		if (sc->open_device_map == 0)
2852			return (EAGAIN);
2853		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2854		    p->binding);
2855		break;
2856	}
2857	case CHELSIO_IFCONF_GETREGS: {
2858		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2859		int reglen = cxgb_get_regs_len();
2860		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2861		if (buf == NULL) {
2862			return (ENOMEM);
2863		}
2864		if (regs->len > reglen)
2865			regs->len = reglen;
2866		else if (regs->len < reglen)
2867			error = ENOBUFS;
2868
2869		if (!error) {
2870			cxgb_get_regs(sc, regs, buf);
2871			error = copyout(buf, regs->data, reglen);
2872		}
2873		free(buf, M_DEVBUF);
2874
2875		break;
2876	}
2877	case CHELSIO_SET_HW_SCHED: {
2878		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2879		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2880
2881		if ((sc->flags & FULL_INIT_DONE) == 0)
2882			return (EAGAIN);       /* need TP to be initialized */
2883		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2884		    !in_range(t->channel, 0, 1) ||
2885		    !in_range(t->kbps, 0, 10000000) ||
2886		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2887		    !in_range(t->flow_ipg, 0,
2888			      dack_ticks_to_usec(sc, 0x7ff)))
2889			return (EINVAL);
2890
2891		if (t->kbps >= 0) {
2892			error = t3_config_sched(sc, t->kbps, t->sched);
2893			if (error < 0)
2894				return (-error);
2895		}
2896		if (t->class_ipg >= 0)
2897			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2898		if (t->flow_ipg >= 0) {
2899			t->flow_ipg *= 1000;     /* us -> ns */
2900			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2901		}
2902		if (t->mode >= 0) {
2903			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2904
2905			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2906					 bit, t->mode ? bit : 0);
2907		}
2908		if (t->channel >= 0)
2909			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2910					 1 << t->sched, t->channel << t->sched);
2911		break;
2912	}
2913	case CHELSIO_GET_EEPROM: {
2914		int i;
2915		struct ch_eeprom *e = (struct ch_eeprom *)data;
2916		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2917
2918		if (buf == NULL) {
2919			return (ENOMEM);
2920		}
2921		e->magic = EEPROM_MAGIC;
2922		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2923			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2924
2925		if (!error)
2926			error = copyout(buf + e->offset, e->data, e->len);
2927
2928		free(buf, M_DEVBUF);
2929		break;
2930	}
2931	case CHELSIO_CLEAR_STATS: {
2932		if (!(sc->flags & FULL_INIT_DONE))
			return (EAGAIN);
2934
2935		PORT_LOCK(pi);
2936		t3_mac_update_stats(&pi->mac);
2937		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2938		PORT_UNLOCK(pi);
2939		break;
2940	}
2941	case CHELSIO_GET_UP_LA: {
2942		struct ch_up_la *la = (struct ch_up_la *)data;
2943		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2944		if (buf == NULL) {
2945			return (ENOMEM);
2946		}
2947		if (la->bufsize < LA_BUFSIZE)
2948			error = ENOBUFS;
2949
2950		if (!error)
2951			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2952					      &la->bufsize, buf);
2953		if (!error)
2954			error = copyout(buf, la->data, la->bufsize);
2955
2956		free(buf, M_DEVBUF);
2957		break;
2958	}
2959	case CHELSIO_GET_UP_IOQS: {
2960		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2961		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2962		uint32_t *v;
2963
2964		if (buf == NULL) {
2965			return (ENOMEM);
2966		}
2967		if (ioqs->bufsize < IOQS_BUFSIZE)
2968			error = ENOBUFS;
2969
2970		if (!error)
2971			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2972
2973		if (!error) {
2974			v = (uint32_t *)buf;
2975
2976			ioqs->ioq_rx_enable = *v++;
2977			ioqs->ioq_tx_enable = *v++;
2978			ioqs->ioq_rx_status = *v++;
2979			ioqs->ioq_tx_status = *v++;
2980
2981			error = copyout(v, ioqs->data, ioqs->bufsize);
2982		}
2983
2984		free(buf, M_DEVBUF);
2985		break;
2986	}
2987	case CHELSIO_SET_FILTER: {
2988		struct ch_filter *f = (struct ch_filter *)data;
2989		struct filter_info *p;
2990		unsigned int nfilters = sc->params.mc5.nfilters;
2991
2992		if (!is_offload(sc))
2993			return (EOPNOTSUPP);	/* No TCAM */
2994		if (!(sc->flags & FULL_INIT_DONE))
2995			return (EAGAIN);	/* mc5 not setup yet */
2996		if (nfilters == 0)
2997			return (EBUSY);		/* TOE will use TCAM */
2998
2999		/* sanity checks */
3000		if (f->filter_id >= nfilters ||
3001		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3002		    (f->val.sport && f->mask.sport != 0xffff) ||
3003		    (f->val.dport && f->mask.dport != 0xffff) ||
3004		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3005		    (f->val.vlan_prio &&
3006			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3007		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3008		    f->qset >= SGE_QSETS ||
3009		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3010			return (EINVAL);
3011
3012		/* Was allocated with M_WAITOK */
3013		KASSERT(sc->filters, ("filter table NULL\n"));
3014
3015		p = &sc->filters[f->filter_id];
3016		if (p->locked)
3017			return (EPERM);
3018
3019		bzero(p, sizeof(*p));
3020		p->sip = f->val.sip;
3021		p->sip_mask = f->mask.sip;
3022		p->dip = f->val.dip;
3023		p->sport = f->val.sport;
3024		p->dport = f->val.dport;
3025		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3026		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3027		    FILTER_NO_VLAN_PRI;
3028		p->mac_hit = f->mac_hit;
3029		p->mac_vld = f->mac_addr_idx != 0xffff;
3030		p->mac_idx = f->mac_addr_idx;
3031		p->pkt_type = f->proto;
3032		p->report_filter_id = f->want_filter_id;
3033		p->pass = f->pass;
3034		p->rss = f->rss;
3035		p->qset = f->qset;
3036
3037		error = set_filter(sc, f->filter_id, p);
3038		if (error == 0)
3039			p->valid = 1;
3040		break;
3041	}
3042	case CHELSIO_DEL_FILTER: {
3043		struct ch_filter *f = (struct ch_filter *)data;
3044		struct filter_info *p;
3045		unsigned int nfilters = sc->params.mc5.nfilters;
3046
3047		if (!is_offload(sc))
3048			return (EOPNOTSUPP);
3049		if (!(sc->flags & FULL_INIT_DONE))
3050			return (EAGAIN);
3051		if (nfilters == 0 || sc->filters == NULL)
3052			return (EINVAL);
3053		if (f->filter_id >= nfilters)
			return (EINVAL);
3055
3056		p = &sc->filters[f->filter_id];
3057		if (p->locked)
3058			return (EPERM);
3059		if (!p->valid)
3060			return (EFAULT); /* Read "Bad address" as "Bad index" */
3061
3062		bzero(p, sizeof(*p));
3063		p->sip = p->sip_mask = 0xffffffff;
3064		p->vlan = 0xfff;
3065		p->vlan_prio = FILTER_NO_VLAN_PRI;
3066		p->pkt_type = 1;
3067		error = set_filter(sc, f->filter_id, p);
3068		break;
3069	}
3070	case CHELSIO_GET_FILTER: {
3071		struct ch_filter *f = (struct ch_filter *)data;
3072		struct filter_info *p;
3073		unsigned int i, nfilters = sc->params.mc5.nfilters;
3074
3075		if (!is_offload(sc))
3076			return (EOPNOTSUPP);
3077		if (!(sc->flags & FULL_INIT_DONE))
3078			return (EAGAIN);
3079		if (nfilters == 0 || sc->filters == NULL)
3080			return (EINVAL);
3081
3082		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3083		for (; i < nfilters; i++) {
3084			p = &sc->filters[i];
3085			if (!p->valid)
3086				continue;
3087
3088			bzero(f, sizeof(*f));
3089
3090			f->filter_id = i;
3091			f->val.sip = p->sip;
3092			f->mask.sip = p->sip_mask;
3093			f->val.dip = p->dip;
3094			f->mask.dip = p->dip ? 0xffffffff : 0;
3095			f->val.sport = p->sport;
3096			f->mask.sport = p->sport ? 0xffff : 0;
3097			f->val.dport = p->dport;
3098			f->mask.dport = p->dport ? 0xffff : 0;
3099			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3100			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3101			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3102			    0 : p->vlan_prio;
3103			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3104			    0 : FILTER_NO_VLAN_PRI;
3105			f->mac_hit = p->mac_hit;
3106			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3107			f->proto = p->pkt_type;
3108			f->want_filter_id = p->report_filter_id;
3109			f->pass = p->pass;
3110			f->rss = p->rss;
3111			f->qset = p->qset;
3112
3113			break;
3114		}
3115
3116		if (i == nfilters)
3117			f->filter_id = 0xffffffff;
3118		break;
3119	}
3120	default:
		return (EOPNOTSUPP);
3123	}
3124
3125	return (error);
3126}
3127
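/*
 * Copy the contiguous block of registers [start, end] into the register dump
 * buffer at the same offsets.
 */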
3128static __inline void
3129reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3130    unsigned int end)
3131{
3132	uint32_t *p = (uint32_t *)(buf + start);
3133
3134	for ( ; start <= end; start += sizeof(uint32_t))
3135		*p++ = t3_read_reg(ap, start);
3136}
3137
3138#define T3_REGMAP_SIZE (3 * 1024)
3139static int
3140cxgb_get_regs_len(void)
3141{
	return (T3_REGMAP_SIZE);
3143}
3144
3145static void
3146cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3147{
3148
3149	/*
3150	 * Version scheme:
3151	 * bits 0..9: chip version
3152	 * bits 10..15: chip revision
3153	 * bit 31: set for PCIe cards
3154	 */
3155	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3156
3157	/*
3158	 * We skip the MAC statistics registers because they are clear-on-read.
3159	 * Also reading multi-register stats would need to synchronize with the
3160	 * periodic mac stats accumulation.  Hard to justify the complexity.
3161	 */
3162	memset(buf, 0, cxgb_get_regs_len());
3163	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3164	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3165	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3166	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3167	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3168	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3169		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3170	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3171		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3172}
3173
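/*
 * Allocate the software state for the hardware filters.  The last filter is
 * set up as a locked pass-with-RSS catch-all.
 */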
3174static int
3175alloc_filters(struct adapter *sc)
3176{
3177	struct filter_info *p;
3178	unsigned int nfilters = sc->params.mc5.nfilters;
3179
3180	if (nfilters == 0)
3181		return (0);
3182
3183	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3184	sc->filters = p;
3185
3186	p = &sc->filters[nfilters - 1];
3187	p->vlan = 0xfff;
3188	p->vlan_prio = FILTER_NO_VLAN_PRI;
3189	p->pass = p->rss = p->valid = p->locked = 1;
3190
3191	return (0);
3192}
3193
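/*
 * Enable filtering in the hardware and program the filters that are marked
 * locked (set up in alloc_filters).
 */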
3194static int
3195setup_hw_filters(struct adapter *sc)
3196{
3197	int i, rc;
3198	unsigned int nfilters = sc->params.mc5.nfilters;
3199
3200	if (!sc->filters)
3201		return (0);
3202
3203	t3_enable_filters(sc);
3204
3205	for (i = rc = 0; i < nfilters && !rc; i++) {
3206		if (sc->filters[i].locked)
3207			rc = set_filter(sc, i, &sc->filters[i]);
3208	}
3209
3210	return (rc);
3211}
3212
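/*
 * Program one hardware filter.  The filter id is translated to its TCAM
 * index (the filters sit just below the routing region), and the filter is
 * written as a CPL_PASS_OPEN_REQ plus two TCB field updates carried in a
 * single firmware-bypass work request.
 */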
3213static int
3214set_filter(struct adapter *sc, int id, const struct filter_info *f)
3215{
3216	int len;
3217	struct mbuf *m;
3218	struct ulp_txpkt *txpkt;
3219	struct work_request_hdr *wr;
3220	struct cpl_pass_open_req *oreq;
3221	struct cpl_set_tcb_field *sreq;
3222
3223	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3224	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3225
3226	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3227	      sc->params.mc5.nfilters;
3228
3229	m = m_gethdr(M_WAITOK, MT_DATA);
3230	m->m_len = m->m_pkthdr.len = len;
3231	bzero(mtod(m, char *), len);
3232
3233	wr = mtod(m, struct work_request_hdr *);
3234	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3235
3236	oreq = (struct cpl_pass_open_req *)(wr + 1);
3237	txpkt = (struct ulp_txpkt *)oreq;
3238	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3239	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3240	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3241	oreq->local_port = htons(f->dport);
3242	oreq->peer_port = htons(f->sport);
3243	oreq->local_ip = htonl(f->dip);
3244	oreq->peer_ip = htonl(f->sip);
3245	oreq->peer_netmask = htonl(f->sip_mask);
3246	oreq->opt0h = 0;
3247	oreq->opt0l = htonl(F_NO_OFFLOAD);
3248	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3249			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3250			 V_VLAN_PRI(f->vlan_prio >> 1) |
3251			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3252			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3253			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3254
3255	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3256	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3257			  (f->report_filter_id << 15) | (1 << 23) |
3258			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3259	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3260	t3_mgmt_tx(sc, m);
3261
3262	if (f->pass && !f->rss) {
3263		len = sizeof(*sreq);
3264		m = m_gethdr(M_WAITOK, MT_DATA);
3265		m->m_len = m->m_pkthdr.len = len;
3266		bzero(mtod(m, char *), len);
3267		sreq = mtod(m, struct cpl_set_tcb_field *);
3268		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3269		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3270				 (u64)sc->rrss_map[f->qset] << 19);
3271		t3_mgmt_tx(sc, m);
3272	}
	return (0);
3274}
3275
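/*
 * Fill out a CPL_SET_TCB_FIELD request.
 */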
3276static inline void
3277mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3278    unsigned int word, u64 mask, u64 val)
3279{
3280	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3281	req->reply = V_NO_REPLY(1);
3282	req->cpu_idx = 0;
3283	req->word = htons(word);
3284	req->mask = htobe64(mask);
3285	req->val = htobe64(val);
3286}
3287
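/*
 * Same as mk_set_tcb_field, but the request is prefixed with a ULP_TXPKT
 * header so that it can be embedded in a bypass work request.
 */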
3288static inline void
3289set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3290    unsigned int word, u64 mask, u64 val)
3291{
3292	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3293
3294	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3295	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3296	mk_set_tcb_field(req, tid, word, mask, val);
3297}
3298
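/*
 * Call func for every T3 adapter in the system, with the adapter list lock
 * held across the walk.
 */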
3299void
3300t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3301{
3302	struct adapter *sc;
3303
3304	mtx_lock(&t3_list_lock);
3305	SLIST_FOREACH(sc, &t3_list, link) {
3306		/*
3307		 * func should not make any assumptions about what state sc is
3308		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3309		 */
3310		func(sc, arg);
3311	}
3312	mtx_unlock(&t3_list_lock);
3313}
3314
3315#ifdef TCP_OFFLOAD
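/*
 * Enable or disable TOE on a port.  Enabling activates the TOM upper layer
 * driver (and, as a temporary measure, iWARP) if it isn't active yet; the
 * offload map tracks which ports have TOE enabled.
 */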
3316static int
3317toe_capability(struct port_info *pi, int enable)
3318{
3319	int rc;
3320	struct adapter *sc = pi->adapter;
3321
3322	ADAPTER_LOCK_ASSERT_OWNED(sc);
3323
3324	if (!is_offload(sc))
3325		return (ENODEV);
3326
3327	if (enable) {
3328		if (!(sc->flags & FULL_INIT_DONE)) {
3329			log(LOG_WARNING,
3330			    "You must enable a cxgb interface first\n");
3331			return (EAGAIN);
3332		}
3333
3334		if (isset(&sc->offload_map, pi->port_id))
3335			return (0);
3336
3337		if (!(sc->flags & TOM_INIT_DONE)) {
3338			rc = t3_activate_uld(sc, ULD_TOM);
3339			if (rc == EAGAIN) {
3340				log(LOG_WARNING,
3341				    "You must kldload t3_tom.ko before trying "
3342				    "to enable TOE on a cxgb interface.\n");
3343			}
3344			if (rc != 0)
3345				return (rc);
3346			KASSERT(sc->tom_softc != NULL,
3347			    ("%s: TOM activated but softc NULL", __func__));
3348			KASSERT(sc->flags & TOM_INIT_DONE,
3349			    ("%s: TOM activated but flag not set", __func__));
3350		}
3351
3352		setbit(&sc->offload_map, pi->port_id);
3353
3354		/*
3355		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3356		 * enabled on any port.  Need to figure out how to enable,
3357		 * disable, load, and unload iWARP cleanly.
3358		 */
3359		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3360		    t3_activate_uld(sc, ULD_IWARP) == 0)
3361			setbit(&sc->offload_map, MAX_NPORTS);
3362	} else {
3363		if (!isset(&sc->offload_map, pi->port_id))
3364			return (0);
3365
3366		KASSERT(sc->flags & TOM_INIT_DONE,
3367		    ("%s: TOM never initialized?", __func__));
3368		clrbit(&sc->offload_map, pi->port_id);
3369	}
3370
3371	return (0);
3372}
3373
3374/*
3375 * Add an upper layer driver to the global list.
3376 */
3377int
3378t3_register_uld(struct uld_info *ui)
3379{
3380	int rc = 0;
3381	struct uld_info *u;
3382
3383	mtx_lock(&t3_uld_list_lock);
3384	SLIST_FOREACH(u, &t3_uld_list, link) {
		if (u->uld_id == ui->uld_id) {
			rc = EEXIST;
			goto done;
		}
3389	}
3390
3391	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3392	ui->refcount = 0;
3393done:
3394	mtx_unlock(&t3_uld_list_lock);
3395	return (rc);
3396}
3397
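/*
 * Remove an upper layer driver from the global list.  Fails with EBUSY if
 * the driver is still in use.
 */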
3398int
3399t3_unregister_uld(struct uld_info *ui)
3400{
3401	int rc = EINVAL;
3402	struct uld_info *u;
3403
3404	mtx_lock(&t3_uld_list_lock);
3405
3406	SLIST_FOREACH(u, &t3_uld_list, link) {
		if (u == ui) {
			if (ui->refcount > 0) {
				rc = EBUSY;
				goto done;
			}

			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
			rc = 0;
			goto done;
		}
3417	}
3418done:
3419	mtx_unlock(&t3_uld_list_lock);
3420	return (rc);
3421}
3422
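/*
 * Activate the upper layer driver with the given id on this adapter and bump
 * its refcount on success.  EAGAIN means the ULD is not registered, typically
 * because its module is not loaded.
 */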
3423int
3424t3_activate_uld(struct adapter *sc, int id)
3425{
3426	int rc = EAGAIN;
3427	struct uld_info *ui;
3428
3429	mtx_lock(&t3_uld_list_lock);
3430
3431	SLIST_FOREACH(ui, &t3_uld_list, link) {
3432		if (ui->uld_id == id) {
3433			rc = ui->activate(sc);
3434			if (rc == 0)
3435				ui->refcount++;
3436			goto done;
3437		}
3438	}
3439done:
3440	mtx_unlock(&t3_uld_list_lock);
3441
3442	return (rc);
3443}
3444
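/*
 * Deactivate an upper layer driver on this adapter and drop its refcount on
 * success.
 */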
3445int
3446t3_deactivate_uld(struct adapter *sc, int id)
3447{
3448	int rc = EINVAL;
3449	struct uld_info *ui;
3450
3451	mtx_lock(&t3_uld_list_lock);
3452
3453	SLIST_FOREACH(ui, &t3_uld_list, link) {
3454		if (ui->uld_id == id) {
3455			rc = ui->deactivate(sc);
3456			if (rc == 0)
3457				ui->refcount--;
3458			goto done;
3459		}
3460	}
3461done:
3462	mtx_unlock(&t3_uld_list_lock);
3463
3464	return (rc);
3465}
3466
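/*
 * Default handler for CPL messages with no registered handler: free the mbuf.
 */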
3467static int
3468cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3469    struct mbuf *m)
3470{
3471	m_freem(m);
3472	return (EDOOFUS);
3473}
3474
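/*
 * Install a handler for a CPL opcode (NULL restores the default handler that
 * drops the message).  The table entry is updated with an atomic store.
 */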
3475int
3476t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3477{
3478	uintptr_t *loc, new;
3479
3480	if (opcode >= NUM_CPL_HANDLERS)
3481		return (EINVAL);
3482
3483	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3484	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3485	atomic_store_rel_ptr(loc, new);
3486
3487	return (0);
3488}
3489#endif
3490
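/*
 * Module event handler: set up the global adapter (and ULD) lists on load and
 * refuse to unload while either list is non-empty.
 */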
3491static int
3492cxgbc_mod_event(module_t mod, int cmd, void *arg)
3493{
3494	int rc = 0;
3495
3496	switch (cmd) {
3497	case MOD_LOAD:
3498		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3499		SLIST_INIT(&t3_list);
3500#ifdef TCP_OFFLOAD
3501		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3502		SLIST_INIT(&t3_uld_list);
3503#endif
3504		break;
3505
3506	case MOD_UNLOAD:
3507#ifdef TCP_OFFLOAD
3508		mtx_lock(&t3_uld_list_lock);
3509		if (!SLIST_EMPTY(&t3_uld_list)) {
3510			rc = EBUSY;
3511			mtx_unlock(&t3_uld_list_lock);
3512			break;
3513		}
3514		mtx_unlock(&t3_uld_list_lock);
3515		mtx_destroy(&t3_uld_list_lock);
3516#endif
3517		mtx_lock(&t3_list_lock);
3518		if (!SLIST_EMPTY(&t3_list)) {
3519			rc = EBUSY;
3520			mtx_unlock(&t3_list_lock);
3521			break;
3522		}
3523		mtx_unlock(&t3_list_lock);
3524		mtx_destroy(&t3_list_lock);
3525		break;
3526	}
3527
3528	return (rc);
3529}
3530