cxgb_main.c revision 240680
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 240680 2012-09-18 22:04:59Z gavin $");
32
33#include "opt_inet.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/module.h>
40#include <sys/pciio.h>
41#include <sys/conf.h>
42#include <machine/bus.h>
43#include <machine/resource.h>
44#include <sys/bus_dma.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_arp.h>
64#include <net/if_dl.h>
65#include <net/if_media.h>
66#include <net/if_types.h>
67#include <net/if_vlan_var.h>
68
69#include <netinet/in_systm.h>
70#include <netinet/in.h>
71#include <netinet/if_ether.h>
72#include <netinet/ip.h>
74#include <netinet/tcp.h>
75#include <netinet/udp.h>
76
77#include <dev/pci/pcireg.h>
78#include <dev/pci/pcivar.h>
79#include <dev/pci/pci_private.h>
80
81#include <cxgb_include.h>
82
83#ifdef PRIV_SUPPORTED
84#include <sys/priv.h>
85#endif
86
87static int cxgb_setup_interrupts(adapter_t *);
88static void cxgb_teardown_interrupts(adapter_t *);
89static void cxgb_init(void *);
90static int cxgb_init_locked(struct port_info *);
91static int cxgb_uninit_locked(struct port_info *);
92static int cxgb_uninit_synchronized(struct port_info *);
93static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
94static int cxgb_media_change(struct ifnet *);
95static int cxgb_ifm_type(int);
96static void cxgb_build_medialist(struct port_info *);
97static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
98static int setup_sge_qsets(adapter_t *);
99static void cxgb_async_intr(void *);
100static void cxgb_tick_handler(void *, int);
101static void cxgb_tick(void *);
102static void link_check_callout(void *);
103static void check_link_status(void *, int);
104static void setup_rss(adapter_t *sc);
105static int alloc_filters(struct adapter *);
106static int setup_hw_filters(struct adapter *);
107static int set_filter(struct adapter *, int, const struct filter_info *);
108static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
109    unsigned int, u64, u64);
110static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
111    unsigned int, u64, u64);
112#ifdef TCP_OFFLOAD
113static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
114#endif
115
116/* Attachment glue for the PCI controller end of the device.  Each port of
117 * the device is attached separately, as defined later.
118 */
119static int cxgb_controller_probe(device_t);
120static int cxgb_controller_attach(device_t);
121static int cxgb_controller_detach(device_t);
122static void cxgb_free(struct adapter *);
123static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
124    unsigned int end);
125static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
126static int cxgb_get_regs_len(void);
127static void touch_bars(device_t dev);
128static void cxgb_update_mac_settings(struct port_info *p);
129#ifdef TCP_OFFLOAD
130static int toe_capability(struct port_info *, int);
131#endif
132
133static device_method_t cxgb_controller_methods[] = {
134	DEVMETHOD(device_probe,		cxgb_controller_probe),
135	DEVMETHOD(device_attach,	cxgb_controller_attach),
136	DEVMETHOD(device_detach,	cxgb_controller_detach),
137
138	DEVMETHOD_END
139};
140
141static driver_t cxgb_controller_driver = {
142	"cxgbc",
143	cxgb_controller_methods,
144	sizeof(struct adapter)
145};
146
147static int cxgbc_mod_event(module_t, int, void *);
148static devclass_t	cxgb_controller_devclass;
149DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
150    cxgbc_mod_event, 0);
151MODULE_VERSION(cxgbc, 1);
152
153/*
154 * Attachment glue for the ports.  Attachment is done directly to the
155 * controller device.
156 */
157static int cxgb_port_probe(device_t);
158static int cxgb_port_attach(device_t);
159static int cxgb_port_detach(device_t);
160
161static device_method_t cxgb_port_methods[] = {
162	DEVMETHOD(device_probe,		cxgb_port_probe),
163	DEVMETHOD(device_attach,	cxgb_port_attach),
164	DEVMETHOD(device_detach,	cxgb_port_detach),
165	{ 0, 0 }
166};
167
168static driver_t cxgb_port_driver = {
169	"cxgb",
170	cxgb_port_methods,
171	0
172};
173
174static d_ioctl_t cxgb_extension_ioctl;
175static d_open_t cxgb_extension_open;
176static d_close_t cxgb_extension_close;
177
178static struct cdevsw cxgb_cdevsw = {
179       .d_version =    D_VERSION,
180       .d_flags =      0,
181       .d_open =       cxgb_extension_open,
182       .d_close =      cxgb_extension_close,
183       .d_ioctl =      cxgb_extension_ioctl,
184       .d_name =       "cxgb",
185};
186
187static devclass_t	cxgb_port_devclass;
188DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
189MODULE_VERSION(cxgb, 1);
190
191static struct mtx t3_list_lock;
192static SLIST_HEAD(, adapter) t3_list;
193#ifdef TCP_OFFLOAD
194static struct mtx t3_uld_list_lock;
195static SLIST_HEAD(, uld_info) t3_uld_list;
196#endif
197
198/*
199 * The driver uses the best interrupt scheme available on a platform in the
200 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
201 * of these schemes the driver may consider as follows:
202 *
203 * msi = 2: choose from among all three options
204 * msi = 1: only consider MSI and pin interrupts
205 * msi = 0: force pin interrupts
206 */
207static int msi_allowed = 2;
208
209TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
210SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
211SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
212    "MSI-X, MSI, INTx selector");
213
214/*
215 * The driver uses an auto-queue algorithm by default.
216 * To disable it and force a single queue-set per port, use multiq = 0
217 */
218static int multiq = 1;
219TUNABLE_INT("hw.cxgb.multiq", &multiq);
220SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
221    "use min(ncpus/ports, 8) queue-sets per port");
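
/*
 * Both tunables above are read-only at runtime (CTLFLAG_RDTUN) and can be
 * set at boot time from loader.conf, e.g. hw.cxgb.msi_allowed=1 (MSI/INTx
 * only) or hw.cxgb.multiq=0 (one queue set per port).
 */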
222
223/*
224 * By default the driver will not update the firmware unless
225 * it was compiled against a newer version.
226 */
228static int force_fw_update = 0;
229TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
230SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
231    "update firmware even if up to date");
232
233int cxgb_use_16k_clusters = -1;
234TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
235SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
236    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
237
238static int nfilters = -1;
239TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
240SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
241    &nfilters, 0, "max number of entries in the filter table");
242
243enum {
244	MAX_TXQ_ENTRIES      = 16384,
245	MAX_CTRL_TXQ_ENTRIES = 1024,
246	MAX_RSPQ_ENTRIES     = 16384,
247	MAX_RX_BUFFERS       = 16384,
248	MAX_RX_JUMBO_BUFFERS = 16384,
249	MIN_TXQ_ENTRIES      = 4,
250	MIN_CTRL_TXQ_ENTRIES = 4,
251	MIN_RSPQ_ENTRIES     = 32,
252	MIN_FL_ENTRIES       = 32,
253	MIN_FL_JUMBO_ENTRIES = 32
254};
255
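/*
 * Software shadow of one entry in the adapter's hardware packet filter
 * table (see alloc_filters, setup_hw_filters, and set_filter below).
 */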
256struct filter_info {
257	u32 sip;
258	u32 sip_mask;
259	u32 dip;
260	u16 sport;
261	u16 dport;
262	u32 vlan:12;
263	u32 vlan_prio:3;
264	u32 mac_hit:1;
265	u32 mac_idx:4;
266	u32 mac_vld:1;
267	u32 pkt_type:2;
268	u32 report_filter_id:1;
269	u32 pass:1;
270	u32 rss:1;
271	u32 qset:3;
272	u32 locked:1;
273	u32 valid:1;
274};
275
276enum { FILTER_NO_VLAN_PRI = 7 };
277
278#define EEPROM_MAGIC 0x38E2F10C
279
280#define PORT_MASK ((1 << MAX_NPORTS) - 1)
281
282/* Table for probing the cards.  The desc field isn't actually used */
283struct cxgb_ident {
284	uint16_t	vendor;
285	uint16_t	device;
286	int		index;
287	char		*desc;
288} cxgb_identifiers[] = {
289	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
290	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
291	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
300	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
301	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
302	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
303	{0, 0, 0, NULL}
304};
305
306static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
307
308
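/*
 * Map the adapter's hardware revision to the single character used in the
 * TP EEPROM/SRAM firmware image names ('a', 'b', or 'c').
 */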
309static __inline char
310t3rev2char(struct adapter *adapter)
311{
312	char rev = 'z';
313
314	switch (adapter->params.rev) {
315	case T3_REV_A:
316		rev = 'a';
317		break;
318	case T3_REV_B:
319	case T3_REV_B2:
320		rev = 'b';
321		break;
322	case T3_REV_C:
323		rev = 'c';
324		break;
325	}
326	return rev;
327}
328
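/*
 * Find the cxgb_identifiers[] entry matching this device's PCI vendor and
 * device IDs, or return NULL if the card is not recognized.
 */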
329static struct cxgb_ident *
330cxgb_get_ident(device_t dev)
331{
332	struct cxgb_ident *id;
333
334	for (id = cxgb_identifiers; id->desc != NULL; id++) {
335		if ((id->vendor == pci_get_vendor(dev)) &&
336		    (id->device == pci_get_device(dev))) {
337			return (id);
338		}
339	}
340	return (NULL);
341}
342
343static const struct adapter_info *
344cxgb_get_adapter_info(device_t dev)
345{
346	struct cxgb_ident *id;
347	const struct adapter_info *ai;
348
349	id = cxgb_get_ident(dev);
350	if (id == NULL)
351		return (NULL);
352
353	ai = t3_get_adapter_info(id->index);
354
355	return (ai);
356}
357
358static int
359cxgb_controller_probe(device_t dev)
360{
361	const struct adapter_info *ai;
362	char *ports, buf[80];
363	int nports;
364
365	ai = cxgb_get_adapter_info(dev);
366	if (ai == NULL)
367		return (ENXIO);
368
369	nports = ai->nports0 + ai->nports1;
370	if (nports == 1)
371		ports = "port";
372	else
373		ports = "ports";
374
375	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
376	device_set_desc_copy(dev, buf);
377	return (BUS_PROBE_DEFAULT);
378}
379
380#define FW_FNAME "cxgb_t3fw"
381#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
382#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
383
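/*
 * Load the cxgb_t3fw image via firmware(9) and write it to the adapter.
 * On success the cached firmware version string is refreshed from the
 * hardware.
 */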
384static int
385upgrade_fw(adapter_t *sc)
386{
387	const struct firmware *fw;
388	int status;
389	u32 vers;
390
391	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
392		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
393		return (ENOENT);
394	} else
395		device_printf(sc->dev, "installing firmware on card\n");
396	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
397
398	if (status != 0) {
399		device_printf(sc->dev, "failed to install firmware: %d\n",
400		    status);
401	} else {
402		t3_get_fw_version(sc, &vers);
403		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
404		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
405		    G_FW_VERSION_MICRO(vers));
406	}
407
408	firmware_put(fw, FIRMWARE_UNLOAD);
409
410	return (status);
411}
412
413/*
414 * The cxgb_controller_attach function is responsible for the initial
415 * bringup of the device.  Its responsibilities include:
416 *
417 *  1. Determine if the device supports MSI or MSI-X.
418 *  2. Allocate bus resources so that we can access the Base Address Register
419 *  3. Create and initialize mutexes for the controller and its control
420 *     logic such as SGE and MDIO.
421 *  4. Call hardware specific setup routine for the adapter as a whole.
422 *  5. Allocate the BAR for doing MSI-X.
423 *  6. Setup the line interrupt iff MSI-X is not supported.
424 *  7. Create the driver's taskq.
425 *  8. Start one task queue service thread.
426 *  9. Check if the firmware and SRAM are up-to-date.  They will be
427 *     auto-updated later (before FULL_INIT_DONE), if required.
428 * 10. Create a child device for each MAC (port)
429 * 11. Initialize T3 private state.
430 * 12. Trigger the LED
431 * 13. Setup offload iff supported.
432 * 14. Reset/restart the tick callout.
433 * 15. Attach sysctls
434 *
435 * NOTE: Any modification or deviation from this list MUST be reflected in
436 * the above comment.  Failure to do so will result in problems on various
437 * error conditions including link flapping.
438 */
439static int
440cxgb_controller_attach(device_t dev)
441{
442	device_t child;
443	const struct adapter_info *ai;
444	struct adapter *sc;
445	int i, error = 0;
446	uint32_t vers;
447	int port_qsets = 1;
448	int msi_needed, reg;
449	char buf[80];
450
451	sc = device_get_softc(dev);
452	sc->dev = dev;
453	sc->msi_count = 0;
454	ai = cxgb_get_adapter_info(dev);
455
456	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
457	    device_get_unit(dev));
458	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
459
460	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
461	    device_get_unit(dev));
462	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
463	    device_get_unit(dev));
464	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
465	    device_get_unit(dev));
466
467	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
468	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
469	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
470
471	mtx_lock(&t3_list_lock);
472	SLIST_INSERT_HEAD(&t3_list, sc, link);
473	mtx_unlock(&t3_list_lock);
474
475	/* Find the PCIe link width and set max read request to 4KB. */
476	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
477		uint16_t lnk;
478
479		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
480		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
481		if (sc->link_width < 8 &&
482		    (ai->caps & SUPPORTED_10000baseT_Full)) {
483			device_printf(sc->dev,
484			    "PCIe x%d Link, expect reduced performance\n",
485			    sc->link_width);
486		}
487
488		pci_set_max_read_req(dev, 4096);
489	}
490
491	touch_bars(dev);
492	pci_enable_busmaster(dev);
493	/*
494	 * Allocate the registers and make them available to the driver.
495	 * The registers that we care about for NIC mode are in BAR 0
496	 */
497	sc->regs_rid = PCIR_BAR(0);
498	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
499	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
500		device_printf(dev, "Cannot allocate BAR region 0\n");
501		error = ENXIO;
502		goto out;
503	}
504
505	sc->bt = rman_get_bustag(sc->regs_res);
506	sc->bh = rman_get_bushandle(sc->regs_res);
507	sc->mmio_len = rman_get_size(sc->regs_res);
508
509	for (i = 0; i < MAX_NPORTS; i++)
510		sc->port[i].adapter = sc;
511
512	if (t3_prep_adapter(sc, ai, 1) < 0) {
513		printf("prep adapter failed\n");
514		error = ENODEV;
515		goto out;
516	}
517
518	sc->udbs_rid = PCIR_BAR(2);
519	sc->udbs_res = NULL;
520	if (is_offload(sc) &&
521	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
522		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
523		device_printf(dev, "Cannot allocate BAR region 1\n");
524		error = ENXIO;
525		goto out;
526	}
527
528	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
529	 * enough messages for the queue sets.  If that fails, try falling
530	 * back to MSI.  If that fails, then try falling back to the legacy
531	 * interrupt pin model.
532	 */
533	sc->msix_regs_rid = 0x20;
534	if ((msi_allowed >= 2) &&
535	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
536	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
537
538		if (multiq)
539			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
540		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
541
542		if (pci_msix_count(dev) == 0 ||
543		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
544		    sc->msi_count != msi_needed) {
545			device_printf(dev, "alloc msix failed - "
546				      "msi_count=%d, msi_needed=%d, err=%d; "
547				      "will try MSI\n", sc->msi_count,
548				      msi_needed, error);
549			sc->msi_count = 0;
550			port_qsets = 1;
551			pci_release_msi(dev);
552			bus_release_resource(dev, SYS_RES_MEMORY,
553			    sc->msix_regs_rid, sc->msix_regs_res);
554			sc->msix_regs_res = NULL;
555		} else {
556			sc->flags |= USING_MSIX;
557			sc->cxgb_intr = cxgb_async_intr;
558			device_printf(dev,
559				      "using MSI-X interrupts (%u vectors)\n",
560				      sc->msi_count);
561		}
562	}
563
564	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
565		sc->msi_count = 1;
566		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
567			device_printf(dev, "alloc msi failed - "
568				      "err=%d; will try INTx\n", error);
569			sc->msi_count = 0;
570			port_qsets = 1;
571			pci_release_msi(dev);
572		} else {
573			sc->flags |= USING_MSI;
574			sc->cxgb_intr = t3_intr_msi;
575			device_printf(dev, "using MSI interrupts\n");
576		}
577	}
578	if (sc->msi_count == 0) {
579		device_printf(dev, "using line interrupts\n");
580		sc->cxgb_intr = t3b_intr;
581	}
582
583	/* Create a private taskqueue thread for handling driver events */
584	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
585	    taskqueue_thread_enqueue, &sc->tq);
586	if (sc->tq == NULL) {
587		device_printf(dev, "failed to allocate controller task queue\n");
588		goto out;
589	}
590
591	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
592	    device_get_nameunit(dev));
593	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
594
595
596	/* Create a periodic callout for checking adapter status */
597	callout_init(&sc->cxgb_tick_ch, TRUE);
598
599	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
600		/*
601		 * Warn user that a firmware update will be attempted in init.
602		 */
603		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
604		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
605		sc->flags &= ~FW_UPTODATE;
606	} else {
607		sc->flags |= FW_UPTODATE;
608	}
609
610	if (t3_check_tpsram_version(sc) < 0) {
611		/*
612		 * Warn user that a firmware update will be attempted in init.
613		 */
614		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
615		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
616		sc->flags &= ~TPS_UPTODATE;
617	} else {
618		sc->flags |= TPS_UPTODATE;
619	}
620
621	/*
622	 * Create a child device for each MAC.  The ethernet attachment
623	 * will be done in these children.
624	 */
625	for (i = 0; i < (sc)->params.nports; i++) {
626		struct port_info *pi;
627
628		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
629			device_printf(dev, "failed to add child port\n");
630			error = EINVAL;
631			goto out;
632		}
633		pi = &sc->port[i];
634		pi->adapter = sc;
635		pi->nqsets = port_qsets;
636		pi->first_qset = i*port_qsets;
637		pi->port_id = i;
638		pi->tx_chan = i >= ai->nports0;
639		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
640		sc->rxpkt_map[pi->txpkt_intf] = i;
641		sc->port[i].tx_chan = i >= ai->nports0;
642		sc->portdev[i] = child;
643		device_set_softc(child, pi);
644	}
645	if ((error = bus_generic_attach(dev)) != 0)
646		goto out;
647
648	/* initialize sge private state */
649	t3_sge_init_adapter(sc);
650
651	t3_led_ready(sc);
652
653	error = t3_get_fw_version(sc, &vers);
654	if (error)
655		goto out;
656
657	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
658	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
659	    G_FW_VERSION_MICRO(vers));
660
661	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
662		 ai->desc, is_offload(sc) ? "R" : "",
663		 sc->params.vpd.ec, sc->params.vpd.sn);
664	device_set_desc_copy(dev, buf);
665
666	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
667		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
668		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
669
670	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
671	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
672	t3_add_attach_sysctls(sc);
673
674#ifdef TCP_OFFLOAD
675	for (i = 0; i < NUM_CPL_HANDLERS; i++)
676		sc->cpl_handler[i] = cpl_not_handled;
677#endif
678
679	t3_intr_clear(sc);
680	error = cxgb_setup_interrupts(sc);
681out:
682	if (error)
683		cxgb_free(sc);
684
685	return (error);
686}
687
688/*
689 * The cxgb_controller_detach routine is called when the device is
690 * unloaded from the system.
691 */
692
693static int
694cxgb_controller_detach(device_t dev)
695{
696	struct adapter *sc;
697
698	sc = device_get_softc(dev);
699
700	cxgb_free(sc);
701
702	return (0);
703}
704
705/*
706 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
707 * down the structures that were built up in cxgb_controller_attach(),
708 * and it should be the final piece of work done when fully unloading the
709 * driver.  Its responsibilities include:
710 *
712 *  1. Shutting down the threads started by the cxgb_controller_attach()
713 *     routine.
714 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
715 *  3. Detaching all of the port devices created during the
716 *     cxgb_controller_attach() routine.
717 *  4. Removing the device children created via cxgb_controller_attach().
718 *  5. Releasing PCI resources associated with the device.
719 *  6. Turning off the offload support, iff it was turned on.
720 *  7. Destroying the mutexes created in cxgb_controller_attach().
721 *
722 */
723static void
724cxgb_free(struct adapter *sc)
725{
726	int i, nqsets = 0;
727
728	ADAPTER_LOCK(sc);
729	sc->flags |= CXGB_SHUTDOWN;
730	ADAPTER_UNLOCK(sc);
731
732	/*
733	 * Make sure all child devices are gone.
734	 */
735	bus_generic_detach(sc->dev);
736	for (i = 0; i < (sc)->params.nports; i++) {
737		if (sc->portdev[i] &&
738		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
739			device_printf(sc->dev, "failed to delete child port\n");
740		nqsets += sc->port[i].nqsets;
741	}
742
743	/*
744	 * At this point, it is as if cxgb_port_detach has run on all ports, and
745	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
746	 * all open devices have been closed.
747	 */
748	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
749					   __func__, sc->open_device_map));
750	for (i = 0; i < sc->params.nports; i++) {
751		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
752						  __func__, i));
753	}
754
755	/*
756	 * Finish off the adapter's callouts.
757	 */
758	callout_drain(&sc->cxgb_tick_ch);
759	callout_drain(&sc->sge_timer_ch);
760
761	/*
762	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
763	 * sysctls are cleaned up by the kernel linker.
764	 */
765	if (sc->flags & FULL_INIT_DONE) {
766 		t3_free_sge_resources(sc, nqsets);
767 		sc->flags &= ~FULL_INIT_DONE;
768 	}
769
770	/*
771	 * Release all interrupt resources.
772	 */
773	cxgb_teardown_interrupts(sc);
774	if (sc->flags & (USING_MSI | USING_MSIX)) {
775		device_printf(sc->dev, "releasing msi message(s)\n");
776		pci_release_msi(sc->dev);
777	} else {
778		device_printf(sc->dev, "no msi message to release\n");
779	}
780
781	if (sc->msix_regs_res != NULL) {
782		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
783		    sc->msix_regs_res);
784	}
785
786	/*
787	 * Free the adapter's taskqueue.
788	 */
789	if (sc->tq != NULL) {
790		taskqueue_free(sc->tq);
791		sc->tq = NULL;
792	}
793
794	free(sc->filters, M_DEVBUF);
795	t3_sge_free(sc);
796
797	if (sc->udbs_res != NULL)
798		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
799		    sc->udbs_res);
800
801	if (sc->regs_res != NULL)
802		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
803		    sc->regs_res);
804
805	MTX_DESTROY(&sc->mdio_lock);
806	MTX_DESTROY(&sc->sge.reg_lock);
807	MTX_DESTROY(&sc->elmer_lock);
808	mtx_lock(&t3_list_lock);
809	SLIST_REMOVE(&t3_list, sc, adapter, link);
810	mtx_unlock(&t3_list_lock);
811	ADAPTER_LOCK_DEINIT(sc);
812}
813
814/**
815 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
816 *	@sc: the controller softc
817 *
818 *	Determines how many sets of SGE queues to use and initializes them.
819 *	We support multiple queue sets per port if we have MSI-X, otherwise
820 *	just one queue set per port.
821 */
822static int
823setup_sge_qsets(adapter_t *sc)
824{
825	int i, j, err, irq_idx = 0, qset_idx = 0;
826	u_int ntxq = SGE_TXQ_PER_SET;
827
828	if ((err = t3_sge_alloc(sc)) != 0) {
829		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
830		return (err);
831	}
832
833	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
834		irq_idx = -1;
835
836	for (i = 0; i < (sc)->params.nports; i++) {
837		struct port_info *pi = &sc->port[i];
838
839		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
840			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
841			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
842			    &sc->params.sge.qset[qset_idx], ntxq, pi);
843			if (err) {
844				t3_free_sge_resources(sc, qset_idx);
845				device_printf(sc->dev,
846				    "t3_sge_alloc_qset failed with %d\n", err);
847				return (err);
848			}
849		}
850	}
851
852	return (0);
853}
854
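/*
 * Release every interrupt handler and IRQ resource set up by
 * cxgb_setup_interrupts: the per-queue-set MSI-X vectors as well as the
 * single INTx/MSI/error interrupt.
 */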
855static void
856cxgb_teardown_interrupts(adapter_t *sc)
857{
858	int i;
859
860	for (i = 0; i < SGE_QSETS; i++) {
861		if (sc->msix_intr_tag[i] == NULL) {
862
863			/* Should have been set up fully or not at all */
864			KASSERT(sc->msix_irq_res[i] == NULL &&
865				sc->msix_irq_rid[i] == 0,
866				("%s: half-done interrupt (%d).", __func__, i));
867
868			continue;
869		}
870
871		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
872				  sc->msix_intr_tag[i]);
873		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
874				     sc->msix_irq_res[i]);
875
876		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
877		sc->msix_irq_rid[i] = 0;
878	}
879
880	if (sc->intr_tag) {
881		KASSERT(sc->irq_res != NULL,
882			("%s: half-done interrupt.", __func__));
883
884		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
885		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
886				     sc->irq_res);
887
888		sc->irq_res = sc->intr_tag = NULL;
889		sc->irq_rid = 0;
890	}
891}
892
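/*
 * Allocate and hook up the adapter's interrupts.  A single vector services
 * everything for INTx/MSI; with MSI-X the first vector handles async/error
 * events and each queue set then gets its own vector (t3_intr_msix).
 */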
893static int
894cxgb_setup_interrupts(adapter_t *sc)
895{
896	struct resource *res;
897	void *tag;
898	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
899
900	sc->irq_rid = intr_flag ? 1 : 0;
901	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
902					     RF_SHAREABLE | RF_ACTIVE);
903	if (sc->irq_res == NULL) {
904		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
905			      intr_flag, sc->irq_rid);
906		err = EINVAL;
907		sc->irq_rid = 0;
908	} else {
909		err = bus_setup_intr(sc->dev, sc->irq_res,
910		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
911		    sc->cxgb_intr, sc, &sc->intr_tag);
912
913		if (err) {
914			device_printf(sc->dev,
915				      "Cannot set up interrupt (%x, %u, %d)\n",
916				      intr_flag, sc->irq_rid, err);
917			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
918					     sc->irq_res);
919			sc->irq_res = sc->intr_tag = NULL;
920			sc->irq_rid = 0;
921		}
922	}
923
924	/* That's all for INTx or MSI */
925	if (!(intr_flag & USING_MSIX) || err)
926		return (err);
927
928	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
929	for (i = 0; i < sc->msi_count - 1; i++) {
930		rid = i + 2;
931		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
932					     RF_SHAREABLE | RF_ACTIVE);
933		if (res == NULL) {
934			device_printf(sc->dev, "Cannot allocate interrupt "
935				      "for message %d\n", rid);
936			err = EINVAL;
937			break;
938		}
939
940		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
941				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
942		if (err) {
943			device_printf(sc->dev, "Cannot set up interrupt "
944				      "for message %d (%d)\n", rid, err);
945			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
946			break;
947		}
948
949		sc->msix_irq_rid[i] = rid;
950		sc->msix_irq_res[i] = res;
951		sc->msix_intr_tag[i] = tag;
952		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
953	}
954
955	if (err)
956		cxgb_teardown_interrupts(sc);
957
958	return (err);
959}
960
961
962static int
963cxgb_port_probe(device_t dev)
964{
965	struct port_info *p;
966	char buf[80];
967	const char *desc;
968
969	p = device_get_softc(dev);
970	desc = p->phy.desc;
971	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
972	device_set_desc_copy(dev, buf);
973	return (0);
974}
975
976
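/*
 * Create the per-port character device (/dev/cxgbN) that exposes the
 * extension ioctls in cxgb_cdevsw to userland management tools.
 */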
977static int
978cxgb_makedev(struct port_info *pi)
979{
980
981	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
982	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
983
984	if (pi->port_cdev == NULL)
985		return (ENOMEM);
986
987	pi->port_cdev->si_drv1 = (void *)pi;
988
989	return (0);
990}
991
992#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
993    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
994    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
995#define CXGB_CAP_ENABLE CXGB_CAP
996
997static int
998cxgb_port_attach(device_t dev)
999{
1000	struct port_info *p;
1001	struct ifnet *ifp;
1002	int err;
1003	struct adapter *sc;
1004
1005	p = device_get_softc(dev);
1006	sc = p->adapter;
1007	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1008	    device_get_unit(device_get_parent(dev)), p->port_id);
1009	PORT_LOCK_INIT(p, p->lockbuf);
1010
1011	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1012	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1013
1014	/* Allocate an ifnet object and set it up */
1015	ifp = p->ifp = if_alloc(IFT_ETHER);
1016	if (ifp == NULL) {
1017		device_printf(dev, "Cannot allocate ifnet\n");
1018		return (ENOMEM);
1019	}
1020
1021	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1022	ifp->if_init = cxgb_init;
1023	ifp->if_softc = p;
1024	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1025	ifp->if_ioctl = cxgb_ioctl;
1026	ifp->if_transmit = cxgb_transmit;
1027	ifp->if_qflush = cxgb_qflush;
1028
1029	ifp->if_capabilities = CXGB_CAP;
1030#ifdef TCP_OFFLOAD
1031	if (is_offload(sc))
1032		ifp->if_capabilities |= IFCAP_TOE4;
1033#endif
1034	ifp->if_capenable = CXGB_CAP_ENABLE;
1035	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1036	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1037
1038	/*
1039	 * Disable TSO on 4-port - it isn't supported by the firmware.
1040	 */
1041	if (sc->params.nports > 2) {
1042		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1043		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1044		ifp->if_hwassist &= ~CSUM_TSO;
1045	}
1046
1047	ether_ifattach(ifp, p->hw_addr);
1048
1049#ifdef DEFAULT_JUMBO
1050	if (sc->params.nports <= 2)
1051		ifp->if_mtu = ETHERMTU_JUMBO;
1052#endif
1053	if ((err = cxgb_makedev(p)) != 0) {
1054		printf("makedev failed %d\n", err);
1055		return (err);
1056	}
1057
1058	/* Create a list of media supported by this port */
1059	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1060	    cxgb_media_status);
1061	cxgb_build_medialist(p);
1062
1063	t3_sge_init_port(p);
1064
1065	return (err);
1066}
1067
1068/*
1069 * cxgb_port_detach() is called via the device_detach method when
1070 * cxgb_free() calls bus_generic_detach().  It is responsible for
1071 * removing the device from the view of the kernel, i.e. from all
1072 * interfaces lists etc.  This routine is only called when the driver is
1073 * being unloaded, not when the link goes down.
1074 */
1075static int
1076cxgb_port_detach(device_t dev)
1077{
1078	struct port_info *p;
1079	struct adapter *sc;
1080	int i;
1081
1082	p = device_get_softc(dev);
1083	sc = p->adapter;
1084
1085	/* Tell cxgb_ioctl and if_init that the port is going away */
1086	ADAPTER_LOCK(sc);
1087	SET_DOOMED(p);
1088	wakeup(&sc->flags);
1089	while (IS_BUSY(sc))
1090		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1091	SET_BUSY(sc);
1092	ADAPTER_UNLOCK(sc);
1093
1094	if (p->port_cdev != NULL)
1095		destroy_dev(p->port_cdev);
1096
1097	cxgb_uninit_synchronized(p);
1098	ether_ifdetach(p->ifp);
1099
1100	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1101		struct sge_qset *qs = &sc->sge.qs[i];
1102		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1103
1104		callout_drain(&txq->txq_watchdog);
1105		callout_drain(&txq->txq_timer);
1106	}
1107
1108	PORT_LOCK_DEINIT(p);
1109	if_free(p->ifp);
1110	p->ifp = NULL;
1111
1112	ADAPTER_LOCK(sc);
1113	CLR_BUSY(sc);
1114	wakeup_one(&sc->flags);
1115	ADAPTER_UNLOCK(sc);
1116	return (0);
1117}
1118
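/*
 * Handle an unrecoverable hardware error: quiesce the SGE and MACs, mask
 * further interrupts, and log the firmware status words for post-mortem
 * analysis.
 */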
1119void
1120t3_fatal_err(struct adapter *sc)
1121{
1122	u_int fw_status[4];
1123
1124	if (sc->flags & FULL_INIT_DONE) {
1125		t3_sge_stop(sc);
1126		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1127		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1128		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1129		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1130		t3_intr_disable(sc);
1131	}
1132	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1133	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1134		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1135		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1136}
1137
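/*
 * OS-dependent helper for the common code: walk the PCI capability list
 * and return the config-space offset of capability 'cap', or 0 if it is
 * not present.
 */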
1138int
1139t3_os_find_pci_capability(adapter_t *sc, int cap)
1140{
1141	device_t dev;
1142	struct pci_devinfo *dinfo;
1143	pcicfgregs *cfg;
1144	uint32_t status;
1145	uint8_t ptr;
1146
1147	dev = sc->dev;
1148	dinfo = device_get_ivars(dev);
1149	cfg = &dinfo->cfg;
1150
1151	status = pci_read_config(dev, PCIR_STATUS, 2);
1152	if (!(status & PCIM_STATUS_CAPPRESENT))
1153		return (0);
1154
1155	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1156	case 0:
1157	case 1:
1158		ptr = PCIR_CAP_PTR;
1159		break;
1160	case 2:
1161		ptr = PCIR_CAP_PTR_2;
1162		break;
1163	default:
1164		return (0);
1165		break;
1166	}
1167	ptr = pci_read_config(dev, ptr, 1);
1168
1169	while (ptr != 0) {
1170		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1171			return (ptr);
1172		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1173	}
1174
1175	return (0);
1176}
1177
1178int
1179t3_os_pci_save_state(struct adapter *sc)
1180{
1181	device_t dev;
1182	struct pci_devinfo *dinfo;
1183
1184	dev = sc->dev;
1185	dinfo = device_get_ivars(dev);
1186
1187	pci_cfg_save(dev, dinfo, 0);
1188	return (0);
1189}
1190
1191int
1192t3_os_pci_restore_state(struct adapter *sc)
1193{
1194	device_t dev;
1195	struct pci_devinfo *dinfo;
1196
1197	dev = sc->dev;
1198	dinfo = device_get_ivars(dev);
1199
1200	pci_cfg_restore(dev, dinfo);
1201	return (0);
1202}
1203
1204/**
1205 *	t3_os_link_changed - handle link status changes
1206 *	@adapter: the adapter associated with the link change
1207 *	@port_id: the port index whose link status has changed
1208 *	@link_status: the new status of the link
1209 *	@speed: the new speed setting
1210 *	@duplex: the new duplex setting
1211 *	@fc: the new flow-control setting
1212 *
1213 *	This is the OS-dependent handler for link status changes.  The OS
1214 *	neutral handler takes care of most of the processing for these events,
1215 *	then calls this handler for any OS-specific processing.
1216 */
1217void
1218t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1219     int duplex, int fc, int mac_was_reset)
1220{
1221	struct port_info *pi = &adapter->port[port_id];
1222	struct ifnet *ifp = pi->ifp;
1223
1224	/* no race with detach, so ifp should always be good */
1225	KASSERT(ifp, ("%s: if detached.", __func__));
1226
1227	/* Reapply mac settings if they were lost due to a reset */
1228	if (mac_was_reset) {
1229		PORT_LOCK(pi);
1230		cxgb_update_mac_settings(pi);
1231		PORT_UNLOCK(pi);
1232	}
1233
1234	if (link_status) {
1235		ifp->if_baudrate = IF_Mbps(speed);
1236		if_link_state_change(ifp, LINK_STATE_UP);
1237	} else
1238		if_link_state_change(ifp, LINK_STATE_DOWN);
1239}
1240
1241/**
1242 *	t3_os_phymod_changed - handle PHY module changes
1243 *	@adap: the adapter whose PHY module changed
1244 *	@port_id: the index of the port with the changed module
1245 *
1246 *	This is the OS-dependent handler for PHY module changes.  It is
1247 *	invoked when a PHY module is removed or inserted for any OS-specific
1248 *	processing.
1249 */
1250void t3_os_phymod_changed(struct adapter *adap, int port_id)
1251{
1252	static const char *mod_str[] = {
1253		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1254	};
1255	struct port_info *pi = &adap->port[port_id];
1256	int mod = pi->phy.modtype;
1257
1258	if (mod != pi->media.ifm_cur->ifm_data)
1259		cxgb_build_medialist(pi);
1260
1261	if (mod == phy_modtype_none)
1262		if_printf(pi->ifp, "PHY module unplugged\n");
1263	else {
1264		KASSERT(mod < ARRAY_SIZE(mod_str),
1265			("invalid PHY module type %d", mod));
1266		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1267	}
1268}
1269
1270void
1271t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1272{
1273
1274	/*
1275	 * The ifnet might not be allocated yet when this gets called, as it
1276	 * is called early on in attach by t3_prep_adapter, so just save the
1277	 * address off in the port structure.
1278	 */
1279	if (cxgb_debug)
1280		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1281	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1282}
1283
1284/*
1285 * Programs the XGMAC based on the settings in the ifnet.  These settings
1286 * include MTU, MAC address, mcast addresses, etc.
1287 */
1288static void
1289cxgb_update_mac_settings(struct port_info *p)
1290{
1291	struct ifnet *ifp = p->ifp;
1292	struct t3_rx_mode rm;
1293	struct cmac *mac = &p->mac;
1294	int mtu, hwtagging;
1295
1296	PORT_LOCK_ASSERT_OWNED(p);
1297
1298	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1299
1300	mtu = ifp->if_mtu;
1301	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1302		mtu += ETHER_VLAN_ENCAP_LEN;
1303
1304	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1305
1306	t3_mac_set_mtu(mac, mtu);
1307	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1308	t3_mac_set_address(mac, 0, p->hw_addr);
1309	t3_init_rx_mode(&rm, p);
1310	t3_mac_set_rx_mode(mac, &rm);
1311}
1312
1313
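/*
 * Poll briefly (a few 10ms sleeps) until response queue 0 has seen 'n'
 * management replies beyond the 'init_cnt' baseline; returns ETIMEDOUT if
 * they do not all arrive.
 */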
1314static int
1315await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1316			      unsigned long n)
1317{
1318	int attempts = 5;
1319
1320	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1321		if (!--attempts)
1322			return (ETIMEDOUT);
1323		t3_os_sleep(10);
1324	}
1325	return 0;
1326	return (0);
1327
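/*
 * Initialize the parity bits of the TP's SMT, L2T, and routing tables (and
 * one TCB field) by issuing dummy writes to every entry, then wait for the
 * firmware to acknowledge them all.
 */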
1328static int
1329init_tp_parity(struct adapter *adap)
1330{
1331	int i;
1332	struct mbuf *m;
1333	struct cpl_set_tcb_field *greq;
1334	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1335
1336	t3_tp_set_offload_mode(adap, 1);
1337
1338	for (i = 0; i < 16; i++) {
1339		struct cpl_smt_write_req *req;
1340
1341		m = m_gethdr(M_WAITOK, MT_DATA);
1342		req = mtod(m, struct cpl_smt_write_req *);
1343		m->m_len = m->m_pkthdr.len = sizeof(*req);
1344		memset(req, 0, sizeof(*req));
1345		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1346		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1347		req->iff = i;
1348		t3_mgmt_tx(adap, m);
1349	}
1350
1351	for (i = 0; i < 2048; i++) {
1352		struct cpl_l2t_write_req *req;
1353
1354		m = m_gethdr(M_WAITOK, MT_DATA);
1355		req = mtod(m, struct cpl_l2t_write_req *);
1356		m->m_len = m->m_pkthdr.len = sizeof(*req);
1357		memset(req, 0, sizeof(*req));
1358		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1359		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1360		req->params = htonl(V_L2T_W_IDX(i));
1361		t3_mgmt_tx(adap, m);
1362	}
1363
1364	for (i = 0; i < 2048; i++) {
1365		struct cpl_rte_write_req *req;
1366
1367		m = m_gethdr(M_WAITOK, MT_DATA);
1368		req = mtod(m, struct cpl_rte_write_req *);
1369		m->m_len = m->m_pkthdr.len = sizeof(*req);
1370		memset(req, 0, sizeof(*req));
1371		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1372		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1373		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1374		t3_mgmt_tx(adap, m);
1375	}
1376
1377	m = m_gethdr(M_WAITOK, MT_DATA);
1378	greq = mtod(m, struct cpl_set_tcb_field *);
1379	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1380	memset(greq, 0, sizeof(*greq));
1381	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1383	greq->mask = htobe64(1);
1384	t3_mgmt_tx(adap, m);
1385
1386	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1387	t3_tp_set_offload_mode(adap, 0);
1388	return (i);
1389}
1390
1391/**
1392 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1393 *	@adap: the adapter
1394 *
1395 *	Sets up RSS to distribute packets to multiple receive queues.  We
1396 *	configure the RSS CPU lookup table to distribute to the number of HW
1397 *	receive queues, and the response queue lookup table to narrow that
1398 *	down to the response queues actually configured for each port.
1399 *	We always configure the RSS mapping for two ports since the mapping
1400 *	table has plenty of entries.
1401 */
1402static void
1403setup_rss(adapter_t *adap)
1404{
1405	int i;
1406	u_int nq[2];
1407	uint8_t cpus[SGE_QSETS + 1];
1408	uint16_t rspq_map[RSS_TABLE_SIZE];
1409
1410	for (i = 0; i < SGE_QSETS; ++i)
1411		cpus[i] = i;
1412	cpus[SGE_QSETS] = 0xff;
1413
1414	nq[0] = nq[1] = 0;
1415	for_each_port(adap, i) {
1416		const struct port_info *pi = adap2pinfo(adap, i);
1417
1418		nq[pi->tx_chan] += pi->nqsets;
1419	}
1420	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1421		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1422		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1423	}
1424
1425	/* Calculate the reverse RSS map table */
1426	for (i = 0; i < SGE_QSETS; ++i)
1427		adap->rrss_map[i] = 0xff;
1428	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1429		if (adap->rrss_map[rspq_map[i]] == 0xff)
1430			adap->rrss_map[rspq_map[i]] = i;
1431
1432	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1433		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1434	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1435	              cpus, rspq_map);
1437}
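
/*
 * Send a management work request that binds one packet-scheduler entry to
 * a port, with the given index and min/max parameters.
 */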
1438static void
1439send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1440			      int hi, int port)
1441{
1442	struct mbuf *m;
1443	struct mngt_pktsched_wr *req;
1444
1445	m = m_gethdr(M_DONTWAIT, MT_DATA);
1446	if (m) {
1447		req = mtod(m, struct mngt_pktsched_wr *);
1448		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1449		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1450		req->sched = sched;
1451		req->idx = qidx;
1452		req->min = lo;
1453		req->max = hi;
1454		req->binding = port;
1455		m->m_len = m->m_pkthdr.len = sizeof(*req);
1456		t3_mgmt_tx(adap, m);
1457	}
1458}
1459
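/*
 * Bind every configured queue set to its port's TX channel using the
 * firmware's packet-scheduler interface.
 */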
1460static void
1461bind_qsets(adapter_t *sc)
1462{
1463	int i, j;
1464
1465	for (i = 0; i < (sc)->params.nports; ++i) {
1466		const struct port_info *pi = adap2pinfo(sc, i);
1467
1468		for (j = 0; j < pi->nqsets; ++j) {
1469			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1470					  -1, pi->tx_chan);
1471
1472		}
1473	}
1474}
1475
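/*
 * If the protocol SRAM image stored in the EEPROM is older than the one
 * this driver was built against, rewrite that EEPROM region from the
 * matching TPEEPROM_NAME firmware image.
 */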
1476static void
1477update_tpeeprom(struct adapter *adap)
1478{
1479	const struct firmware *tpeeprom;
1480
1481	uint32_t version;
1482	unsigned int major, minor;
1483	int ret, len;
1484	char rev, name[32];
1485
1486	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1487
1488	major = G_TP_VERSION_MAJOR(version);
1489	minor = G_TP_VERSION_MINOR(version);
1490	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1491		return;
1492
1493	rev = t3rev2char(adap);
1494	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1495
1496	tpeeprom = firmware_get(name);
1497	if (tpeeprom == NULL) {
1498		device_printf(adap->dev,
1499			      "could not load TP EEPROM: unable to load %s\n",
1500			      name);
1501		return;
1502	}
1503
1504	len = tpeeprom->datasize - 4;
1505
1506	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1507	if (ret)
1508		goto release_tpeeprom;
1509
1510	if (len != TP_SRAM_LEN) {
1511		device_printf(adap->dev,
1512			      "%s length is wrong len=%d expected=%d\n", name,
1513			      len, TP_SRAM_LEN);
1513		goto release_tpeeprom;
1515	}
1516
1517	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1518	    TP_SRAM_OFFSET);
1519
1520	if (!ret) {
1521		device_printf(adap->dev,
1522			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1523			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1524	} else
1525		device_printf(adap->dev,
1526			      "Protocol SRAM image update in EEPROM failed\n");
1527
1528release_tpeeprom:
1529	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1530
1531	return;
1532}
1533
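/*
 * Update the TP EEPROM if necessary and then load the protocol SRAM image
 * (TPSRAM_NAME) into the chip itself.
 */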
1534static int
1535update_tpsram(struct adapter *adap)
1536{
1537	const struct firmware *tpsram;
1538	int ret;
1539	char rev, name[32];
1540
1541	rev = t3rev2char(adap);
1542	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1543
1544	update_tpeeprom(adap);
1545
1546	tpsram = firmware_get(name);
1547	if (tpsram == NULL) {
1548		device_printf(adap->dev, "could not load TP SRAM\n");
1549		return (EINVAL);
1550	} else
1551		device_printf(adap->dev, "updating TP SRAM\n");
1552
1553	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1554	if (ret)
1555		goto release_tpsram;
1556
1557	ret = t3_set_proto_sram(adap, tpsram->data);
1558	if (ret)
1559		device_printf(adap->dev, "loading protocol SRAM failed\n");
1560
1561release_tpsram:
1562	firmware_put(tpsram, FIRMWARE_UNLOAD);
1563
1564	return (ret);
1565}
1566
1567/**
1568 *	cxgb_up - enable the adapter
1569 *	@adap: adapter being enabled
1570 *
1571 *	Called when the first port is enabled, this function performs the
1572 *	actions necessary to make an adapter operational, such as completing
1573 *	the initialization of HW modules, and enabling interrupts.
1574 */
1575static int
1576cxgb_up(struct adapter *sc)
1577{
1578	int err = 0;
1579	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1580
1581	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1582					   __func__, sc->open_device_map));
1583
1584	if ((sc->flags & FULL_INIT_DONE) == 0) {
1585
1586		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1587
1588		if ((sc->flags & FW_UPTODATE) == 0)
1589			if ((err = upgrade_fw(sc)))
1590				goto out;
1591
1592		if ((sc->flags & TPS_UPTODATE) == 0)
1593			if ((err = update_tpsram(sc)))
1594				goto out;
1595
1596		if (is_offload(sc) && nfilters != 0) {
1597			sc->params.mc5.nservers = 0;
1598
1599			if (nfilters < 0)
1600				sc->params.mc5.nfilters = mxf;
1601			else
1602				sc->params.mc5.nfilters = min(nfilters, mxf);
1603		}
1604
1605		err = t3_init_hw(sc, 0);
1606		if (err)
1607			goto out;
1608
1609		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1610		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1611
1612		err = setup_sge_qsets(sc);
1613		if (err)
1614			goto out;
1615
1616		alloc_filters(sc);
1617		setup_rss(sc);
1618
1619		t3_add_configured_sysctls(sc);
1620		sc->flags |= FULL_INIT_DONE;
1621	}
1622
1623	t3_intr_clear(sc);
1624	t3_sge_start(sc);
1625	t3_intr_enable(sc);
1626
1627	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1628	    is_offload(sc) && init_tp_parity(sc) == 0)
1629		sc->flags |= TP_PARITY_INIT;
1630
1631	if (sc->flags & TP_PARITY_INIT) {
1632		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1633		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1634	}
1635
1636	if (!(sc->flags & QUEUES_BOUND)) {
1637		bind_qsets(sc);
1638		setup_hw_filters(sc);
1639		sc->flags |= QUEUES_BOUND;
1640	}
1641
1642	t3_sge_reset_adapter(sc);
1643out:
1644	return (err);
1645}
1646
1647/*
1648 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1649 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1650 * during controller_detach, not here.
1651 */
1652static void
1653cxgb_down(struct adapter *sc)
1654{
1655	t3_sge_stop(sc);
1656	t3_intr_disable(sc);
1657}
1658
1659/*
1660 * if_init for cxgb ports.
1661 */
1662static void
1663cxgb_init(void *arg)
1664{
1665	struct port_info *p = arg;
1666	struct adapter *sc = p->adapter;
1667
1668	ADAPTER_LOCK(sc);
1669	cxgb_init_locked(p); /* releases adapter lock */
1670	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1671}
1672
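/*
 * Bring one port up.  Called with the adapter lock held; the lock is
 * always released by the time this returns, and may be dropped temporarily
 * if one-time adapter initialization (cxgb_up) has to sleep.
 */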
1673static int
1674cxgb_init_locked(struct port_info *p)
1675{
1676	struct adapter *sc = p->adapter;
1677	struct ifnet *ifp = p->ifp;
1678	struct cmac *mac = &p->mac;
1679	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1680
1681	ADAPTER_LOCK_ASSERT_OWNED(sc);
1682
1683	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1684		gave_up_lock = 1;
1685		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1686			rc = EINTR;
1687			goto done;
1688		}
1689	}
1690	if (IS_DOOMED(p)) {
1691		rc = ENXIO;
1692		goto done;
1693	}
1694	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1695
1696	/*
1697	 * The code that runs during one-time adapter initialization can sleep
1698	 * so it's important not to hold any locks across it.
1699	 */
1700	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1701
1702	if (may_sleep) {
1703		SET_BUSY(sc);
1704		gave_up_lock = 1;
1705		ADAPTER_UNLOCK(sc);
1706	}
1707
1708	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1709		goto done;
1710
1711	PORT_LOCK(p);
1712	if (isset(&sc->open_device_map, p->port_id) &&
1713	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1714		PORT_UNLOCK(p);
1715		goto done;
1716	}
1717	t3_port_intr_enable(sc, p->port_id);
1718	if (!mac->multiport)
1719		t3_mac_init(mac);
1720	cxgb_update_mac_settings(p);
1721	t3_link_start(&p->phy, mac, &p->link_config);
1722	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1723	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1724	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1725	PORT_UNLOCK(p);
1726
1727	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1728		struct sge_qset *qs = &sc->sge.qs[i];
1729		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1730
1731		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1732				 txq->txq_watchdog.c_cpu);
1733	}
1734
1735	/* all ok */
1736	setbit(&sc->open_device_map, p->port_id);
1737	callout_reset(&p->link_check_ch,
1738	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1739	    link_check_callout, p);
1740
1741done:
1742	if (may_sleep) {
1743		ADAPTER_LOCK(sc);
1744		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1745		CLR_BUSY(sc);
1746	}
1747	if (gave_up_lock)
1748		wakeup_one(&sc->flags);
1749	ADAPTER_UNLOCK(sc);
1750	return (rc);
1751}
1752
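/*
 * Counterpart to cxgb_init_locked: mark the adapter busy (sleeping for the
 * BUSY flag if needed), drop the adapter lock, and bring the port down via
 * cxgb_uninit_synchronized().
 */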
1753static int
1754cxgb_uninit_locked(struct port_info *p)
1755{
1756	struct adapter *sc = p->adapter;
1757	int rc;
1758
1759	ADAPTER_LOCK_ASSERT_OWNED(sc);
1760
1761	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1762		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1763			rc = EINTR;
1764			goto done;
1765		}
1766	}
1767	if (IS_DOOMED(p)) {
1768		rc = ENXIO;
1769		goto done;
1770	}
1771	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1772	SET_BUSY(sc);
1773	ADAPTER_UNLOCK(sc);
1774
1775	rc = cxgb_uninit_synchronized(p);
1776
1777	ADAPTER_LOCK(sc);
1778	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1779	CLR_BUSY(sc);
1780	wakeup_one(&sc->flags);
1781done:
1782	ADAPTER_UNLOCK(sc);
1783	return (rc);
1784}
1785
1786/*
1787 * Called on "ifconfig down", and from port_detach
1788 */
1789static int
1790cxgb_uninit_synchronized(struct port_info *pi)
1791{
1792	struct adapter *sc = pi->adapter;
1793	struct ifnet *ifp = pi->ifp;
1794
1795	/*
1796	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1797	 */
1798	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1799
1800	/*
1801	 * Clear this port's bit from the open device map, and then drain all
1802	 * the tasks that can access/manipulate this port's port_info or ifp.
1803	 * We disable this port's interrupts here and so the slow/ext
1804	 * interrupt tasks won't be enqueued.  The tick task will continue to
1805	 * be enqueued every second but the runs after this drain will not see
1806	 * this port in the open device map.
1807	 *
1808	 * A well behaved task must take open_device_map into account and ignore
1809	 * ports that are not open.
1810	 */
1811	clrbit(&sc->open_device_map, pi->port_id);
1812	t3_port_intr_disable(sc, pi->port_id);
1813	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1814	taskqueue_drain(sc->tq, &sc->tick_task);
1815
1816	callout_drain(&pi->link_check_ch);
1817	taskqueue_drain(sc->tq, &pi->link_check_task);
1818
1819	PORT_LOCK(pi);
1820	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1821
1822	/* disable pause frames */
1823	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1824
1825	/* Reset RX FIFO HWM */
1826	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1827			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1828
1829	DELAY(100 * 1000);
1830
1831	/* Wait for TXFIFO empty */
1832	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1833			F_TXFIFO_EMPTY, 1, 20, 5);
1834
1835	DELAY(100 * 1000);
1836	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1837
1838	pi->phy.ops->power_down(&pi->phy, 1);
1839
1840	PORT_UNLOCK(pi);
1841
1842	pi->link_config.link_ok = 0;
1843	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1844
1845	if (sc->open_device_map == 0)
1846		cxgb_down(pi->adapter);
1847
1848	return (0);
1849}
1850
1851/*
1852 * Mark lro enabled or disabled in all qsets for this port
1853 */
1854static int
1855cxgb_set_lro(struct port_info *p, int enabled)
1856{
1857	int i;
1858	struct adapter *adp = p->adapter;
1859	struct sge_qset *q;
1860
1861	for (i = 0; i < p->nqsets; i++) {
1862		q = &adp->sge.qs[p->first_qset + i];
1863		q->lro.enabled = (enabled != 0);
1864	}
1865	return (0);
1866}
1867
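/*
 * ioctl handler for the cxgb ports.  Requests that reconfigure the
 * hardware are serialized against init/uninit/detach through the DOOMED
 * and BUSY checks made under the adapter lock.
 */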
1868static int
1869cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1870{
1871	struct port_info *p = ifp->if_softc;
1872	struct adapter *sc = p->adapter;
1873	struct ifreq *ifr = (struct ifreq *)data;
1874	int flags, error = 0, mtu;
1875	uint32_t mask;
1876
1877	switch (command) {
1878	case SIOCSIFMTU:
1879		ADAPTER_LOCK(sc);
1880		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1881		if (error) {
1882fail:
1883			ADAPTER_UNLOCK(sc);
1884			return (error);
1885		}
1886
1887		mtu = ifr->ifr_mtu;
1888		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1889			error = EINVAL;
1890		} else {
1891			ifp->if_mtu = mtu;
1892			PORT_LOCK(p);
1893			cxgb_update_mac_settings(p);
1894			PORT_UNLOCK(p);
1895		}
1896		ADAPTER_UNLOCK(sc);
1897		break;
1898	case SIOCSIFFLAGS:
1899		ADAPTER_LOCK(sc);
1900		if (IS_DOOMED(p)) {
1901			error = ENXIO;
1902			goto fail;
1903		}
1904		if (ifp->if_flags & IFF_UP) {
1905			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1906				flags = p->if_flags;
1907				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1908				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1909					if (IS_BUSY(sc)) {
1910						error = EBUSY;
1911						goto fail;
1912					}
1913					PORT_LOCK(p);
1914					cxgb_update_mac_settings(p);
1915					PORT_UNLOCK(p);
1916				}
1917				ADAPTER_UNLOCK(sc);
1918			} else
1919				error = cxgb_init_locked(p);
1920			p->if_flags = ifp->if_flags;
1921		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1922			error = cxgb_uninit_locked(p);
1923		else
1924			ADAPTER_UNLOCK(sc);
1925
1926		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1927		break;
1928	case SIOCADDMULTI:
1929	case SIOCDELMULTI:
1930		ADAPTER_LOCK(sc);
1931		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1932		if (error)
1933			goto fail;
1934
1935		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1936			PORT_LOCK(p);
1937			cxgb_update_mac_settings(p);
1938			PORT_UNLOCK(p);
1939		}
1940		ADAPTER_UNLOCK(sc);
1941
1942		break;
1943	case SIOCSIFCAP:
1944		ADAPTER_LOCK(sc);
1945		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1946		if (error)
1947			goto fail;
1948
1949		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1950		if (mask & IFCAP_TXCSUM) {
1951			ifp->if_capenable ^= IFCAP_TXCSUM;
1952			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1953
1954			if (IFCAP_TSO4 & ifp->if_capenable &&
1955			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1956				ifp->if_capenable &= ~IFCAP_TSO4;
1957				if_printf(ifp,
1958				    "tso4 disabled due to -txcsum.\n");
1959			}
1960		}
1961		if (mask & IFCAP_TXCSUM_IPV6) {
1962			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1963			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1964
1965			if (IFCAP_TSO6 & ifp->if_capenable &&
1966			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1967				ifp->if_capenable &= ~IFCAP_TSO6;
1968				if_printf(ifp,
1969				    "tso6 disabled due to -txcsum6.\n");
1970			}
1971		}
1972		if (mask & IFCAP_RXCSUM)
1973			ifp->if_capenable ^= IFCAP_RXCSUM;
1974		if (mask & IFCAP_RXCSUM_IPV6)
1975			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1976
1977		/*
1978		 * Note that we leave CSUM_TSO alone (it is always set).  The
1979		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1980		 * sending a TSO request our way, so it's sufficient to toggle
1981		 * IFCAP_TSOx only.
1982		 */
1983		if (mask & IFCAP_TSO4) {
1984			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1985			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1986				if_printf(ifp, "enable txcsum first.\n");
1987				error = EAGAIN;
1988				goto fail;
1989			}
1990			ifp->if_capenable ^= IFCAP_TSO4;
1991		}
1992		if (mask & IFCAP_TSO6) {
1993			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1994			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1995				if_printf(ifp, "enable txcsum6 first.\n");
1996				error = EAGAIN;
1997				goto fail;
1998			}
1999			ifp->if_capenable ^= IFCAP_TSO6;
2000		}
2001		if (mask & IFCAP_LRO) {
2002			ifp->if_capenable ^= IFCAP_LRO;
2003
2004			/* Safe to do this even if cxgb_up not called yet */
2005			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2006		}
2007#ifdef TCP_OFFLOAD
2008		if (mask & IFCAP_TOE4) {
2009			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2010
2011			error = toe_capability(p, enable);
2012			if (error == 0)
2013				ifp->if_capenable ^= mask;
2014		}
2015#endif
2016		if (mask & IFCAP_VLAN_HWTAGGING) {
2017			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2018			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2019				PORT_LOCK(p);
2020				cxgb_update_mac_settings(p);
2021				PORT_UNLOCK(p);
2022			}
2023		}
2024		if (mask & IFCAP_VLAN_MTU) {
2025			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2026			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2027				PORT_LOCK(p);
2028				cxgb_update_mac_settings(p);
2029				PORT_UNLOCK(p);
2030			}
2031		}
2032		if (mask & IFCAP_VLAN_HWTSO)
2033			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2034		if (mask & IFCAP_VLAN_HWCSUM)
2035			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2036
2037#ifdef VLAN_CAPABILITIES
2038		VLAN_CAPABILITIES(ifp);
2039#endif
2040		ADAPTER_UNLOCK(sc);
2041		break;
2042	case SIOCSIFMEDIA:
2043	case SIOCGIFMEDIA:
2044		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2045		break;
2046	default:
2047		error = ether_ioctl(ifp, command, data);
2048	}
2049
2050	return (error);
2051}
2052
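/*
 * ifconfig media changes are not supported; the media is determined by the
 * PHY/module that is installed (see cxgb_build_medialist).
 */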
2053static int
2054cxgb_media_change(struct ifnet *ifp)
2055{
2056	return (EOPNOTSUPP);
2057}
2058
2059/*
2060 * Translates phy->modtype to the correct Ethernet media subtype.
2061 */
2062static int
2063cxgb_ifm_type(int mod)
2064{
2065	switch (mod) {
2066	case phy_modtype_sr:
2067		return (IFM_10G_SR);
2068	case phy_modtype_lr:
2069		return (IFM_10G_LR);
2070	case phy_modtype_lrm:
2071		return (IFM_10G_LRM);
2072	case phy_modtype_twinax:
2073		return (IFM_10G_TWINAX);
2074	case phy_modtype_twinax_long:
2075		return (IFM_10G_TWINAX_LONG);
2076	case phy_modtype_none:
2077		return (IFM_NONE);
2078	case phy_modtype_unknown:
2079		return (IFM_UNKNOWN);
2080	}
2081
2082	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2083	return (IFM_UNKNOWN);
2084}
2085
2086/*
2087 * Rebuilds the ifmedia list for this port, and sets the current media.
2088 */
2089static void
2090cxgb_build_medialist(struct port_info *p)
2091{
2092	struct cphy *phy = &p->phy;
2093	struct ifmedia *media = &p->media;
2094	int mod = phy->modtype;
2095	int m = IFM_ETHER | IFM_FDX;
2096
2097	PORT_LOCK(p);
2098
2099	ifmedia_removeall(media);
2100	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2101		/* Copper (RJ45) */
2102
2103		if (phy->caps & SUPPORTED_10000baseT_Full)
2104			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2105
2106		if (phy->caps & SUPPORTED_1000baseT_Full)
2107			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2108
2109		if (phy->caps & SUPPORTED_100baseT_Full)
2110			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2111
2112		if (phy->caps & SUPPORTED_10baseT_Full)
2113			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2114
2115		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2116		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2117
2118	} else if (phy->caps & SUPPORTED_TP) {
2119		/* Copper (CX4) */
2120
2121		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2122			("%s: unexpected cap 0x%x", __func__, phy->caps));
2123
2124		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2125		ifmedia_set(media, m | IFM_10G_CX4);
2126
2127	} else if (phy->caps & SUPPORTED_FIBRE &&
2128		   phy->caps & SUPPORTED_10000baseT_Full) {
2129		/* 10G optical (but includes SFP+ twinax) */
2130
2131		m |= cxgb_ifm_type(mod);
2132		if (IFM_SUBTYPE(m) == IFM_NONE)
2133			m &= ~IFM_FDX;
2134
2135		ifmedia_add(media, m, mod, NULL);
2136		ifmedia_set(media, m);
2137
2138	} else if (phy->caps & SUPPORTED_FIBRE &&
2139		   phy->caps & SUPPORTED_1000baseT_Full) {
2140		/* 1G optical */
2141
2142		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2143		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2144		ifmedia_set(media, m | IFM_1000_SX);
2145
2146	} else {
2147		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2148			    phy->caps));
2149	}
2150
2151	PORT_UNLOCK(p);
2152}
2153
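/*
 * Report the current media and link status.  If the module type has changed
 * since the media list was last built, rebuild it first.
 */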
2154static void
2155cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2156{
2157	struct port_info *p = ifp->if_softc;
2158	struct ifmedia_entry *cur = p->media.ifm_cur;
2159	int speed = p->link_config.speed;
2160
2161	if (cur->ifm_data != p->phy.modtype) {
2162		cxgb_build_medialist(p);
2163		cur = p->media.ifm_cur;
2164	}
2165
2166	ifmr->ifm_status = IFM_AVALID;
2167	if (!p->link_config.link_ok)
2168		return;
2169
2170	ifmr->ifm_status |= IFM_ACTIVE;
2171
2172	/*
2173	 * active and current will differ iff current media is autoselect.  That
2174	 * can happen only for copper RJ45.
2175	 */
2176	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2177		return;
2178	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2179		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2180
2181	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2182	if (speed == SPEED_10000)
2183		ifmr->ifm_active |= IFM_10G_T;
2184	else if (speed == SPEED_1000)
2185		ifmr->ifm_active |= IFM_1000_T;
2186	else if (speed == SPEED_100)
2187		ifmr->ifm_active |= IFM_100_TX;
2188	else if (speed == SPEED_10)
2189		ifmr->ifm_active |= IFM_10_T;
2190	else
2191		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2192			    speed));
2193}
2194
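/*
 * Interrupt handler for slow-path (async) events.  Mask the PL interrupts
 * (the read back flushes the posted write) and defer the actual work to the
 * slow_intr_task on the adapter's taskqueue.
 */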
2195static void
2196cxgb_async_intr(void *data)
2197{
2198	adapter_t *sc = data;
2199
2200	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2201	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2202	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2203}
2204
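/*
 * Link state is checked from a task.  The task is kicked off either by the
 * PHY's link interrupt (via t3_os_link_intr) or, for PHYs without a link
 * interrupt or while a link fault is pending, by this periodic callout.
 */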
2205static void
2206link_check_callout(void *arg)
2207{
2208	struct port_info *pi = arg;
2209	struct adapter *sc = pi->adapter;
2210
2211	if (!isset(&sc->open_device_map, pi->port_id))
2212		return;
2213
2214	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2215}
2216
2217static void
2218check_link_status(void *arg, int pending)
2219{
2220	struct port_info *pi = arg;
2221	struct adapter *sc = pi->adapter;
2222
2223	if (!isset(&sc->open_device_map, pi->port_id))
2224		return;
2225
2226	t3_link_changed(sc, pi->port_id);
2227
2228	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2229		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2230}
2231
2232void
2233t3_os_link_intr(struct port_info *pi)
2234{
2235	/*
2236	 * Schedule a link check in the near future.  If the link is flapping
2237	 * rapidly we'll keep resetting the callout and delaying the check until
2238	 * things stabilize a bit.
2239	 */
2240	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2241}
2242
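/*
 * MAC watchdog for T3B2 adapters.  For every open port with link up, run the
 * hardware watchdog and, when it reports that the MAC needs a reset,
 * reprogram and restart the MAC.
 */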
2243static void
2244check_t3b2_mac(struct adapter *sc)
2245{
2246	int i;
2247
2248	if (sc->flags & CXGB_SHUTDOWN)
2249		return;
2250
2251	for_each_port(sc, i) {
2252		struct port_info *p = &sc->port[i];
2253		int status;
2254#ifdef INVARIANTS
2255		struct ifnet *ifp = p->ifp;
2256#endif
2257
2258		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2259		    !p->link_config.link_ok)
2260			continue;
2261
2262		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2263			("%s: state mismatch (drv_flags %x, device_map %x)",
2264			 __func__, ifp->if_drv_flags, sc->open_device_map));
2265
2266		PORT_LOCK(p);
2267		status = t3b2_mac_watchdog_task(&p->mac);
2268		if (status == 1)
2269			p->mac.stats.num_toggled++;
2270		else if (status == 2) {
2271			struct cmac *mac = &p->mac;
2272
2273			cxgb_update_mac_settings(p);
2274			t3_link_start(&p->phy, mac, &p->link_config);
2275			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2276			t3_port_intr_enable(sc, p->port_id);
2277			p->mac.stats.num_resets++;
2278		}
2279		PORT_UNLOCK(p);
2280	}
2281}
2282
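/*
 * Once-a-second callout.  All the real work happens in the tick task; this
 * just enqueues it and reschedules itself.
 */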
2283static void
2284cxgb_tick(void *arg)
2285{
2286	adapter_t *sc = (adapter_t *)arg;
2287
2288	if (sc->flags & CXGB_SHUTDOWN)
2289		return;
2290
2291	taskqueue_enqueue(sc->tq, &sc->tick_task);
2292	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2293}
2294
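/*
 * The tick task: run the T3B2 MAC watchdog if needed, account for starved
 * response queues and empty free lists, and refresh the per-port interface
 * statistics from the MAC counters.
 */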
2295static void
2296cxgb_tick_handler(void *arg, int count)
2297{
2298	adapter_t *sc = (adapter_t *)arg;
2299	const struct adapter_params *p = &sc->params;
2300	int i;
2301	uint32_t cause, reset;
2302
2303	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2304		return;
2305
2306	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2307		check_t3b2_mac(sc);
2308
2309	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2310	if (cause) {
2311		struct sge_qset *qs = &sc->sge.qs[0];
2312		uint32_t mask, v;
2313
2314		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2315
2316		mask = 1;
2317		for (i = 0; i < SGE_QSETS; i++) {
2318			if (v & mask)
2319				qs[i].rspq.starved++;
2320			mask <<= 1;
2321		}
2322
2323		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2324
2325		for (i = 0; i < SGE_QSETS * 2; i++) {
2326			if (v & mask) {
2327				qs[i / 2].fl[i % 2].empty++;
2328			}
2329			mask <<= 1;
2330		}
2331
2332		/* clear */
2333		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2334		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2335	}
2336
2337	for (i = 0; i < sc->params.nports; i++) {
2338		struct port_info *pi = &sc->port[i];
2339		struct ifnet *ifp = pi->ifp;
2340		struct cmac *mac = &pi->mac;
2341		struct mac_stats *mstats = &mac->stats;
2342		int drops, j;
2343
2344		if (!isset(&sc->open_device_map, pi->port_id))
2345			continue;
2346
2347		PORT_LOCK(pi);
2348		t3_mac_update_stats(mac);
2349		PORT_UNLOCK(pi);
2350
2351		ifp->if_opackets = mstats->tx_frames;
2352		ifp->if_ipackets = mstats->rx_frames;
2353		ifp->if_obytes = mstats->tx_octets;
2354		ifp->if_ibytes = mstats->rx_octets;
2355		ifp->if_omcasts = mstats->tx_mcast_frames;
2356		ifp->if_imcasts = mstats->rx_mcast_frames;
2357		ifp->if_collisions = mstats->tx_total_collisions;
2358		ifp->if_iqdrops = mstats->rx_cong_drops;
2359
2360		drops = 0;
2361		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2362			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2363		ifp->if_snd.ifq_drops = drops;
2364
2365		ifp->if_oerrors =
2366		    mstats->tx_excess_collisions +
2367		    mstats->tx_underrun +
2368		    mstats->tx_len_errs +
2369		    mstats->tx_mac_internal_errs +
2370		    mstats->tx_excess_deferral +
2371		    mstats->tx_fcs_errs;
2372		ifp->if_ierrors =
2373		    mstats->rx_jabber +
2374		    mstats->rx_data_errs +
2375		    mstats->rx_sequence_errs +
2376		    mstats->rx_runt +
2377		    mstats->rx_too_long +
2378		    mstats->rx_mac_internal_errs +
2379		    mstats->rx_short +
2380		    mstats->rx_fcs_errs;
2381
2382		if (mac->multiport)
2383			continue;
2384
2385		/* Count rx fifo overflows, once per second */
2386		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2387		reset = 0;
2388		if (cause & F_RXFIFO_OVERFLOW) {
2389			mac->stats.rx_fifo_ovfl++;
2390			reset |= F_RXFIFO_OVERFLOW;
2391		}
2392		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2393	}
2394}
2395
2396static void
2397touch_bars(device_t dev)
2398{
2399	/*
2400	 * Don't enable yet
2401	 */
2402#if !defined(__LP64__) && 0
2403	u32 v;
2404
2405	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2406	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2407	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2408	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2409	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2410	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2411#endif
2412}
2413
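/*
 * Write len bytes at offset into the serial EEPROM.  Writes must be done in
 * aligned 32-bit words, so partially covered words at either end are read,
 * merged, and written back.
 */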
2414static int
2415set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2416{
2417	uint8_t *buf;
2418	int err = 0;
2419	u32 aligned_offset, aligned_len, *p;
2420	struct adapter *adapter = pi->adapter;
2421
2422
2423	aligned_offset = offset & ~3;
2424	aligned_len = (len + (offset & 3) + 3) & ~3;
2425
2426	if (aligned_offset != offset || aligned_len != len) {
2427		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2428		if (!buf)
2429			return (ENOMEM);
2430		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2431		if (!err && aligned_len > 4)
2432			err = t3_seeprom_read(adapter,
2433					      aligned_offset + aligned_len - 4,
2434					      (u32 *)&buf[aligned_len - 4]);
2435		if (err)
2436			goto out;
2437		memcpy(buf + (offset & 3), data, len);
2438	} else
2439		buf = (uint8_t *)(uintptr_t)data;
2440
2441	err = t3_seeprom_wp(adapter, 0);
2442	if (err)
2443		goto out;
2444
2445	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2446		err = t3_seeprom_write(adapter, aligned_offset, *p);
2447		aligned_offset += 4;
2448	}
2449
2450	if (!err)
2451		err = t3_seeprom_wp(adapter, 1);
2452out:
2453	if (buf != data)
2454		free(buf, M_DEVBUF);
2455	return err;
2456}
2457
2458
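/*
 * Range check used by the ioctls below.  A negative value means the
 * parameter was not supplied and is always accepted.
 */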
2459static int
2460in_range(int val, int lo, int hi)
2461{
2462	return val < 0 || (val <= hi && val >= lo);
2463}
2464
2465static int
2466cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2467{
2468	return (0);
2469}
2470
2471static int
2472cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2473{
2474	return (0);
2475}
2476
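/*
 * Privileged CHELSIO_* ioctls on the adapter's control character device.
 * si_drv1 points at the port the device node was created for.
 */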
2477static int
2478cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2479    int fflag, struct thread *td)
2480{
2481	int mmd, error = 0;
2482	struct port_info *pi = dev->si_drv1;
2483	adapter_t *sc = pi->adapter;
2484
2485#ifdef PRIV_SUPPORTED
2486	if (priv_check(td, PRIV_DRIVER)) {
2487		if (cxgb_debug)
2488			printf("user does not have access to privileged ioctls\n");
2489		return (EPERM);
2490	}
2491#else
2492	if (suser(td)) {
2493		if (cxgb_debug)
2494			printf("user does not have access to privileged ioctls\n");
2495		return (EPERM);
2496	}
2497#endif
2498
2499	switch (cmd) {
2500	case CHELSIO_GET_MIIREG: {
2501		uint32_t val;
2502		struct cphy *phy = &pi->phy;
2503		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2504
2505		if (!phy->mdio_read)
2506			return (EOPNOTSUPP);
2507		if (is_10G(sc)) {
2508			mmd = mid->phy_id >> 8;
2509			if (!mmd)
2510				mmd = MDIO_DEV_PCS;
2511			else if (mmd > MDIO_DEV_VEND2)
2512				return (EINVAL);
2513
2514			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2515					     mid->reg_num, &val);
2516		} else
2517			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2518					     mid->reg_num & 0x1f, &val);
2519		if (error == 0)
2520			mid->val_out = val;
2521		break;
2522	}
2523	case CHELSIO_SET_MIIREG: {
2524		struct cphy *phy = &pi->phy;
2525		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2526
2527		if (!phy->mdio_write)
2528			return (EOPNOTSUPP);
2529		if (is_10G(sc)) {
2530			mmd = mid->phy_id >> 8;
2531			if (!mmd)
2532				mmd = MDIO_DEV_PCS;
2533			else if (mmd > MDIO_DEV_VEND2)
2534				return (EINVAL);
2535
2536			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2537					      mmd, mid->reg_num, mid->val_in);
2538		} else
2539			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2540					      mid->reg_num & 0x1f,
2541					      mid->val_in);
2542		break;
2543	}
2544	case CHELSIO_SETREG: {
2545		struct ch_reg *edata = (struct ch_reg *)data;
2546		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2547			return (EFAULT);
2548		t3_write_reg(sc, edata->addr, edata->val);
2549		break;
2550	}
2551	case CHELSIO_GETREG: {
2552		struct ch_reg *edata = (struct ch_reg *)data;
2553		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2554			return (EFAULT);
2555		edata->val = t3_read_reg(sc, edata->addr);
2556		break;
2557	}
2558	case CHELSIO_GET_SGE_CONTEXT: {
2559		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2560		mtx_lock_spin(&sc->sge.reg_lock);
2561		switch (ecntxt->cntxt_type) {
2562		case CNTXT_TYPE_EGRESS:
2563			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2564			    ecntxt->data);
2565			break;
2566		case CNTXT_TYPE_FL:
2567			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2568			    ecntxt->data);
2569			break;
2570		case CNTXT_TYPE_RSP:
2571			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2572			    ecntxt->data);
2573			break;
2574		case CNTXT_TYPE_CQ:
2575			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2576			    ecntxt->data);
2577			break;
2578		default:
2579			error = EINVAL;
2580			break;
2581		}
2582		mtx_unlock_spin(&sc->sge.reg_lock);
2583		break;
2584	}
2585	case CHELSIO_GET_SGE_DESC: {
2586		struct ch_desc *edesc = (struct ch_desc *)data;
2587		int ret;
2588		if (edesc->queue_num >= SGE_QSETS * 6)
2589			return (EINVAL);
2590		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2591		    edesc->queue_num % 6, edesc->idx, edesc->data);
2592		if (ret < 0)
2593			return (EINVAL);
2594		edesc->size = ret;
2595		break;
2596	}
2597	case CHELSIO_GET_QSET_PARAMS: {
2598		struct qset_params *q;
2599		struct ch_qset_params *t = (struct ch_qset_params *)data;
2600		int q1 = pi->first_qset;
2601		int nqsets = pi->nqsets;
2602		int i;
2603
2604		if (t->qset_idx >= nqsets)
2605			return EINVAL;
2606
2607		i = q1 + t->qset_idx;
2608		q = &sc->params.sge.qset[i];
2609		t->rspq_size   = q->rspq_size;
2610		t->txq_size[0] = q->txq_size[0];
2611		t->txq_size[1] = q->txq_size[1];
2612		t->txq_size[2] = q->txq_size[2];
2613		t->fl_size[0]  = q->fl_size;
2614		t->fl_size[1]  = q->jumbo_size;
2615		t->polling     = q->polling;
2616		t->lro         = q->lro;
2617		t->intr_lat    = q->coalesce_usecs;
2618		t->cong_thres  = q->cong_thres;
2619		t->qnum        = i;
2620
2621		if ((sc->flags & FULL_INIT_DONE) == 0)
2622			t->vector = 0;
2623		else if (sc->flags & USING_MSIX)
2624			t->vector = rman_get_start(sc->msix_irq_res[i]);
2625		else
2626			t->vector = rman_get_start(sc->irq_res);
2627
2628		break;
2629	}
2630	case CHELSIO_GET_QSET_NUM: {
2631		struct ch_reg *edata = (struct ch_reg *)data;
2632		edata->val = pi->nqsets;
2633		break;
2634	}
2635	case CHELSIO_LOAD_FW: {
2636		uint8_t *fw_data;
2637		uint32_t vers;
2638		struct ch_mem_range *t = (struct ch_mem_range *)data;
2639
2640		/*
2641		 * You're allowed to load a firmware image only before FULL_INIT_DONE
2642		 *
2643		 * FW_UPTODATE is also set so the rest of the initialization
2644		 * will not overwrite what was loaded here.  This gives you the
2645		 * flexibility to load any firmware (and maybe shoot yourself in
2646		 * the foot).
2647		 */
2648
2649		ADAPTER_LOCK(sc);
2650		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2651			ADAPTER_UNLOCK(sc);
2652			return (EBUSY);
2653		}
2654
2655		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2656		if (!fw_data)
2657			error = ENOMEM;
2658		else
2659			error = copyin(t->buf, fw_data, t->len);
2660
2661		if (!error)
2662			error = -t3_load_fw(sc, fw_data, t->len);
2663
2664		if (t3_get_fw_version(sc, &vers) == 0) {
2665			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2666			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2667			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2668		}
2669
2670		if (!error)
2671			sc->flags |= FW_UPTODATE;
2672
2673		free(fw_data, M_DEVBUF);
2674		ADAPTER_UNLOCK(sc);
2675		break;
2676	}
2677	case CHELSIO_LOAD_BOOT: {
2678		uint8_t *boot_data;
2679		struct ch_mem_range *t = (struct ch_mem_range *)data;
2680
2681		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2682		if (!boot_data)
2683			return ENOMEM;
2684
2685		error = copyin(t->buf, boot_data, t->len);
2686		if (!error)
2687			error = -t3_load_boot(sc, boot_data, t->len);
2688
2689		free(boot_data, M_DEVBUF);
2690		break;
2691	}
2692	case CHELSIO_GET_PM: {
2693		struct ch_pm *m = (struct ch_pm *)data;
2694		struct tp_params *p = &sc->params.tp;
2695
2696		if (!is_offload(sc))
2697			return (EOPNOTSUPP);
2698
2699		m->tx_pg_sz = p->tx_pg_size;
2700		m->tx_num_pg = p->tx_num_pgs;
2701		m->rx_pg_sz  = p->rx_pg_size;
2702		m->rx_num_pg = p->rx_num_pgs;
2703		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2704
2705		break;
2706	}
2707	case CHELSIO_SET_PM: {
2708		struct ch_pm *m = (struct ch_pm *)data;
2709		struct tp_params *p = &sc->params.tp;
2710
2711		if (!is_offload(sc))
2712			return (EOPNOTSUPP);
2713		if (sc->flags & FULL_INIT_DONE)
2714			return (EBUSY);
2715
2716		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2717		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2718			return (EINVAL);	/* not power of 2 */
2719		if (!(m->rx_pg_sz & 0x14000))
2720			return (EINVAL);	/* not 16KB or 64KB */
2721		if (!(m->tx_pg_sz & 0x1554000))
2722			return (EINVAL);
2723		if (m->tx_num_pg == -1)
2724			m->tx_num_pg = p->tx_num_pgs;
2725		if (m->rx_num_pg == -1)
2726			m->rx_num_pg = p->rx_num_pgs;
2727		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2728			return (EINVAL);
2729		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2730		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2731			return (EINVAL);
2732
2733		p->rx_pg_size = m->rx_pg_sz;
2734		p->tx_pg_size = m->tx_pg_sz;
2735		p->rx_num_pgs = m->rx_num_pg;
2736		p->tx_num_pgs = m->tx_num_pg;
2737		break;
2738	}
2739	case CHELSIO_SETMTUTAB: {
2740		struct ch_mtus *m = (struct ch_mtus *)data;
2741		int i;
2742
2743		if (!is_offload(sc))
2744			return (EOPNOTSUPP);
2745		if (offload_running(sc))
2746			return (EBUSY);
2747		if (m->nmtus != NMTUS)
2748			return (EINVAL);
2749		if (m->mtus[0] < 81)         /* accommodate SACK */
2750			return (EINVAL);
2751
2752		/*
2753		 * MTUs must be in ascending order
2754		 */
2755		for (i = 1; i < NMTUS; ++i)
2756			if (m->mtus[i] < m->mtus[i - 1])
2757				return (EINVAL);
2758
2759		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2760		break;
2761	}
2762	case CHELSIO_GETMTUTAB: {
2763		struct ch_mtus *m = (struct ch_mtus *)data;
2764
2765		if (!is_offload(sc))
2766			return (EOPNOTSUPP);
2767
2768		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2769		m->nmtus = NMTUS;
2770		break;
2771	}
2772	case CHELSIO_GET_MEM: {
2773		struct ch_mem_range *t = (struct ch_mem_range *)data;
2774		struct mc7 *mem;
2775		uint8_t *useraddr;
2776		u64 buf[32];
2777
2778		/*
2779		 * Use these to avoid modifying len/addr in the return
2780		 * struct
2781		 */
2782		uint32_t len = t->len, addr = t->addr;
2783
2784		if (!is_offload(sc))
2785			return (EOPNOTSUPP);
2786		if (!(sc->flags & FULL_INIT_DONE))
2787			return (EIO);         /* need the memory controllers */
2788		if ((addr & 0x7) || (len & 0x7))
2789			return (EINVAL);
2790		if (t->mem_id == MEM_CM)
2791			mem = &sc->cm;
2792		else if (t->mem_id == MEM_PMRX)
2793			mem = &sc->pmrx;
2794		else if (t->mem_id == MEM_PMTX)
2795			mem = &sc->pmtx;
2796		else
2797			return (EINVAL);
2798
2799		/*
2800		 * Version scheme:
2801		 * bits 0..9: chip version
2802		 * bits 10..15: chip revision
2803		 */
2804		t->version = 3 | (sc->params.rev << 10);
2805
2806		/*
2807		 * Read 256 bytes at a time as len can be large and we don't
2808		 * want to use huge intermediate buffers.
2809		 */
2810		useraddr = (uint8_t *)t->buf;
2811		while (len) {
2812			unsigned int chunk = min(len, sizeof(buf));
2813
2814			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2815			if (error)
2816				return (-error);
2817			if (copyout(buf, useraddr, chunk))
2818				return (EFAULT);
2819			useraddr += chunk;
2820			addr += chunk;
2821			len -= chunk;
2822		}
2823		break;
2824	}
2825	case CHELSIO_READ_TCAM_WORD: {
2826		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2827
2828		if (!is_offload(sc))
2829			return (EOPNOTSUPP);
2830		if (!(sc->flags & FULL_INIT_DONE))
2831			return (EIO);         /* need MC5 */
2832		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2833		break;
2834	}
2835	case CHELSIO_SET_TRACE_FILTER: {
2836		struct ch_trace *t = (struct ch_trace *)data;
2837		const struct trace_params *tp;
2838
2839		tp = (const struct trace_params *)&t->sip;
2840		if (t->config_tx)
2841			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2842					       t->trace_tx);
2843		if (t->config_rx)
2844			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2845					       t->trace_rx);
2846		break;
2847	}
2848	case CHELSIO_SET_PKTSCHED: {
2849		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2850		if (sc->open_device_map == 0)
2851			return (EAGAIN);
2852		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2853		    p->binding);
2854		break;
2855	}
2856	case CHELSIO_IFCONF_GETREGS: {
2857		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2858		int reglen = cxgb_get_regs_len();
2859		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2860		if (buf == NULL) {
2861			return (ENOMEM);
2862		}
2863		if (regs->len > reglen)
2864			regs->len = reglen;
2865		else if (regs->len < reglen)
2866			error = ENOBUFS;
2867
2868		if (!error) {
2869			cxgb_get_regs(sc, regs, buf);
2870			error = copyout(buf, regs->data, reglen);
2871		}
2872		free(buf, M_DEVBUF);
2873
2874		break;
2875	}
2876	case CHELSIO_SET_HW_SCHED: {
2877		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2878		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2879
2880		if ((sc->flags & FULL_INIT_DONE) == 0)
2881			return (EAGAIN);       /* need TP to be initialized */
2882		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2883		    !in_range(t->channel, 0, 1) ||
2884		    !in_range(t->kbps, 0, 10000000) ||
2885		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2886		    !in_range(t->flow_ipg, 0,
2887			      dack_ticks_to_usec(sc, 0x7ff)))
2888			return (EINVAL);
2889
2890		if (t->kbps >= 0) {
2891			error = t3_config_sched(sc, t->kbps, t->sched);
2892			if (error < 0)
2893				return (-error);
2894		}
2895		if (t->class_ipg >= 0)
2896			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2897		if (t->flow_ipg >= 0) {
2898			t->flow_ipg *= 1000;     /* us -> ns */
2899			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2900		}
2901		if (t->mode >= 0) {
2902			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2903
2904			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2905					 bit, t->mode ? bit : 0);
2906		}
2907		if (t->channel >= 0)
2908			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2909					 1 << t->sched, t->channel << t->sched);
2910		break;
2911	}
2912	case CHELSIO_GET_EEPROM: {
2913		int i;
2914		struct ch_eeprom *e = (struct ch_eeprom *)data;
2915		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2916
2917		if (buf == NULL) {
2918			return (ENOMEM);
2919		}
2920		e->magic = EEPROM_MAGIC;
2921		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2922			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2923
2924		if (!error)
2925			error = copyout(buf + e->offset, e->data, e->len);
2926
2927		free(buf, M_DEVBUF);
2928		break;
2929	}
2930	case CHELSIO_CLEAR_STATS: {
2931		if (!(sc->flags & FULL_INIT_DONE))
2932			return EAGAIN;
2933
2934		PORT_LOCK(pi);
2935		t3_mac_update_stats(&pi->mac);
2936		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2937		PORT_UNLOCK(pi);
2938		break;
2939	}
2940	case CHELSIO_GET_UP_LA: {
2941		struct ch_up_la *la = (struct ch_up_la *)data;
2942		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2943		if (buf == NULL) {
2944			return (ENOMEM);
2945		}
2946		if (la->bufsize < LA_BUFSIZE)
2947			error = ENOBUFS;
2948
2949		if (!error)
2950			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2951					      &la->bufsize, buf);
2952		if (!error)
2953			error = copyout(buf, la->data, la->bufsize);
2954
2955		free(buf, M_DEVBUF);
2956		break;
2957	}
2958	case CHELSIO_GET_UP_IOQS: {
2959		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2960		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2961		uint32_t *v;
2962
2963		if (buf == NULL) {
2964			return (ENOMEM);
2965		}
2966		if (ioqs->bufsize < IOQS_BUFSIZE)
2967			error = ENOBUFS;
2968
2969		if (!error)
2970			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2971
2972		if (!error) {
2973			v = (uint32_t *)buf;
2974
2975			ioqs->ioq_rx_enable = *v++;
2976			ioqs->ioq_tx_enable = *v++;
2977			ioqs->ioq_rx_status = *v++;
2978			ioqs->ioq_tx_status = *v++;
2979
2980			error = copyout(v, ioqs->data, ioqs->bufsize);
2981		}
2982
2983		free(buf, M_DEVBUF);
2984		break;
2985	}
2986	case CHELSIO_SET_FILTER: {
2987		struct ch_filter *f = (struct ch_filter *)data;
2988		struct filter_info *p;
2989		unsigned int nfilters = sc->params.mc5.nfilters;
2990
2991		if (!is_offload(sc))
2992			return (EOPNOTSUPP);	/* No TCAM */
2993		if (!(sc->flags & FULL_INIT_DONE))
2994			return (EAGAIN);	/* mc5 not setup yet */
2995		if (nfilters == 0)
2996			return (EBUSY);		/* TOE will use TCAM */
2997
2998		/* sanity checks */
2999		if (f->filter_id >= nfilters ||
3000		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3001		    (f->val.sport && f->mask.sport != 0xffff) ||
3002		    (f->val.dport && f->mask.dport != 0xffff) ||
3003		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3004		    (f->val.vlan_prio &&
3005			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3006		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3007		    f->qset >= SGE_QSETS ||
3008		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3009			return (EINVAL);
3010
3011		/* Was allocated with M_WAITOK */
3012		KASSERT(sc->filters, ("filter table NULL\n"));
3013
3014		p = &sc->filters[f->filter_id];
3015		if (p->locked)
3016			return (EPERM);
3017
3018		bzero(p, sizeof(*p));
3019		p->sip = f->val.sip;
3020		p->sip_mask = f->mask.sip;
3021		p->dip = f->val.dip;
3022		p->sport = f->val.sport;
3023		p->dport = f->val.dport;
3024		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3025		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3026		    FILTER_NO_VLAN_PRI;
3027		p->mac_hit = f->mac_hit;
3028		p->mac_vld = f->mac_addr_idx != 0xffff;
3029		p->mac_idx = f->mac_addr_idx;
3030		p->pkt_type = f->proto;
3031		p->report_filter_id = f->want_filter_id;
3032		p->pass = f->pass;
3033		p->rss = f->rss;
3034		p->qset = f->qset;
3035
3036		error = set_filter(sc, f->filter_id, p);
3037		if (error == 0)
3038			p->valid = 1;
3039		break;
3040	}
3041	case CHELSIO_DEL_FILTER: {
3042		struct ch_filter *f = (struct ch_filter *)data;
3043		struct filter_info *p;
3044		unsigned int nfilters = sc->params.mc5.nfilters;
3045
3046		if (!is_offload(sc))
3047			return (EOPNOTSUPP);
3048		if (!(sc->flags & FULL_INIT_DONE))
3049			return (EAGAIN);
3050		if (nfilters == 0 || sc->filters == NULL)
3051			return (EINVAL);
3052		if (f->filter_id >= nfilters)
3053			return (EINVAL);
3054
3055		p = &sc->filters[f->filter_id];
3056		if (p->locked)
3057			return (EPERM);
3058		if (!p->valid)
3059			return (EFAULT); /* Read "Bad address" as "Bad index" */
3060
3061		bzero(p, sizeof(*p));
3062		p->sip = p->sip_mask = 0xffffffff;
3063		p->vlan = 0xfff;
3064		p->vlan_prio = FILTER_NO_VLAN_PRI;
3065		p->pkt_type = 1;
3066		error = set_filter(sc, f->filter_id, p);
3067		break;
3068	}
3069	case CHELSIO_GET_FILTER: {
3070		struct ch_filter *f = (struct ch_filter *)data;
3071		struct filter_info *p;
3072		unsigned int i, nfilters = sc->params.mc5.nfilters;
3073
3074		if (!is_offload(sc))
3075			return (EOPNOTSUPP);
3076		if (!(sc->flags & FULL_INIT_DONE))
3077			return (EAGAIN);
3078		if (nfilters == 0 || sc->filters == NULL)
3079			return (EINVAL);
3080
3081		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3082		for (; i < nfilters; i++) {
3083			p = &sc->filters[i];
3084			if (!p->valid)
3085				continue;
3086
3087			bzero(f, sizeof(*f));
3088
3089			f->filter_id = i;
3090			f->val.sip = p->sip;
3091			f->mask.sip = p->sip_mask;
3092			f->val.dip = p->dip;
3093			f->mask.dip = p->dip ? 0xffffffff : 0;
3094			f->val.sport = p->sport;
3095			f->mask.sport = p->sport ? 0xffff : 0;
3096			f->val.dport = p->dport;
3097			f->mask.dport = p->dport ? 0xffff : 0;
3098			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3099			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3100			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3101			    0 : p->vlan_prio;
3102			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3103			    0 : FILTER_NO_VLAN_PRI;
3104			f->mac_hit = p->mac_hit;
3105			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3106			f->proto = p->pkt_type;
3107			f->want_filter_id = p->report_filter_id;
3108			f->pass = p->pass;
3109			f->rss = p->rss;
3110			f->qset = p->qset;
3111
3112			break;
3113		}
3114
3115		if (i == nfilters)
3116			f->filter_id = 0xffffffff;
3117		break;
3118	}
3119	default:
3120		return (EOPNOTSUPP);
3121		break;
3122	}
3123
3124	return (error);
3125}
3126
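/*
 * Dump the registers in [start, end] into buf at the matching offsets.
 */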
3127static __inline void
3128reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3129    unsigned int end)
3130{
3131	uint32_t *p = (uint32_t *)(buf + start);
3132
3133	for ( ; start <= end; start += sizeof(uint32_t))
3134		*p++ = t3_read_reg(ap, start);
3135}
3136
3137#define T3_REGMAP_SIZE (3 * 1024)
3138static int
3139cxgb_get_regs_len(void)
3140{
3141	return T3_REGMAP_SIZE;
3142}
3143
3144static void
3145cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3146{
3147
3148	/*
3149	 * Version scheme:
3150	 * bits 0..9: chip version
3151	 * bits 10..15: chip revision
3152	 * bit 31: set for PCIe cards
3153	 */
3154	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3155
3156	/*
3157	 * We skip the MAC statistics registers because they are clear-on-read.
3158	 * Also reading multi-register stats would need to synchronize with the
3159	 * periodic mac stats accumulation.  Hard to justify the complexity.
3160	 */
3161	memset(buf, 0, cxgb_get_regs_len());
3162	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3163	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3164	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3165	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3166	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3167	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3168		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3169	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3170		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3171}
3172
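/*
 * Allocate the software filter table.  The last entry is pre-configured as a
 * locked pass + RSS filter and cannot be modified through the ioctls.
 */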
3173static int
3174alloc_filters(struct adapter *sc)
3175{
3176	struct filter_info *p;
3177	unsigned int nfilters = sc->params.mc5.nfilters;
3178
3179	if (nfilters == 0)
3180		return (0);
3181
3182	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3183	sc->filters = p;
3184
3185	p = &sc->filters[nfilters - 1];
3186	p->vlan = 0xfff;
3187	p->vlan_prio = FILTER_NO_VLAN_PRI;
3188	p->pass = p->rss = p->valid = p->locked = 1;
3189
3190	return (0);
3191}
3192
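/*
 * Enable filtering in the hardware and program any locked (pre-configured)
 * filter entries.
 */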
3193static int
3194setup_hw_filters(struct adapter *sc)
3195{
3196	int i, rc;
3197	unsigned int nfilters = sc->params.mc5.nfilters;
3198
3199	if (!sc->filters)
3200		return (0);
3201
3202	t3_enable_filters(sc);
3203
3204	for (i = rc = 0; i < nfilters && !rc; i++) {
3205		if (sc->filters[i].locked)
3206			rc = set_filter(sc, i, &sc->filters[i]);
3207	}
3208
3209	return (rc);
3210}
3211
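/*
 * Program one filter into the TCAM.  A single bypass work request carries a
 * CPL_PASS_OPEN_REQ with the match tuple plus two CPL_SET_TCB_FIELDs for the
 * filter's action; a separate request steers matches to a specific qset when
 * the filter passes traffic with RSS disabled.
 */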
3212static int
3213set_filter(struct adapter *sc, int id, const struct filter_info *f)
3214{
3215	int len;
3216	struct mbuf *m;
3217	struct ulp_txpkt *txpkt;
3218	struct work_request_hdr *wr;
3219	struct cpl_pass_open_req *oreq;
3220	struct cpl_set_tcb_field *sreq;
3221
3222	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3223	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3224
3225	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3226	      sc->params.mc5.nfilters;
3227
3228	m = m_gethdr(M_WAITOK, MT_DATA);
3229	m->m_len = m->m_pkthdr.len = len;
3230	bzero(mtod(m, char *), len);
3231
3232	wr = mtod(m, struct work_request_hdr *);
3233	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3234
3235	oreq = (struct cpl_pass_open_req *)(wr + 1);
3236	txpkt = (struct ulp_txpkt *)oreq;
3237	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3238	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3239	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3240	oreq->local_port = htons(f->dport);
3241	oreq->peer_port = htons(f->sport);
3242	oreq->local_ip = htonl(f->dip);
3243	oreq->peer_ip = htonl(f->sip);
3244	oreq->peer_netmask = htonl(f->sip_mask);
3245	oreq->opt0h = 0;
3246	oreq->opt0l = htonl(F_NO_OFFLOAD);
3247	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3248			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3249			 V_VLAN_PRI(f->vlan_prio >> 1) |
3250			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3251			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3252			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3253
3254	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3255	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3256			  (f->report_filter_id << 15) | (1 << 23) |
3257			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3258	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3259	t3_mgmt_tx(sc, m);
3260
3261	if (f->pass && !f->rss) {
3262		len = sizeof(*sreq);
3263		m = m_gethdr(M_WAITOK, MT_DATA);
3264		m->m_len = m->m_pkthdr.len = len;
3265		bzero(mtod(m, char *), len);
3266		sreq = mtod(m, struct cpl_set_tcb_field *);
3267		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3268		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3269				 (u64)sc->rrss_map[f->qset] << 19);
3270		t3_mgmt_tx(sc, m);
3271	}
3272	return 0;
3273}
3274
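/*
 * Helpers to build CPL_SET_TCB_FIELD requests (no reply requested), either
 * standalone or embedded in a ULP_TXPKT.
 */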
3275static inline void
3276mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3277    unsigned int word, u64 mask, u64 val)
3278{
3279	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3280	req->reply = V_NO_REPLY(1);
3281	req->cpu_idx = 0;
3282	req->word = htons(word);
3283	req->mask = htobe64(mask);
3284	req->val = htobe64(val);
3285}
3286
3287static inline void
3288set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3289    unsigned int word, u64 mask, u64 val)
3290{
3291	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3292
3293	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3294	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3295	mk_set_tcb_field(req, tid, word, mask, val);
3296}
3297
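/*
 * Call func on every adapter in the system while holding the adapter list
 * lock.
 */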
3298void
3299t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3300{
3301	struct adapter *sc;
3302
3303	mtx_lock(&t3_list_lock);
3304	SLIST_FOREACH(sc, &t3_list, link) {
3305		/*
3306		 * func should not make any assumptions about what state sc is
3307		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3308		 */
3309		func(sc, arg);
3310	}
3311	mtx_unlock(&t3_list_lock);
3312}
3313
3314#ifdef TCP_OFFLOAD
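/*
 * Enable or disable TOE capability on a port.  The first enable activates the
 * TOM upper layer driver (and, if available, iWARP).
 */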
3315static int
3316toe_capability(struct port_info *pi, int enable)
3317{
3318	int rc;
3319	struct adapter *sc = pi->adapter;
3320
3321	ADAPTER_LOCK_ASSERT_OWNED(sc);
3322
3323	if (!is_offload(sc))
3324		return (ENODEV);
3325
3326	if (enable) {
3327		if (!(sc->flags & FULL_INIT_DONE)) {
3328			log(LOG_WARNING,
3329			    "You must enable a cxgb interface first\n");
3330			return (EAGAIN);
3331		}
3332
3333		if (isset(&sc->offload_map, pi->port_id))
3334			return (0);
3335
3336		if (!(sc->flags & TOM_INIT_DONE)) {
3337			rc = t3_activate_uld(sc, ULD_TOM);
3338			if (rc == EAGAIN) {
3339				log(LOG_WARNING,
3340				    "You must kldload t3_tom.ko before trying "
3341				    "to enable TOE on a cxgb interface.\n");
3342			}
3343			if (rc != 0)
3344				return (rc);
3345			KASSERT(sc->tom_softc != NULL,
3346			    ("%s: TOM activated but softc NULL", __func__));
3347			KASSERT(sc->flags & TOM_INIT_DONE,
3348			    ("%s: TOM activated but flag not set", __func__));
3349		}
3350
3351		setbit(&sc->offload_map, pi->port_id);
3352
3353		/*
3354		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3355		 * enabled on any port.  Need to figure out how to enable,
3356		 * disable, load, and unload iWARP cleanly.
3357		 */
3358		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3359		    t3_activate_uld(sc, ULD_IWARP) == 0)
3360			setbit(&sc->offload_map, MAX_NPORTS);
3361	} else {
3362		if (!isset(&sc->offload_map, pi->port_id))
3363			return (0);
3364
3365		KASSERT(sc->flags & TOM_INIT_DONE,
3366		    ("%s: TOM never initialized?", __func__));
3367		clrbit(&sc->offload_map, pi->port_id);
3368	}
3369
3370	return (0);
3371}
3372
3373/*
3374 * Add an upper layer driver to the global list.
3375 */
3376int
3377t3_register_uld(struct uld_info *ui)
3378{
3379	int rc = 0;
3380	struct uld_info *u;
3381
3382	mtx_lock(&t3_uld_list_lock);
3383	SLIST_FOREACH(u, &t3_uld_list, link) {
3384		if (u->uld_id == ui->uld_id) {
3385			rc = EEXIST;
3386			goto done;
3387		}
3388	}
3389
3390	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3391	ui->refcount = 0;
3392done:
3393	mtx_unlock(&t3_uld_list_lock);
3394	return (rc);
3395}
3396
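/*
 * Remove an upper layer driver from the global list; fails if it is still in
 * use.
 */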
3397int
3398t3_unregister_uld(struct uld_info *ui)
3399{
3400	int rc = EINVAL;
3401	struct uld_info *u;
3402
3403	mtx_lock(&t3_uld_list_lock);
3404
3405	SLIST_FOREACH(u, &t3_uld_list, link) {
3406		if (u == ui) {
3407			if (ui->refcount > 0) {
3408				rc = EBUSY;
3409				goto done;
3410			}
3411
3412			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3413			rc = 0;
3414			goto done;
3415		}
3416	}
3417done:
3418	mtx_unlock(&t3_uld_list_lock);
3419	return (rc);
3420}
3421
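/*
 * Activate a registered ULD on this adapter and bump its refcount on success.
 */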
3422int
3423t3_activate_uld(struct adapter *sc, int id)
3424{
3425	int rc = EAGAIN;
3426	struct uld_info *ui;
3427
3428	mtx_lock(&t3_uld_list_lock);
3429
3430	SLIST_FOREACH(ui, &t3_uld_list, link) {
3431		if (ui->uld_id == id) {
3432			rc = ui->activate(sc);
3433			if (rc == 0)
3434				ui->refcount++;
3435			goto done;
3436		}
3437	}
3438done:
3439	mtx_unlock(&t3_uld_list_lock);
3440
3441	return (rc);
3442}
3443
3444int
3445t3_deactivate_uld(struct adapter *sc, int id)
3446{
3447	int rc = EINVAL;
3448	struct uld_info *ui;
3449
3450	mtx_lock(&t3_uld_list_lock);
3451
3452	SLIST_FOREACH(ui, &t3_uld_list, link) {
3453		if (ui->uld_id == id) {
3454			rc = ui->deactivate(sc);
3455			if (rc == 0)
3456				ui->refcount--;
3457			goto done;
3458		}
3459	}
3460done:
3461	mtx_unlock(&t3_uld_list_lock);
3462
3463	return (rc);
3464}
3465
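/*
 * Default handler for CPL opcodes that no handler has been registered for:
 * drop the mbuf.
 */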
3466static int
3467cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3468    struct mbuf *m)
3469{
3470	m_freem(m);
3471	return (EDOOFUS);
3472}
3473
3474int
3475t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3476{
3477	uintptr_t *loc, new;
3478
3479	if (opcode >= NUM_CPL_HANDLERS)
3480		return (EINVAL);
3481
3482	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3483	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3484	atomic_store_rel_ptr(loc, new);
3485
3486	return (0);
3487}
3488#endif
3489
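/*
 * Module load/unload handler.  Initializes the global adapter (and ULD) lists
 * on load; unload is refused while any adapter or ULD is still registered.
 */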
3490static int
3491cxgbc_mod_event(module_t mod, int cmd, void *arg)
3492{
3493	int rc = 0;
3494
3495	switch (cmd) {
3496	case MOD_LOAD:
3497		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3498		SLIST_INIT(&t3_list);
3499#ifdef TCP_OFFLOAD
3500		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3501		SLIST_INIT(&t3_uld_list);
3502#endif
3503		break;
3504
3505	case MOD_UNLOAD:
3506#ifdef TCP_OFFLOAD
3507		mtx_lock(&t3_uld_list_lock);
3508		if (!SLIST_EMPTY(&t3_uld_list)) {
3509			rc = EBUSY;
3510			mtx_unlock(&t3_uld_list_lock);
3511			break;
3512		}
3513		mtx_unlock(&t3_uld_list_lock);
3514		mtx_destroy(&t3_uld_list_lock);
3515#endif
3516		mtx_lock(&t3_list_lock);
3517		if (!SLIST_EMPTY(&t3_list)) {
3518			rc = EBUSY;
3519			mtx_unlock(&t3_list_lock);
3520			break;
3521		}
3522		mtx_unlock(&t3_list_lock);
3523		mtx_destroy(&t3_list_lock);
3524		break;
3525	}
3526
3527	return (rc);
3528}
3529