cxgb_main.c revision 257176
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 257176 2013-10-26 17:58:36Z glebius $");
32
33#include "opt_inet.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/module.h>
40#include <sys/pciio.h>
41#include <sys/conf.h>
42#include <machine/bus.h>
43#include <machine/resource.h>
44#include <sys/bus_dma.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_var.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67#include <net/if_types.h>
68#include <net/if_vlan_var.h>
69
70#include <netinet/in_systm.h>
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
73#include <netinet/ip.h>
75#include <netinet/tcp.h>
76#include <netinet/udp.h>
77
78#include <dev/pci/pcireg.h>
79#include <dev/pci/pcivar.h>
80#include <dev/pci/pci_private.h>
81
82#include <cxgb_include.h>
83
84#ifdef PRIV_SUPPORTED
85#include <sys/priv.h>
86#endif
87
88static int cxgb_setup_interrupts(adapter_t *);
89static void cxgb_teardown_interrupts(adapter_t *);
90static void cxgb_init(void *);
91static int cxgb_init_locked(struct port_info *);
92static int cxgb_uninit_locked(struct port_info *);
93static int cxgb_uninit_synchronized(struct port_info *);
94static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
95static int cxgb_media_change(struct ifnet *);
96static int cxgb_ifm_type(int);
97static void cxgb_build_medialist(struct port_info *);
98static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
99static int setup_sge_qsets(adapter_t *);
100static void cxgb_async_intr(void *);
101static void cxgb_tick_handler(void *, int);
102static void cxgb_tick(void *);
103static void link_check_callout(void *);
104static void check_link_status(void *, int);
105static void setup_rss(adapter_t *sc);
106static int alloc_filters(struct adapter *);
107static int setup_hw_filters(struct adapter *);
108static int set_filter(struct adapter *, int, const struct filter_info *);
109static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
110    unsigned int, u64, u64);
111static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
112    unsigned int, u64, u64);
113#ifdef TCP_OFFLOAD
114static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
115#endif
116
117/* Attachment glue for the PCI controller end of the device.  Each port of
118 * the device is attached separately, as defined later.
119 */
120static int cxgb_controller_probe(device_t);
121static int cxgb_controller_attach(device_t);
122static int cxgb_controller_detach(device_t);
123static void cxgb_free(struct adapter *);
124static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
125    unsigned int end);
126static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
127static int cxgb_get_regs_len(void);
128static void touch_bars(device_t dev);
129static void cxgb_update_mac_settings(struct port_info *p);
130#ifdef TCP_OFFLOAD
131static int toe_capability(struct port_info *, int);
132#endif
133
134static device_method_t cxgb_controller_methods[] = {
135	DEVMETHOD(device_probe,		cxgb_controller_probe),
136	DEVMETHOD(device_attach,	cxgb_controller_attach),
137	DEVMETHOD(device_detach,	cxgb_controller_detach),
138
139	DEVMETHOD_END
140};
141
142static driver_t cxgb_controller_driver = {
143	"cxgbc",
144	cxgb_controller_methods,
145	sizeof(struct adapter)
146};
147
148static int cxgbc_mod_event(module_t, int, void *);
149static devclass_t	cxgb_controller_devclass;
150DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
151    cxgbc_mod_event, 0);
152MODULE_VERSION(cxgbc, 1);
153MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
154
155/*
156 * Attachment glue for the ports.  Attachment is done directly to the
157 * controller device.
158 */
159static int cxgb_port_probe(device_t);
160static int cxgb_port_attach(device_t);
161static int cxgb_port_detach(device_t);
162
163static device_method_t cxgb_port_methods[] = {
164	DEVMETHOD(device_probe,		cxgb_port_probe),
165	DEVMETHOD(device_attach,	cxgb_port_attach),
166	DEVMETHOD(device_detach,	cxgb_port_detach),
167	{ 0, 0 }
168};
169
170static driver_t cxgb_port_driver = {
171	"cxgb",
172	cxgb_port_methods,
173	0
174};
175
176static d_ioctl_t cxgb_extension_ioctl;
177static d_open_t cxgb_extension_open;
178static d_close_t cxgb_extension_close;
179
180static struct cdevsw cxgb_cdevsw = {
181       .d_version =    D_VERSION,
182       .d_flags =      0,
183       .d_open =       cxgb_extension_open,
184       .d_close =      cxgb_extension_close,
185       .d_ioctl =      cxgb_extension_ioctl,
186       .d_name =       "cxgb",
187};
188
189static devclass_t	cxgb_port_devclass;
190DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
191MODULE_VERSION(cxgb, 1);
192
193static struct mtx t3_list_lock;
194static SLIST_HEAD(, adapter) t3_list;
195#ifdef TCP_OFFLOAD
196static struct mtx t3_uld_list_lock;
197static SLIST_HEAD(, uld_info) t3_uld_list;
198#endif
199
200/*
201 * The driver uses the best interrupt scheme available on a platform in the
202 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
203 * of these schemes the driver may consider as follows:
204 *
205 * msi = 2: choose from among all three options
206 * msi = 1: only consider MSI and pin interrupts
207 * msi = 0: force pin interrupts
208 */
209static int msi_allowed = 2;
210
211TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
212SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
213SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
214    "MSI-X, MSI, INTx selector");
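
/*
 * All of the hw.cxgb.* knobs in this file are loader tunables (CTLFLAG_RDTUN),
 * so they are normally set from /boot/loader.conf before the module is
 * loaded, for example:
 *
 *	hw.cxgb.msi_allowed="1"		# consider only MSI and INTx
 */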
215
216/*
217 * The driver uses an auto-queue algorithm by default.
218 * To disable it and force a single queue-set per port, use multiq = 0
219 */
220static int multiq = 1;
221TUNABLE_INT("hw.cxgb.multiq", &multiq);
222SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
223    "use min(ncpus/ports, 8) queue-sets per port");
224
225/*
226 * By default the driver will not update the firmware unless
227 * it was compiled against a newer version.
229 */
230static int force_fw_update = 0;
231TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
232SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
233    "update firmware even if up to date");
234
235int cxgb_use_16k_clusters = -1;
236TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
237SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
238    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
239
240static int nfilters = -1;
241TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
242SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
243    &nfilters, 0, "max number of entries in the filter table");
244
245enum {
246	MAX_TXQ_ENTRIES      = 16384,
247	MAX_CTRL_TXQ_ENTRIES = 1024,
248	MAX_RSPQ_ENTRIES     = 16384,
249	MAX_RX_BUFFERS       = 16384,
250	MAX_RX_JUMBO_BUFFERS = 16384,
251	MIN_TXQ_ENTRIES      = 4,
252	MIN_CTRL_TXQ_ENTRIES = 4,
253	MIN_RSPQ_ENTRIES     = 32,
254	MIN_FL_ENTRIES       = 32,
255	MIN_FL_JUMBO_ENTRIES = 32
256};
257
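/*
 * Software state for one entry in the adapter's hardware filter table.
 * set_filter() pushes these fields to the hardware using the
 * cpl_set_tcb_field helpers declared above; alloc_filters() and
 * setup_hw_filters() size and load the table (see hw.cxgb.nfilters).
 */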
258struct filter_info {
259	u32 sip;
260	u32 sip_mask;
261	u32 dip;
262	u16 sport;
263	u16 dport;
264	u32 vlan:12;
265	u32 vlan_prio:3;
266	u32 mac_hit:1;
267	u32 mac_idx:4;
268	u32 mac_vld:1;
269	u32 pkt_type:2;
270	u32 report_filter_id:1;
271	u32 pass:1;
272	u32 rss:1;
273	u32 qset:3;
274	u32 locked:1;
275	u32 valid:1;
276};
277
278enum { FILTER_NO_VLAN_PRI = 7 };
279
280#define EEPROM_MAGIC 0x38E2F10C
281
282#define PORT_MASK ((1 << MAX_NPORTS) - 1)
283
284/* Table for probing the cards.  The desc field isn't actually used */
285struct cxgb_ident {
286	uint16_t	vendor;
287	uint16_t	device;
288	int		index;
289	char		*desc;
290} cxgb_identifiers[] = {
291	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
300	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
301	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
302	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
303	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
304	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
305	{0, 0, 0, NULL}
306};
307
308static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
309
310
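/*
 * Map the adapter's hardware revision to the character used in the
 * TP EEPROM/SRAM firmware image names (e.g. cxgb_t3c_protocol_sram).
 */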
311static __inline char
312t3rev2char(struct adapter *adapter)
313{
314	char rev = 'z';
315
316	switch(adapter->params.rev) {
317	case T3_REV_A:
318		rev = 'a';
319		break;
320	case T3_REV_B:
321	case T3_REV_B2:
322		rev = 'b';
323		break;
324	case T3_REV_C:
325		rev = 'c';
326		break;
327	}
328	return rev;
329}
330
331static struct cxgb_ident *
332cxgb_get_ident(device_t dev)
333{
334	struct cxgb_ident *id;
335
336	for (id = cxgb_identifiers; id->desc != NULL; id++) {
337		if ((id->vendor == pci_get_vendor(dev)) &&
338		    (id->device == pci_get_device(dev))) {
339			return (id);
340		}
341	}
342	return (NULL);
343}
344
345static const struct adapter_info *
346cxgb_get_adapter_info(device_t dev)
347{
348	struct cxgb_ident *id;
349	const struct adapter_info *ai;
350
351	id = cxgb_get_ident(dev);
352	if (id == NULL)
353		return (NULL);
354
355	ai = t3_get_adapter_info(id->index);
356
357	return (ai);
358}
359
360static int
361cxgb_controller_probe(device_t dev)
362{
363	const struct adapter_info *ai;
364	char *ports, buf[80];
365	int nports;
366
367	ai = cxgb_get_adapter_info(dev);
368	if (ai == NULL)
369		return (ENXIO);
370
371	nports = ai->nports0 + ai->nports1;
372	if (nports == 1)
373		ports = "port";
374	else
375		ports = "ports";
376
377	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
378	device_set_desc_copy(dev, buf);
379	return (BUS_PROBE_DEFAULT);
380}
381
382#define FW_FNAME "cxgb_t3fw"
383#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
384#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
385
386static int
387upgrade_fw(adapter_t *sc)
388{
389	const struct firmware *fw;
390	int status;
391	u32 vers;
392
393	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
394		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
395		return (ENOENT);
396	} else
397		device_printf(sc->dev, "installing firmware on card\n");
398	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
399
400	if (status != 0) {
401		device_printf(sc->dev, "failed to install firmware: %d\n",
402		    status);
403	} else {
404		t3_get_fw_version(sc, &vers);
405		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
406		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
407		    G_FW_VERSION_MICRO(vers));
408	}
409
410	firmware_put(fw, FIRMWARE_UNLOAD);
411
412	return (status);
413}
414
415/*
416 * The cxgb_controller_attach function is responsible for the initial
417 * bringup of the device.  Its responsibilities include:
418 *
419 *  1. Determine if the device supports MSI or MSI-X.
420 *  2. Allocate bus resources so that we can access the Base Address Register.
421 *  3. Create and initialize mutexes for the controller and its control
422 *     logic such as SGE and MDIO.
423 *  4. Call the hardware-specific setup routine for the adapter as a whole.
424 *  5. Allocate the BAR for doing MSI-X.
425 *  6. Set up the line interrupt iff MSI-X is not supported.
426 *  7. Create the driver's taskq.
427 *  8. Start one task queue service thread.
428 *  9. Check if the firmware and SRAM are up-to-date.  They will be
429 *     auto-updated later (before FULL_INIT_DONE), if required.
430 * 10. Create a child device for each MAC (port).
431 * 11. Initialize T3 private state.
432 * 12. Trigger the LED.
433 * 13. Set up offload iff supported.
434 * 14. Reset/restart the tick callout.
435 * 15. Attach sysctls.
436 *
437 * NOTE: Any modification of or deviation from this sequence MUST be
438 * reflected in the list above.  Failure to do so will result in problems
439 * on various error conditions including link flapping.
440 */
441static int
442cxgb_controller_attach(device_t dev)
443{
444	device_t child;
445	const struct adapter_info *ai;
446	struct adapter *sc;
447	int i, error = 0;
448	uint32_t vers;
449	int port_qsets = 1;
450	int msi_needed, reg;
451	char buf[80];
452
453	sc = device_get_softc(dev);
454	sc->dev = dev;
455	sc->msi_count = 0;
456	ai = cxgb_get_adapter_info(dev);
457
458	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
459	    device_get_unit(dev));
460	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
461
462	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
463	    device_get_unit(dev));
464	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
465	    device_get_unit(dev));
466	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
467	    device_get_unit(dev));
468
469	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
470	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
471	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
472
473	mtx_lock(&t3_list_lock);
474	SLIST_INSERT_HEAD(&t3_list, sc, link);
475	mtx_unlock(&t3_list_lock);
476
477	/* Find the PCIe link width and set max read request to 4KB. */
478	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
479		uint16_t lnk;
480
481		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
482		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
483		if (sc->link_width < 8 &&
484		    (ai->caps & SUPPORTED_10000baseT_Full)) {
485			device_printf(sc->dev,
486			    "PCIe x%d Link, expect reduced performance\n",
487			    sc->link_width);
488		}
489
490		pci_set_max_read_req(dev, 4096);
491	}
492
493	touch_bars(dev);
494	pci_enable_busmaster(dev);
495	/*
496	 * Allocate the registers and make them available to the driver.
497	 * The registers that we care about for NIC mode are in BAR 0
498	 */
499	sc->regs_rid = PCIR_BAR(0);
500	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
501	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
502		device_printf(dev, "Cannot allocate BAR region 0\n");
503		error = ENXIO;
504		goto out;
505	}
506
507	sc->bt = rman_get_bustag(sc->regs_res);
508	sc->bh = rman_get_bushandle(sc->regs_res);
509	sc->mmio_len = rman_get_size(sc->regs_res);
510
511	for (i = 0; i < MAX_NPORTS; i++)
512		sc->port[i].adapter = sc;
513
514	if (t3_prep_adapter(sc, ai, 1) < 0) {
515		printf("prep adapter failed\n");
516		error = ENODEV;
517		goto out;
518	}
519
520	sc->udbs_rid = PCIR_BAR(2);
521	sc->udbs_res = NULL;
522	if (is_offload(sc) &&
523	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
524		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
525		device_printf(dev, "Cannot allocate BAR region 1\n");
526		error = ENXIO;
527		goto out;
528	}
529
530        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
531	 * enough messages for the queue sets.  If that fails, try falling
532	 * back to MSI.  If that fails, then try falling back to the legacy
533	 * interrupt pin model.
534	 */
535	sc->msix_regs_rid = 0x20;
536	if ((msi_allowed >= 2) &&
537	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
538	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
539
540		if (multiq)
541			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
542		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
543
544		if (pci_msix_count(dev) == 0 ||
545		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
546		    sc->msi_count != msi_needed) {
547			device_printf(dev, "alloc msix failed - "
548				      "msi_count=%d, msi_needed=%d, err=%d; "
549				      "will try MSI\n", sc->msi_count,
550				      msi_needed, error);
551			sc->msi_count = 0;
552			port_qsets = 1;
553			pci_release_msi(dev);
554			bus_release_resource(dev, SYS_RES_MEMORY,
555			    sc->msix_regs_rid, sc->msix_regs_res);
556			sc->msix_regs_res = NULL;
557		} else {
558			sc->flags |= USING_MSIX;
559			sc->cxgb_intr = cxgb_async_intr;
560			device_printf(dev,
561				      "using MSI-X interrupts (%u vectors)\n",
562				      sc->msi_count);
563		}
564	}
565
566	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
567		sc->msi_count = 1;
568		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
569			device_printf(dev, "alloc msi failed - "
570				      "err=%d; will try INTx\n", error);
571			sc->msi_count = 0;
572			port_qsets = 1;
573			pci_release_msi(dev);
574		} else {
575			sc->flags |= USING_MSI;
576			sc->cxgb_intr = t3_intr_msi;
577			device_printf(dev, "using MSI interrupts\n");
578		}
579	}
580	if (sc->msi_count == 0) {
581		device_printf(dev, "using line interrupts\n");
582		sc->cxgb_intr = t3b_intr;
583	}
584
585	/* Create a private taskqueue thread for handling driver events */
586	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
587	    taskqueue_thread_enqueue, &sc->tq);
588	if (sc->tq == NULL) {
589		device_printf(dev, "failed to allocate controller task queue\n");
590		goto out;
591	}
592
593	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
594	    device_get_nameunit(dev));
595	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
596
597
598	/* Create a periodic callout for checking adapter status */
599	callout_init(&sc->cxgb_tick_ch, TRUE);
600
601	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
602		/*
603		 * Warn user that a firmware update will be attempted in init.
604		 */
605		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
606		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
607		sc->flags &= ~FW_UPTODATE;
608	} else {
609		sc->flags |= FW_UPTODATE;
610	}
611
612	if (t3_check_tpsram_version(sc) < 0) {
613		/*
614		 * Warn user that a firmware update will be attempted in init.
615		 */
616		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
617		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
618		sc->flags &= ~TPS_UPTODATE;
619	} else {
620		sc->flags |= TPS_UPTODATE;
621	}
622
623	/*
624	 * Create a child device for each MAC.  The ethernet attachment
625	 * will be done in these children.
626	 */
627	for (i = 0; i < (sc)->params.nports; i++) {
628		struct port_info *pi;
629
630		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
631			device_printf(dev, "failed to add child port\n");
632			error = EINVAL;
633			goto out;
634		}
635		pi = &sc->port[i];
636		pi->adapter = sc;
637		pi->nqsets = port_qsets;
638		pi->first_qset = i*port_qsets;
639		pi->port_id = i;
640		pi->tx_chan = i >= ai->nports0;
641		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
642		sc->rxpkt_map[pi->txpkt_intf] = i;
643		sc->port[i].tx_chan = i >= ai->nports0;
644		sc->portdev[i] = child;
645		device_set_softc(child, pi);
646	}
647	if ((error = bus_generic_attach(dev)) != 0)
648		goto out;
649
650	/* initialize sge private state */
651	t3_sge_init_adapter(sc);
652
653	t3_led_ready(sc);
654
655	error = t3_get_fw_version(sc, &vers);
656	if (error)
657		goto out;
658
659	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661	    G_FW_VERSION_MICRO(vers));
662
663	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664		 ai->desc, is_offload(sc) ? "R" : "",
665		 sc->params.vpd.ec, sc->params.vpd.sn);
666	device_set_desc_copy(dev, buf);
667
668	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671
672	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
674	t3_add_attach_sysctls(sc);
675
676#ifdef TCP_OFFLOAD
677	for (i = 0; i < NUM_CPL_HANDLERS; i++)
678		sc->cpl_handler[i] = cpl_not_handled;
679#endif
680
681	t3_intr_clear(sc);
682	error = cxgb_setup_interrupts(sc);
683out:
684	if (error)
685		cxgb_free(sc);
686
687	return (error);
688}
689
690/*
691 * The cxgb_controller_detach routine is called when the device is
692 * unloaded from the system.
693 */
694
695static int
696cxgb_controller_detach(device_t dev)
697{
698	struct adapter *sc;
699
700	sc = device_get_softc(dev);
701
702	cxgb_free(sc);
703
704	return (0);
705}
706
707/*
708 * cxgb_free() is called by the cxgb_controller_detach() routine
709 * to tear down the structures that were built up in
710 * cxgb_controller_attach(), and should be the final piece of work
711 * done when fully unloading the driver.
712 *
714 *  1. Shutting down the threads started by the cxgb_controller_attach()
715 *     routine.
716 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
717 *  3. Detaching all of the port devices created during the
718 *     cxgb_controller_attach() routine.
719 *  4. Removing the device children created via cxgb_controller_attach().
720 *  5. Releasing PCI resources associated with the device.
721 *  6. Turning off the offload support, iff it was turned on.
722 *  7. Destroying the mutexes created in cxgb_controller_attach().
723 *
724 */
725static void
726cxgb_free(struct adapter *sc)
727{
728	int i, nqsets = 0;
729
730	ADAPTER_LOCK(sc);
731	sc->flags |= CXGB_SHUTDOWN;
732	ADAPTER_UNLOCK(sc);
733
734	/*
735	 * Make sure all child devices are gone.
736	 */
737	bus_generic_detach(sc->dev);
738	for (i = 0; i < (sc)->params.nports; i++) {
739		if (sc->portdev[i] &&
740		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
741			device_printf(sc->dev, "failed to delete child port\n");
742		nqsets += sc->port[i].nqsets;
743	}
744
745	/*
746	 * At this point, it is as if cxgb_port_detach has run on all ports, and
747	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
748	 * all open devices have been closed.
749	 */
750	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
751					   __func__, sc->open_device_map));
752	for (i = 0; i < sc->params.nports; i++) {
753		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
754						  __func__, i));
755	}
756
757	/*
758	 * Finish off the adapter's callouts.
759	 */
760	callout_drain(&sc->cxgb_tick_ch);
761	callout_drain(&sc->sge_timer_ch);
762
763	/*
764	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
765	 * sysctls are cleaned up by the kernel linker.
766	 */
767	if (sc->flags & FULL_INIT_DONE) {
768 		t3_free_sge_resources(sc, nqsets);
769 		sc->flags &= ~FULL_INIT_DONE;
770 	}
771
772	/*
773	 * Release all interrupt resources.
774	 */
775	cxgb_teardown_interrupts(sc);
776	if (sc->flags & (USING_MSI | USING_MSIX)) {
777		device_printf(sc->dev, "releasing msi message(s)\n");
778		pci_release_msi(sc->dev);
779	} else {
780		device_printf(sc->dev, "no msi message to release\n");
781	}
782
783	if (sc->msix_regs_res != NULL) {
784		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
785		    sc->msix_regs_res);
786	}
787
788	/*
789	 * Free the adapter's taskqueue.
790	 */
791	if (sc->tq != NULL) {
792		taskqueue_free(sc->tq);
793		sc->tq = NULL;
794	}
795
796	free(sc->filters, M_DEVBUF);
797	t3_sge_free(sc);
798
799	if (sc->udbs_res != NULL)
800		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
801		    sc->udbs_res);
802
803	if (sc->regs_res != NULL)
804		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
805		    sc->regs_res);
806
807	MTX_DESTROY(&sc->mdio_lock);
808	MTX_DESTROY(&sc->sge.reg_lock);
809	MTX_DESTROY(&sc->elmer_lock);
810	mtx_lock(&t3_list_lock);
811	SLIST_REMOVE(&t3_list, sc, adapter, link);
812	mtx_unlock(&t3_list_lock);
813	ADAPTER_LOCK_DEINIT(sc);
814}
815
816/**
817 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
818 *	@sc: the controller softc
819 *
820 *	Determines how many sets of SGE queues to use and initializes them.
821 *	We support multiple queue sets per port if we have MSI-X, otherwise
822 *	just one queue set per port.
823 */
824static int
825setup_sge_qsets(adapter_t *sc)
826{
827	int i, j, err, irq_idx = 0, qset_idx = 0;
828	u_int ntxq = SGE_TXQ_PER_SET;
829
830	if ((err = t3_sge_alloc(sc)) != 0) {
831		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
832		return (err);
833	}
834
835	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
836		irq_idx = -1;
837
838	for (i = 0; i < (sc)->params.nports; i++) {
839		struct port_info *pi = &sc->port[i];
840
841		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
842			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
843			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
844			    &sc->params.sge.qset[qset_idx], ntxq, pi);
845			if (err) {
846				t3_free_sge_resources(sc, qset_idx);
847				device_printf(sc->dev,
848				    "t3_sge_alloc_qset failed with %d\n", err);
849				return (err);
850			}
851		}
852	}
853
854	return (0);
855}
856
857static void
858cxgb_teardown_interrupts(adapter_t *sc)
859{
860	int i;
861
862	for (i = 0; i < SGE_QSETS; i++) {
863		if (sc->msix_intr_tag[i] == NULL) {
864
865			/* Should have been setup fully or not at all */
866			KASSERT(sc->msix_irq_res[i] == NULL &&
867				sc->msix_irq_rid[i] == 0,
868				("%s: half-done interrupt (%d).", __func__, i));
869
870			continue;
871		}
872
873		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
874				  sc->msix_intr_tag[i]);
875		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
876				     sc->msix_irq_res[i]);
877
878		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
879		sc->msix_irq_rid[i] = 0;
880	}
881
882	if (sc->intr_tag) {
883		KASSERT(sc->irq_res != NULL,
884			("%s: half-done interrupt.", __func__));
885
886		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
887		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
888				     sc->irq_res);
889
890		sc->irq_res = sc->intr_tag = NULL;
891		sc->irq_rid = 0;
892	}
893}
894
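/*
 * Allocate and hook up the interrupts chosen during attach: the first vector
 * (or the INTx/MSI interrupt) is serviced by sc->cxgb_intr; with MSI-X, one
 * additional vector per queue set (rid 2 onwards) is routed to t3_intr_msix.
 */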
895static int
896cxgb_setup_interrupts(adapter_t *sc)
897{
898	struct resource *res;
899	void *tag;
900	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
901
902	sc->irq_rid = intr_flag ? 1 : 0;
903	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
904					     RF_SHAREABLE | RF_ACTIVE);
905	if (sc->irq_res == NULL) {
906		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
907			      intr_flag, sc->irq_rid);
908		err = EINVAL;
909		sc->irq_rid = 0;
910	} else {
911		err = bus_setup_intr(sc->dev, sc->irq_res,
912		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
913		    sc->cxgb_intr, sc, &sc->intr_tag);
914
915		if (err) {
916			device_printf(sc->dev,
917				      "Cannot set up interrupt (%x, %u, %d)\n",
918				      intr_flag, sc->irq_rid, err);
919			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
920					     sc->irq_res);
921			sc->irq_res = sc->intr_tag = NULL;
922			sc->irq_rid = 0;
923		}
924	}
925
926	/* That's all for INTx or MSI */
927	if (!(intr_flag & USING_MSIX) || err)
928		return (err);
929
930	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
931	for (i = 0; i < sc->msi_count - 1; i++) {
932		rid = i + 2;
933		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
934					     RF_SHAREABLE | RF_ACTIVE);
935		if (res == NULL) {
936			device_printf(sc->dev, "Cannot allocate interrupt "
937				      "for message %d\n", rid);
938			err = EINVAL;
939			break;
940		}
941
942		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
943				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
944		if (err) {
945			device_printf(sc->dev, "Cannot set up interrupt "
946				      "for message %d (%d)\n", rid, err);
947			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
948			break;
949		}
950
951		sc->msix_irq_rid[i] = rid;
952		sc->msix_irq_res[i] = res;
953		sc->msix_intr_tag[i] = tag;
954		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
955	}
956
957	if (err)
958		cxgb_teardown_interrupts(sc);
959
960	return (err);
961}
962
963
964static int
965cxgb_port_probe(device_t dev)
966{
967	struct port_info *p;
968	char buf[80];
969	const char *desc;
970
971	p = device_get_softc(dev);
972	desc = p->phy.desc;
973	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
974	device_set_desc_copy(dev, buf);
975	return (0);
976}
977
978
979static int
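/*
 * Create the per-port character device (named after the ifnet, e.g.
 * /dev/cxgb0) through which userland management tools reach
 * cxgb_extension_ioctl().
 */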
980cxgb_makedev(struct port_info *pi)
981{
982
983	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
984	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
985
986	if (pi->port_cdev == NULL)
987		return (ENOMEM);
988
989	pi->port_cdev->si_drv1 = (void *)pi;
990
991	return (0);
992}
993
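/*
 * Capabilities advertised on every cxgb ifnet; CXGB_CAP_ENABLE is the subset
 * enabled by default at attach time.  TSO is stripped again below on 4-port
 * adapters, whose firmware does not support it.
 */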
994#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
995    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
996    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
997#define CXGB_CAP_ENABLE CXGB_CAP
998
999static int
1000cxgb_port_attach(device_t dev)
1001{
1002	struct port_info *p;
1003	struct ifnet *ifp;
1004	int err;
1005	struct adapter *sc;
1006
1007	p = device_get_softc(dev);
1008	sc = p->adapter;
1009	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1010	    device_get_unit(device_get_parent(dev)), p->port_id);
1011	PORT_LOCK_INIT(p, p->lockbuf);
1012
1013	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1014	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1015
1016	/* Allocate an ifnet object and set it up */
1017	ifp = p->ifp = if_alloc(IFT_ETHER);
1018	if (ifp == NULL) {
1019		device_printf(dev, "Cannot allocate ifnet\n");
1020		return (ENOMEM);
1021	}
1022
1023	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1024	ifp->if_init = cxgb_init;
1025	ifp->if_softc = p;
1026	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1027	ifp->if_ioctl = cxgb_ioctl;
1028	ifp->if_transmit = cxgb_transmit;
1029	ifp->if_qflush = cxgb_qflush;
1030
1031	ifp->if_capabilities = CXGB_CAP;
1032#ifdef TCP_OFFLOAD
1033	if (is_offload(sc))
1034		ifp->if_capabilities |= IFCAP_TOE4;
1035#endif
1036	ifp->if_capenable = CXGB_CAP_ENABLE;
1037	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1038	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1039
1040	/*
1041	 * Disable TSO on 4-port - it isn't supported by the firmware.
1042	 */
1043	if (sc->params.nports > 2) {
1044		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1045		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1046		ifp->if_hwassist &= ~CSUM_TSO;
1047	}
1048
1049	ether_ifattach(ifp, p->hw_addr);
1050
1051#ifdef DEFAULT_JUMBO
1052	if (sc->params.nports <= 2)
1053		ifp->if_mtu = ETHERMTU_JUMBO;
1054#endif
1055	if ((err = cxgb_makedev(p)) != 0) {
1056		printf("makedev failed %d\n", err);
1057		return (err);
1058	}
1059
1060	/* Create a list of media supported by this port */
1061	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1062	    cxgb_media_status);
1063	cxgb_build_medialist(p);
1064
1065	t3_sge_init_port(p);
1066
1067	return (err);
1068}
1069
1070/*
1071 * cxgb_port_detach() is called via the device_detach method when
1072 * cxgb_free() calls bus_generic_detach().  It is responsible for
1073 * removing the device from the view of the kernel, i.e. from all
1074 * interface lists etc.  This routine is only called when the driver is
1075 * being unloaded, not when the link goes down.
1076 */
1077static int
1078cxgb_port_detach(device_t dev)
1079{
1080	struct port_info *p;
1081	struct adapter *sc;
1082	int i;
1083
1084	p = device_get_softc(dev);
1085	sc = p->adapter;
1086
1087	/* Tell cxgb_ioctl and if_init that the port is going away */
1088	ADAPTER_LOCK(sc);
1089	SET_DOOMED(p);
1090	wakeup(&sc->flags);
1091	while (IS_BUSY(sc))
1092		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1093	SET_BUSY(sc);
1094	ADAPTER_UNLOCK(sc);
1095
1096	if (p->port_cdev != NULL)
1097		destroy_dev(p->port_cdev);
1098
1099	cxgb_uninit_synchronized(p);
1100	ether_ifdetach(p->ifp);
1101
1102	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1103		struct sge_qset *qs = &sc->sge.qs[i];
1104		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1105
1106		callout_drain(&txq->txq_watchdog);
1107		callout_drain(&txq->txq_timer);
1108	}
1109
1110	PORT_LOCK_DEINIT(p);
1111	if_free(p->ifp);
1112	p->ifp = NULL;
1113
1114	ADAPTER_LOCK(sc);
1115	CLR_BUSY(sc);
1116	wakeup_one(&sc->flags);
1117	ADAPTER_UNLOCK(sc);
1118	return (0);
1119}
1120
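/*
 * Called by the common code when the adapter hits an unrecoverable error:
 * quiesce the SGE and both MACs, mask further interrupts, and print the
 * firmware status words to aid post-mortem debugging.
 */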
1121void
1122t3_fatal_err(struct adapter *sc)
1123{
1124	u_int fw_status[4];
1125
1126	if (sc->flags & FULL_INIT_DONE) {
1127		t3_sge_stop(sc);
1128		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1129		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1130		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1131		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1132		t3_intr_disable(sc);
1133	}
1134	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1135	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1136		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1137		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1138}
1139
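/*
 * Walk the PCI capability list by hand and return the config-space offset of
 * capability 'cap', or 0 if the device does not advertise it.
 */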
1140int
1141t3_os_find_pci_capability(adapter_t *sc, int cap)
1142{
1143	device_t dev;
1144	struct pci_devinfo *dinfo;
1145	pcicfgregs *cfg;
1146	uint32_t status;
1147	uint8_t ptr;
1148
1149	dev = sc->dev;
1150	dinfo = device_get_ivars(dev);
1151	cfg = &dinfo->cfg;
1152
1153	status = pci_read_config(dev, PCIR_STATUS, 2);
1154	if (!(status & PCIM_STATUS_CAPPRESENT))
1155		return (0);
1156
1157	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1158	case 0:
1159	case 1:
1160		ptr = PCIR_CAP_PTR;
1161		break;
1162	case 2:
1163		ptr = PCIR_CAP_PTR_2;
1164		break;
1165	default:
1166		return (0);
1167		break;
1168	}
1169	ptr = pci_read_config(dev, ptr, 1);
1170
1171	while (ptr != 0) {
1172		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1173			return (ptr);
1174		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1175	}
1176
1177	return (0);
1178}
1179
1180int
1181t3_os_pci_save_state(struct adapter *sc)
1182{
1183	device_t dev;
1184	struct pci_devinfo *dinfo;
1185
1186	dev = sc->dev;
1187	dinfo = device_get_ivars(dev);
1188
1189	pci_cfg_save(dev, dinfo, 0);
1190	return (0);
1191}
1192
1193int
1194t3_os_pci_restore_state(struct adapter *sc)
1195{
1196	device_t dev;
1197	struct pci_devinfo *dinfo;
1198
1199	dev = sc->dev;
1200	dinfo = device_get_ivars(dev);
1201
1202	pci_cfg_restore(dev, dinfo);
1203	return (0);
1204}
1205
1206/**
1207 *	t3_os_link_changed - handle link status changes
1208 *	@sc: the adapter associated with the link change
1209 *	@port_id: the port index whose link status has changed
1210 *	@link_status: the new status of the link
1211 *	@speed: the new speed setting
1212 *	@duplex: the new duplex setting
1213 *	@fc: the new flow-control setting
 *	@mac_was_reset: set if the MAC settings were lost to a reset
1214 *
1215 *	This is the OS-dependent handler for link status changes.  The OS
1216 *	neutral handler takes care of most of the processing for these events,
1217 *	then calls this handler for any OS-specific processing.
1218 */
1219void
1220t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1221     int duplex, int fc, int mac_was_reset)
1222{
1223	struct port_info *pi = &adapter->port[port_id];
1224	struct ifnet *ifp = pi->ifp;
1225
1226	/* no race with detach, so ifp should always be good */
1227	KASSERT(ifp, ("%s: if detached.", __func__));
1228
1229	/* Reapply mac settings if they were lost due to a reset */
1230	if (mac_was_reset) {
1231		PORT_LOCK(pi);
1232		cxgb_update_mac_settings(pi);
1233		PORT_UNLOCK(pi);
1234	}
1235
1236	if (link_status) {
1237		ifp->if_baudrate = IF_Mbps(speed);
1238		if_link_state_change(ifp, LINK_STATE_UP);
1239	} else
1240		if_link_state_change(ifp, LINK_STATE_DOWN);
1241}
1242
1243/**
1244 *	t3_os_phymod_changed - handle PHY module changes
1245 *	@adap: the adapter whose PHY module changed
1246 *	@port_id: the index of the port whose PHY module changed
1247 *
1248 *	This is the OS-dependent handler for PHY module changes.  It is
1249 *	invoked when a PHY module is removed or inserted, to perform any
1250 *	OS-specific processing.
1251 */
1252void
t3_os_phymod_changed(struct adapter *adap, int port_id)
1253{
1254	static const char *mod_str[] = {
1255		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1256	};
1257	struct port_info *pi = &adap->port[port_id];
1258	int mod = pi->phy.modtype;
1259
1260	if (mod != pi->media.ifm_cur->ifm_data)
1261		cxgb_build_medialist(pi);
1262
1263	if (mod == phy_modtype_none)
1264		if_printf(pi->ifp, "PHY module unplugged\n");
1265	else {
1266		KASSERT(mod < ARRAY_SIZE(mod_str),
1267			("invalid PHY module type %d", mod));
1268		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1269	}
1270}
1271
1272void
1273t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1274{
1275
1276	/*
1277	 * The ifnet might not be allocated when this gets called, as this is
1278	 * called early on in attach by t3_prep_adapter, so just save the
1279	 * address off in the port structure.
1280	 */
1281	if (cxgb_debug)
1282		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1283	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1284}
1285
1286/*
1287 * Programs the XGMAC based on the settings in the ifnet.  These settings
1288 * include MTU, MAC address, mcast addresses, etc.
1289 */
1290static void
1291cxgb_update_mac_settings(struct port_info *p)
1292{
1293	struct ifnet *ifp = p->ifp;
1294	struct t3_rx_mode rm;
1295	struct cmac *mac = &p->mac;
1296	int mtu, hwtagging;
1297
1298	PORT_LOCK_ASSERT_OWNED(p);
1299
1300	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1301
1302	mtu = ifp->if_mtu;
1303	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1304		mtu += ETHER_VLAN_ENCAP_LEN;
1305
1306	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1307
1308	t3_mac_set_mtu(mac, mtu);
1309	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1310	t3_mac_set_address(mac, 0, p->hw_addr);
1311	t3_init_rx_mode(&rm, p);
1312	t3_mac_set_rx_mode(mac, &rm);
1313}
1314
1315
1316static int
1317await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1318			      unsigned long n)
1319{
1320	int attempts = 5;
1321
1322	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1323		if (!--attempts)
1324			return (ETIMEDOUT);
1325		t3_os_sleep(10);
1326	}
1327	return 0;
1328}
1329
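/*
 * Prime the TP's SMT, L2T and routing tables (plus one TCB field) with dummy
 * writes so that their parity state is initialized before the hardware starts
 * checking it; used for offload-capable rev C adapters (see cxgb_up()).
 */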
1330static int
1331init_tp_parity(struct adapter *adap)
1332{
1333	int i;
1334	struct mbuf *m;
1335	struct cpl_set_tcb_field *greq;
1336	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1337
1338	t3_tp_set_offload_mode(adap, 1);
1339
1340	for (i = 0; i < 16; i++) {
1341		struct cpl_smt_write_req *req;
1342
1343		m = m_gethdr(M_WAITOK, MT_DATA);
1344		req = mtod(m, struct cpl_smt_write_req *);
1345		m->m_len = m->m_pkthdr.len = sizeof(*req);
1346		memset(req, 0, sizeof(*req));
1347		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1348		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1349		req->iff = i;
1350		t3_mgmt_tx(adap, m);
1351	}
1352
1353	for (i = 0; i < 2048; i++) {
1354		struct cpl_l2t_write_req *req;
1355
1356		m = m_gethdr(M_WAITOK, MT_DATA);
1357		req = mtod(m, struct cpl_l2t_write_req *);
1358		m->m_len = m->m_pkthdr.len = sizeof(*req);
1359		memset(req, 0, sizeof(*req));
1360		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1361		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1362		req->params = htonl(V_L2T_W_IDX(i));
1363		t3_mgmt_tx(adap, m);
1364	}
1365
1366	for (i = 0; i < 2048; i++) {
1367		struct cpl_rte_write_req *req;
1368
1369		m = m_gethdr(M_WAITOK, MT_DATA);
1370		req = mtod(m, struct cpl_rte_write_req *);
1371		m->m_len = m->m_pkthdr.len = sizeof(*req);
1372		memset(req, 0, sizeof(*req));
1373		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1374		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1375		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1376		t3_mgmt_tx(adap, m);
1377	}
1378
1379	m = m_gethdr(M_WAITOK, MT_DATA);
1380	greq = mtod(m, struct cpl_set_tcb_field *);
1381	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1382	memset(greq, 0, sizeof(*greq));
1383	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1384	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1385	greq->mask = htobe64(1);
1386	t3_mgmt_tx(adap, m);
1387
1388	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1389	t3_tp_set_offload_mode(adap, 0);
1390	return (i);
1391}
1392
1393/**
1394 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1395 *	@adap: the adapter
1396 *
1397 *	Sets up RSS to distribute packets to multiple receive queues.  We
1398 *	configure the RSS CPU lookup table to distribute to the number of HW
1399 *	receive queues, and the response queue lookup table to narrow that
1400 *	down to the response queues actually configured for each port.
1401 *	We always configure the RSS mapping for two ports since the mapping
1402 *	table has plenty of entries.
1403 */
1404static void
1405setup_rss(adapter_t *adap)
1406{
1407	int i;
1408	u_int nq[2];
1409	uint8_t cpus[SGE_QSETS + 1];
1410	uint16_t rspq_map[RSS_TABLE_SIZE];
1411
1412	for (i = 0; i < SGE_QSETS; ++i)
1413		cpus[i] = i;
1414	cpus[SGE_QSETS] = 0xff;
1415
1416	nq[0] = nq[1] = 0;
1417	for_each_port(adap, i) {
1418		const struct port_info *pi = adap2pinfo(adap, i);
1419
1420		nq[pi->tx_chan] += pi->nqsets;
1421	}
1422	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1423		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1424		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1425	}
1426
1427	/* Calculate the reverse RSS map table */
1428	for (i = 0; i < SGE_QSETS; ++i)
1429		adap->rrss_map[i] = 0xff;
1430	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1431		if (adap->rrss_map[rspq_map[i]] == 0xff)
1432			adap->rrss_map[rspq_map[i]] = i;
1433
1434	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1435		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1436	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1437	              cpus, rspq_map);
1438
1439}
1440static void
1441send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1442			      int hi, int port)
1443{
1444	struct mbuf *m;
1445	struct mngt_pktsched_wr *req;
1446
1447	m = m_gethdr(M_NOWAIT, MT_DATA);
1448	if (m) {
1449		req = mtod(m, struct mngt_pktsched_wr *);
1450		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1451		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1452		req->sched = sched;
1453		req->idx = qidx;
1454		req->min = lo;
1455		req->max = hi;
1456		req->binding = port;
1457		m->m_len = m->m_pkthdr.len = sizeof(*req);
1458		t3_mgmt_tx(adap, m);
1459	}
1460}
1461
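/*
 * Bind each port's queue sets to that port's TX channel by issuing one
 * firmware packet-scheduler command per queue set.
 */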
1462static void
1463bind_qsets(adapter_t *sc)
1464{
1465	int i, j;
1466
1467	for (i = 0; i < (sc)->params.nports; ++i) {
1468		const struct port_info *pi = adap2pinfo(sc, i);
1469
1470		for (j = 0; j < pi->nqsets; ++j) {
1471			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1472					  -1, pi->tx_chan);
1473
1474		}
1475	}
1476}
1477
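/*
 * If the protocol SRAM image stored in the adapter's EEPROM is older than the
 * version this driver was built against, rewrite it from the
 * cxgb_t3*_tp_eeprom firmware module.
 */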
1478static void
1479update_tpeeprom(struct adapter *adap)
1480{
1481	const struct firmware *tpeeprom;
1482
1483	uint32_t version;
1484	unsigned int major, minor;
1485	int ret, len;
1486	char rev, name[32];
1487
1488	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1489
1490	major = G_TP_VERSION_MAJOR(version);
1491	minor = G_TP_VERSION_MINOR(version);
1492	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1493		return;
1494
1495	rev = t3rev2char(adap);
1496	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1497
1498	tpeeprom = firmware_get(name);
1499	if (tpeeprom == NULL) {
1500		device_printf(adap->dev,
1501			      "could not load TP EEPROM: unable to load %s\n",
1502			      name);
1503		return;
1504	}
1505
1506	len = tpeeprom->datasize - 4;
1507
1508	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1509	if (ret)
1510		goto release_tpeeprom;
1511
1512	if (len != TP_SRAM_LEN) {
1513		device_printf(adap->dev,
1514			      "%s length is wrong len=%d expected=%d\n", name,
1515			      len, TP_SRAM_LEN);
1516		goto release_tpeeprom;	/* don't leak the firmware reference */
1517	}
1518
1519	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1520	    TP_SRAM_OFFSET);
1521
1522	if (!ret) {
1523		device_printf(adap->dev,
1524			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1525			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1526	} else
1527		device_printf(adap->dev,
1528			      "Protocol SRAM image update in EEPROM failed\n");
1529
1530release_tpeeprom:
1531	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1532
1533	return;
1534}
1535
1536static int
1537update_tpsram(struct adapter *adap)
1538{
1539	const struct firmware *tpsram;
1540	int ret;
1541	char rev, name[32];
1542
1543	rev = t3rev2char(adap);
1544	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1545
1546	update_tpeeprom(adap);
1547
1548	tpsram = firmware_get(name);
1549	if (tpsram == NULL) {
1550		device_printf(adap->dev, "could not load TP SRAM\n");
1551		return (EINVAL);
1552	} else
1553		device_printf(adap->dev, "updating TP SRAM\n");
1554
1555	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1556	if (ret)
1557		goto release_tpsram;
1558
1559	ret = t3_set_proto_sram(adap, tpsram->data);
1560	if (ret)
1561		device_printf(adap->dev, "loading protocol SRAM failed\n");
1562
1563release_tpsram:
1564	firmware_put(tpsram, FIRMWARE_UNLOAD);
1565
1566	return (ret);
1567}
1568
1569/**
1570 *	cxgb_up - enable the adapter
1571 *	@adap: adapter being enabled
1572 *
1573 *	Called when the first port is enabled, this function performs the
1574 *	actions necessary to make an adapter operational, such as completing
1575 *	the initialization of HW modules, and enabling interrupts.
1576 */
1577static int
1578cxgb_up(struct adapter *sc)
1579{
1580	int err = 0;
1581	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1582
1583	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1584					   __func__, sc->open_device_map));
1585
1586	if ((sc->flags & FULL_INIT_DONE) == 0) {
1587
1588		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1589
1590		if ((sc->flags & FW_UPTODATE) == 0)
1591			if ((err = upgrade_fw(sc)))
1592				goto out;
1593
1594		if ((sc->flags & TPS_UPTODATE) == 0)
1595			if ((err = update_tpsram(sc)))
1596				goto out;
1597
1598		if (is_offload(sc) && nfilters != 0) {
1599			sc->params.mc5.nservers = 0;
1600
1601			if (nfilters < 0)
1602				sc->params.mc5.nfilters = mxf;
1603			else
1604				sc->params.mc5.nfilters = min(nfilters, mxf);
1605		}
1606
1607		err = t3_init_hw(sc, 0);
1608		if (err)
1609			goto out;
1610
1611		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1612		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1613
1614		err = setup_sge_qsets(sc);
1615		if (err)
1616			goto out;
1617
1618		alloc_filters(sc);
1619		setup_rss(sc);
1620
1621		t3_add_configured_sysctls(sc);
1622		sc->flags |= FULL_INIT_DONE;
1623	}
1624
1625	t3_intr_clear(sc);
1626	t3_sge_start(sc);
1627	t3_intr_enable(sc);
1628
1629	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1630	    is_offload(sc) && init_tp_parity(sc) == 0)
1631		sc->flags |= TP_PARITY_INIT;
1632
1633	if (sc->flags & TP_PARITY_INIT) {
1634		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1635		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1636	}
1637
1638	if (!(sc->flags & QUEUES_BOUND)) {
1639		bind_qsets(sc);
1640		setup_hw_filters(sc);
1641		sc->flags |= QUEUES_BOUND;
1642	}
1643
1644	t3_sge_reset_adapter(sc);
1645out:
1646	return (err);
1647}
1648
1649/*
1650 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1651 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1652 * during controller_detach, not here.
1653 */
1654static void
1655cxgb_down(struct adapter *sc)
1656{
1657	t3_sge_stop(sc);
1658	t3_intr_disable(sc);
1659}
1660
1661/*
1662 * if_init for cxgb ports.
1663 */
1664static void
1665cxgb_init(void *arg)
1666{
1667	struct port_info *p = arg;
1668	struct adapter *sc = p->adapter;
1669
1670	ADAPTER_LOCK(sc);
1671	cxgb_init_locked(p); /* releases adapter lock */
1672	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1673}
1674
1675static int
1676cxgb_init_locked(struct port_info *p)
1677{
1678	struct adapter *sc = p->adapter;
1679	struct ifnet *ifp = p->ifp;
1680	struct cmac *mac = &p->mac;
1681	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1682
1683	ADAPTER_LOCK_ASSERT_OWNED(sc);
1684
1685	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1686		gave_up_lock = 1;
1687		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1688			rc = EINTR;
1689			goto done;
1690		}
1691	}
1692	if (IS_DOOMED(p)) {
1693		rc = ENXIO;
1694		goto done;
1695	}
1696	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1697
1698	/*
1699	 * The code that runs during one-time adapter initialization can sleep
1700	 * so it's important not to hold any locks across it.
1701	 */
1702	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1703
1704	if (may_sleep) {
1705		SET_BUSY(sc);
1706		gave_up_lock = 1;
1707		ADAPTER_UNLOCK(sc);
1708	}
1709
1710	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1711		goto done;
1712
1713	PORT_LOCK(p);
1714	if (isset(&sc->open_device_map, p->port_id) &&
1715	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1716		PORT_UNLOCK(p);
1717		goto done;
1718	}
1719	t3_port_intr_enable(sc, p->port_id);
1720	if (!mac->multiport)
1721		t3_mac_init(mac);
1722	cxgb_update_mac_settings(p);
1723	t3_link_start(&p->phy, mac, &p->link_config);
1724	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1725	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1726	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1727	PORT_UNLOCK(p);
1728
1729	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1730		struct sge_qset *qs = &sc->sge.qs[i];
1731		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1732
1733		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1734				 txq->txq_watchdog.c_cpu);
1735	}
1736
1737	/* all ok */
1738	setbit(&sc->open_device_map, p->port_id);
1739	callout_reset(&p->link_check_ch,
1740	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1741	    link_check_callout, p);
1742
1743done:
1744	if (may_sleep) {
1745		ADAPTER_LOCK(sc);
1746		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1747		CLR_BUSY(sc);
1748	}
1749	if (gave_up_lock)
1750		wakeup_one(&sc->flags);
1751	ADAPTER_UNLOCK(sc);
1752	return (rc);
1753}
1754
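/*
 * Counterpart to cxgb_init_locked(): wait until the controller is neither
 * doomed nor busy, mark it busy, drop the adapter lock, and do the actual
 * teardown in cxgb_uninit_synchronized().
 */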
1755static int
1756cxgb_uninit_locked(struct port_info *p)
1757{
1758	struct adapter *sc = p->adapter;
1759	int rc;
1760
1761	ADAPTER_LOCK_ASSERT_OWNED(sc);
1762
1763	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1764		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1765			rc = EINTR;
1766			goto done;
1767		}
1768	}
1769	if (IS_DOOMED(p)) {
1770		rc = ENXIO;
1771		goto done;
1772	}
1773	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1774	SET_BUSY(sc);
1775	ADAPTER_UNLOCK(sc);
1776
1777	rc = cxgb_uninit_synchronized(p);
1778
1779	ADAPTER_LOCK(sc);
1780	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1781	CLR_BUSY(sc);
1782	wakeup_one(&sc->flags);
1783done:
1784	ADAPTER_UNLOCK(sc);
1785	return (rc);
1786}
1787
1788/*
1789 * Called on "ifconfig down", and from port_detach
1790 */
1791static int
1792cxgb_uninit_synchronized(struct port_info *pi)
1793{
1794	struct adapter *sc = pi->adapter;
1795	struct ifnet *ifp = pi->ifp;
1796
1797	/*
1798	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1799	 */
1800	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1801
1802	/*
1803	 * Clear this port's bit from the open device map, and then drain all
1804	 * the tasks that can access/manipulate this port's port_info or ifp.
1805	 * We disable this port's interrupts here and so the slow/ext
1806	 * interrupt tasks won't be enqueued.  The tick task will continue to
1807	 * be enqueued every second but the runs after this drain will not see
1808	 * this port in the open device map.
1809	 *
1810	 * A well behaved task must take open_device_map into account and ignore
1811	 * ports that are not open.
1812	 */
1813	clrbit(&sc->open_device_map, pi->port_id);
1814	t3_port_intr_disable(sc, pi->port_id);
1815	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1816	taskqueue_drain(sc->tq, &sc->tick_task);
1817
1818	callout_drain(&pi->link_check_ch);
1819	taskqueue_drain(sc->tq, &pi->link_check_task);
1820
1821	PORT_LOCK(pi);
1822	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1823
1824	/* disable pause frames */
1825	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1826
1827	/* Reset RX FIFO HWM */
1828	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1829			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1830
1831	DELAY(100 * 1000);
1832
1833	/* Wait for TXFIFO empty */
1834	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1835			F_TXFIFO_EMPTY, 1, 20, 5);
1836
1837	DELAY(100 * 1000);
1838	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1839
1840	pi->phy.ops->power_down(&pi->phy, 1);
1841
1842	PORT_UNLOCK(pi);
1843
1844	pi->link_config.link_ok = 0;
1845	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1846
1847	if (sc->open_device_map == 0)
1848		cxgb_down(pi->adapter);
1849
1850	return (0);
1851}
1852
1853/*
1854 * Mark LRO enabled or disabled in all qsets for this port.
1855 */
1856static int
1857cxgb_set_lro(struct port_info *p, int enabled)
1858{
1859	int i;
1860	struct adapter *adp = p->adapter;
1861	struct sge_qset *q;
1862
1863	for (i = 0; i < p->nqsets; i++) {
1864		q = &adp->sge.qs[p->first_qset + i];
1865		q->lro.enabled = (enabled != 0);
1866	}
1867	return (0);
1868}
1869
1870static int
1871cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1872{
1873	struct port_info *p = ifp->if_softc;
1874	struct adapter *sc = p->adapter;
1875	struct ifreq *ifr = (struct ifreq *)data;
1876	int flags, error = 0, mtu;
1877	uint32_t mask;
1878
1879	switch (command) {
1880	case SIOCSIFMTU:
1881		ADAPTER_LOCK(sc);
1882		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1883		if (error) {
1884fail:
1885			ADAPTER_UNLOCK(sc);
1886			return (error);
1887		}
1888
1889		mtu = ifr->ifr_mtu;
1890		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1891			error = EINVAL;
1892		} else {
1893			ifp->if_mtu = mtu;
1894			PORT_LOCK(p);
1895			cxgb_update_mac_settings(p);
1896			PORT_UNLOCK(p);
1897		}
1898		ADAPTER_UNLOCK(sc);
1899		break;
1900	case SIOCSIFFLAGS:
1901		ADAPTER_LOCK(sc);
1902		if (IS_DOOMED(p)) {
1903			error = ENXIO;
1904			goto fail;
1905		}
1906		if (ifp->if_flags & IFF_UP) {
1907			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1908				flags = p->if_flags;
1909				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1910				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1911					if (IS_BUSY(sc)) {
1912						error = EBUSY;
1913						goto fail;
1914					}
1915					PORT_LOCK(p);
1916					cxgb_update_mac_settings(p);
1917					PORT_UNLOCK(p);
1918				}
1919				ADAPTER_UNLOCK(sc);
1920			} else
1921				error = cxgb_init_locked(p);
1922			p->if_flags = ifp->if_flags;
1923		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1924			error = cxgb_uninit_locked(p);
1925		else
1926			ADAPTER_UNLOCK(sc);
1927
1928		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1929		break;
1930	case SIOCADDMULTI:
1931	case SIOCDELMULTI:
1932		ADAPTER_LOCK(sc);
1933		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1934		if (error)
1935			goto fail;
1936
1937		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1938			PORT_LOCK(p);
1939			cxgb_update_mac_settings(p);
1940			PORT_UNLOCK(p);
1941		}
1942		ADAPTER_UNLOCK(sc);
1943
1944		break;
1945	case SIOCSIFCAP:
1946		ADAPTER_LOCK(sc);
1947		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1948		if (error)
1949			goto fail;
1950
1951		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1952		if (mask & IFCAP_TXCSUM) {
1953			ifp->if_capenable ^= IFCAP_TXCSUM;
1954			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1955
1956			if (IFCAP_TSO4 & ifp->if_capenable &&
1957			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1958				ifp->if_capenable &= ~IFCAP_TSO4;
1959				if_printf(ifp,
1960				    "tso4 disabled due to -txcsum.\n");
1961			}
1962		}
1963		if (mask & IFCAP_TXCSUM_IPV6) {
1964			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1965			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1966
1967			if (IFCAP_TSO6 & ifp->if_capenable &&
1968			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1969				ifp->if_capenable &= ~IFCAP_TSO6;
1970				if_printf(ifp,
1971				    "tso6 disabled due to -txcsum6.\n");
1972			}
1973		}
1974		if (mask & IFCAP_RXCSUM)
1975			ifp->if_capenable ^= IFCAP_RXCSUM;
1976		if (mask & IFCAP_RXCSUM_IPV6)
1977			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1978
1979		/*
1980		 * Note that we leave CSUM_TSO alone (it is always set).  The
1981		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1982		 * sending a TSO request our way, so it's sufficient to toggle
1983		 * IFCAP_TSOx only.
1984		 */
1985		if (mask & IFCAP_TSO4) {
1986			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1987			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1988				if_printf(ifp, "enable txcsum first.\n");
1989				error = EAGAIN;
1990				goto fail;
1991			}
1992			ifp->if_capenable ^= IFCAP_TSO4;
1993		}
1994		if (mask & IFCAP_TSO6) {
1995			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1996			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1997				if_printf(ifp, "enable txcsum6 first.\n");
1998				error = EAGAIN;
1999				goto fail;
2000			}
2001			ifp->if_capenable ^= IFCAP_TSO6;
2002		}
2003		if (mask & IFCAP_LRO) {
2004			ifp->if_capenable ^= IFCAP_LRO;
2005
2006			/* Safe to do this even if cxgb_up not called yet */
2007			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2008		}
2009#ifdef TCP_OFFLOAD
2010		if (mask & IFCAP_TOE4) {
2011			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2012
2013			error = toe_capability(p, enable);
2014			if (error == 0)
2015				ifp->if_capenable ^= mask;
2016		}
2017#endif
2018		if (mask & IFCAP_VLAN_HWTAGGING) {
2019			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2020			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2021				PORT_LOCK(p);
2022				cxgb_update_mac_settings(p);
2023				PORT_UNLOCK(p);
2024			}
2025		}
2026		if (mask & IFCAP_VLAN_MTU) {
2027			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2028			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2029				PORT_LOCK(p);
2030				cxgb_update_mac_settings(p);
2031				PORT_UNLOCK(p);
2032			}
2033		}
2034		if (mask & IFCAP_VLAN_HWTSO)
2035			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2036		if (mask & IFCAP_VLAN_HWCSUM)
2037			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2038
2039#ifdef VLAN_CAPABILITIES
2040		VLAN_CAPABILITIES(ifp);
2041#endif
2042		ADAPTER_UNLOCK(sc);
2043		break;
2044	case SIOCSIFMEDIA:
2045	case SIOCGIFMEDIA:
2046		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2047		break;
2048	default:
2049		error = ether_ioctl(ifp, command, data);
2050	}
2051
2052	return (error);
2053}
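
/*
 * Note on the SIOCSIFCAP case above: this is the path taken when interface
 * capabilities are toggled from userland, e.g. "ifconfig cxgb0 -txcsum" or
 * "ifconfig cxgb0 lro"; ifconfig(8) delivers those requests as SIOCSIFCAP.
 */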
2054
2055static int
2056cxgb_media_change(struct ifnet *ifp)
2057{
2058	return (EOPNOTSUPP);
2059}
2060
2061/*
2062 * Translates phy->modtype to the correct Ethernet media subtype.
2063 */
2064static int
2065cxgb_ifm_type(int mod)
2066{
2067	switch (mod) {
2068	case phy_modtype_sr:
2069		return (IFM_10G_SR);
2070	case phy_modtype_lr:
2071		return (IFM_10G_LR);
2072	case phy_modtype_lrm:
2073		return (IFM_10G_LRM);
2074	case phy_modtype_twinax:
2075		return (IFM_10G_TWINAX);
2076	case phy_modtype_twinax_long:
2077		return (IFM_10G_TWINAX_LONG);
2078	case phy_modtype_none:
2079		return (IFM_NONE);
2080	case phy_modtype_unknown:
2081		return (IFM_UNKNOWN);
2082	}
2083
2084	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2085	return (IFM_UNKNOWN);
2086}
2087
2088/*
2089 * Rebuilds the ifmedia list for this port, and sets the current media.
2090 */
2091static void
2092cxgb_build_medialist(struct port_info *p)
2093{
2094	struct cphy *phy = &p->phy;
2095	struct ifmedia *media = &p->media;
2096	int mod = phy->modtype;
2097	int m = IFM_ETHER | IFM_FDX;
2098
2099	PORT_LOCK(p);
2100
2101	ifmedia_removeall(media);
2102	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2103		/* Copper (RJ45) */
2104
2105		if (phy->caps & SUPPORTED_10000baseT_Full)
2106			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2107
2108		if (phy->caps & SUPPORTED_1000baseT_Full)
2109			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2110
2111		if (phy->caps & SUPPORTED_100baseT_Full)
2112			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2113
2114		if (phy->caps & SUPPORTED_10baseT_Full)
2115			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2116
2117		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2118		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2119
2120	} else if (phy->caps & SUPPORTED_TP) {
2121		/* Copper (CX4) */
2122
2123		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2124			("%s: unexpected cap 0x%x", __func__, phy->caps));
2125
2126		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2127		ifmedia_set(media, m | IFM_10G_CX4);
2128
2129	} else if (phy->caps & SUPPORTED_FIBRE &&
2130		   phy->caps & SUPPORTED_10000baseT_Full) {
2131		/* 10G optical (but includes SFP+ twinax) */
2132
2133		m |= cxgb_ifm_type(mod);
2134		if (IFM_SUBTYPE(m) == IFM_NONE)
2135			m &= ~IFM_FDX;
2136
2137		ifmedia_add(media, m, mod, NULL);
2138		ifmedia_set(media, m);
2139
2140	} else if (phy->caps & SUPPORTED_FIBRE &&
2141		   phy->caps & SUPPORTED_1000baseT_Full) {
2142		/* 1G optical */
2143
2144		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2145		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2146		ifmedia_set(media, m | IFM_1000_SX);
2147
2148	} else {
2149		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2150			    phy->caps));
2151	}
2152
2153	PORT_UNLOCK(p);
2154}
2155
2156static void
2157cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2158{
2159	struct port_info *p = ifp->if_softc;
2160	struct ifmedia_entry *cur = p->media.ifm_cur;
2161	int speed = p->link_config.speed;
2162
2163	if (cur->ifm_data != p->phy.modtype) {
2164		cxgb_build_medialist(p);
2165		cur = p->media.ifm_cur;
2166	}
2167
2168	ifmr->ifm_status = IFM_AVALID;
2169	if (!p->link_config.link_ok)
2170		return;
2171
2172	ifmr->ifm_status |= IFM_ACTIVE;
2173
2174	/*
2175	 * active and current will differ iff current media is autoselect.  That
2176	 * can happen only for copper RJ45.
2177	 */
2178	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2179		return;
2180	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2181		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2182
2183	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2184	if (speed == SPEED_10000)
2185		ifmr->ifm_active |= IFM_10G_T;
2186	else if (speed == SPEED_1000)
2187		ifmr->ifm_active |= IFM_1000_T;
2188	else if (speed == SPEED_100)
2189		ifmr->ifm_active |= IFM_100_TX;
2190	else if (speed == SPEED_10)
2191		ifmr->ifm_active |= IFM_10_T;
2192	else
2193		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2194			    speed));
2195}
2196
2197static void
2198cxgb_async_intr(void *data)
2199{
2200	adapter_t *sc = data;
2201
2202	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2203	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2204	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2205}
2206
2207static void
2208link_check_callout(void *arg)
2209{
2210	struct port_info *pi = arg;
2211	struct adapter *sc = pi->adapter;
2212
2213	if (!isset(&sc->open_device_map, pi->port_id))
2214		return;
2215
2216	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2217}
2218
2219static void
2220check_link_status(void *arg, int pending)
2221{
2222	struct port_info *pi = arg;
2223	struct adapter *sc = pi->adapter;
2224
2225	if (!isset(&sc->open_device_map, pi->port_id))
2226		return;
2227
2228	t3_link_changed(sc, pi->port_id);
2229
2230	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2231		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2232}
2233
2234void
2235t3_os_link_intr(struct port_info *pi)
2236{
2237	/*
2238	 * Schedule a link check in the near future.  If the link is flapping
2239	 * rapidly we'll keep resetting the callout and delaying the check until
2240	 * things stabilize a bit.
2241	 */
2242	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2243}
2244
2245static void
2246check_t3b2_mac(struct adapter *sc)
2247{
2248	int i;
2249
2250	if (sc->flags & CXGB_SHUTDOWN)
2251		return;
2252
2253	for_each_port(sc, i) {
2254		struct port_info *p = &sc->port[i];
2255		int status;
2256#ifdef INVARIANTS
2257		struct ifnet *ifp = p->ifp;
2258#endif
2259
2260		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2261		    !p->link_config.link_ok)
2262			continue;
2263
2264		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2265			("%s: state mismatch (drv_flags %x, device_map %x)",
2266			 __func__, ifp->if_drv_flags, sc->open_device_map));
2267
2268		PORT_LOCK(p);
2269		status = t3b2_mac_watchdog_task(&p->mac);
2270		if (status == 1)
2271			p->mac.stats.num_toggled++;
2272		else if (status == 2) {
2273			struct cmac *mac = &p->mac;
2274
2275			cxgb_update_mac_settings(p);
2276			t3_link_start(&p->phy, mac, &p->link_config);
2277			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2278			t3_port_intr_enable(sc, p->port_id);
2279			p->mac.stats.num_resets++;
2280		}
2281		PORT_UNLOCK(p);
2282	}
2283}
2284
2285static void
2286cxgb_tick(void *arg)
2287{
2288	adapter_t *sc = (adapter_t *)arg;
2289
2290	if (sc->flags & CXGB_SHUTDOWN)
2291		return;
2292
2293	taskqueue_enqueue(sc->tq, &sc->tick_task);
2294	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2295}
2296
2297static void
2298cxgb_tick_handler(void *arg, int count)
2299{
2300	adapter_t *sc = (adapter_t *)arg;
2301	const struct adapter_params *p = &sc->params;
2302	int i;
2303	uint32_t cause, reset;
2304
2305	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2306		return;
2307
2308	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2309		check_t3b2_mac(sc);
2310
2311	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2312	if (cause) {
2313		struct sge_qset *qs = &sc->sge.qs[0];
2314		uint32_t mask, v;
2315
2316		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2317
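		/*
		 * A_SG_RSPQ_FL_STATUS is decoded below (as inferred from the
		 * loops that follow): one starvation bit per response queue
		 * (SGE_QSETS bits), then the RSPQXDISABLED bits, which are
		 * masked off above and skipped, then two empty-FL bits per
		 * queue set.
		 */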
2318		mask = 1;
2319		for (i = 0; i < SGE_QSETS; i++) {
2320			if (v & mask)
2321				qs[i].rspq.starved++;
2322			mask <<= 1;
2323		}
2324
2325		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2326
2327		for (i = 0; i < SGE_QSETS * 2; i++) {
2328			if (v & mask) {
2329				qs[i / 2].fl[i % 2].empty++;
2330			}
2331			mask <<= 1;
2332		}
2333
2334		/* clear */
2335		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2336		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2337	}
2338
2339	for (i = 0; i < sc->params.nports; i++) {
2340		struct port_info *pi = &sc->port[i];
2341		struct ifnet *ifp = pi->ifp;
2342		struct cmac *mac = &pi->mac;
2343		struct mac_stats *mstats = &mac->stats;
2344		int drops, j;
2345
2346		if (!isset(&sc->open_device_map, pi->port_id))
2347			continue;
2348
2349		PORT_LOCK(pi);
2350		t3_mac_update_stats(mac);
2351		PORT_UNLOCK(pi);
2352
2353		ifp->if_opackets = mstats->tx_frames;
2354		ifp->if_ipackets = mstats->rx_frames;
2355		ifp->if_obytes = mstats->tx_octets;
2356		ifp->if_ibytes = mstats->rx_octets;
2357		ifp->if_omcasts = mstats->tx_mcast_frames;
2358		ifp->if_imcasts = mstats->rx_mcast_frames;
2359		ifp->if_collisions = mstats->tx_total_collisions;
2360		ifp->if_iqdrops = mstats->rx_cong_drops;
2361
2362		drops = 0;
2363		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2364			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2365		ifp->if_snd.ifq_drops = drops;
2366
2367		ifp->if_oerrors =
2368		    mstats->tx_excess_collisions +
2369		    mstats->tx_underrun +
2370		    mstats->tx_len_errs +
2371		    mstats->tx_mac_internal_errs +
2372		    mstats->tx_excess_deferral +
2373		    mstats->tx_fcs_errs;
2374		ifp->if_ierrors =
2375		    mstats->rx_jabber +
2376		    mstats->rx_data_errs +
2377		    mstats->rx_sequence_errs +
2378		    mstats->rx_runt +
2379		    mstats->rx_too_long +
2380		    mstats->rx_mac_internal_errs +
2381		    mstats->rx_short +
2382		    mstats->rx_fcs_errs;
2383
2384		if (mac->multiport)
2385			continue;
2386
2387		/* Count rx fifo overflows, once per second */
2388		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2389		reset = 0;
2390		if (cause & F_RXFIFO_OVERFLOW) {
2391			mac->stats.rx_fifo_ovfl++;
2392			reset |= F_RXFIFO_OVERFLOW;
2393		}
2394		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2395	}
2396}
2397
2398static void
2399touch_bars(device_t dev)
2400{
2401	/*
2402	 * Don't enable yet
2403	 */
2404#if !defined(__LP64__) && 0
2405	u32 v;
2406
2407	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2408	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2409	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2410	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2411	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2412	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2413#endif
2414}
2415
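/*
 * EEPROM writes are done a 32-bit word at a time.  If the caller's
 * offset/length is not 4-byte aligned, the partial leading and trailing
 * words are read back first, so the loop below performs a read-modify-write
 * of whole words.  Write protection is lifted only for the duration of the
 * update.
 */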
2416static int
2417set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2418{
2419	uint8_t *buf;
2420	int err = 0;
2421	u32 aligned_offset, aligned_len, *p;
2422	struct adapter *adapter = pi->adapter;
2423
2424
2425	aligned_offset = offset & ~3;
2426	aligned_len = (len + (offset & 3) + 3) & ~3;
2427
2428	if (aligned_offset != offset || aligned_len != len) {
2429		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2430		if (!buf)
2431			return (ENOMEM);
2432		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2433		if (!err && aligned_len > 4)
2434			err = t3_seeprom_read(adapter,
2435					      aligned_offset + aligned_len - 4,
2436					      (u32 *)&buf[aligned_len - 4]);
2437		if (err)
2438			goto out;
2439		memcpy(buf + (offset & 3), data, len);
2440	} else
2441		buf = (uint8_t *)(uintptr_t)data;
2442
2443	err = t3_seeprom_wp(adapter, 0);
2444	if (err)
2445		goto out;
2446
2447	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2448		err = t3_seeprom_write(adapter, aligned_offset, *p);
2449		aligned_offset += 4;
2450	}
2451
2452	if (!err)
2453		err = t3_seeprom_wp(adapter, 1);
2454out:
2455	if (buf != data)
2456		free(buf, M_DEVBUF);
2457	return (err);
2458}
2459
2460
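/*
 * Range-check helper for the CHELSIO_SET_HW_SCHED parameters below: a
 * negative value means "leave this parameter unchanged", so it is accepted
 * here and then skipped by the per-field (t->xxx >= 0) checks in the ioctl.
 */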
2461static int
2462in_range(int val, int lo, int hi)
2463{
2464	return (val < 0 || (val <= hi && val >= lo));
2465}
2466
2467static int
2468cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2469{
2470	return (0);
2471}
2472
2473static int
2474cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2475{
2476	return (0);
2477}
2478
2479static int
2480cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2481    int fflag, struct thread *td)
2482{
2483	int mmd, error = 0;
2484	struct port_info *pi = dev->si_drv1;
2485	adapter_t *sc = pi->adapter;
2486
2487#ifdef PRIV_SUPPORTED
2488	if (priv_check(td, PRIV_DRIVER)) {
2489		if (cxgb_debug)
2490			printf("user does not have access to privileged ioctls\n");
2491		return (EPERM);
2492	}
2493#else
2494	if (suser(td)) {
2495		if (cxgb_debug)
2496			printf("user does not have access to privileged ioctls\n");
2497		return (EPERM);
2498	}
2499#endif
2500
2501	switch (cmd) {
2502	case CHELSIO_GET_MIIREG: {
2503		uint32_t val;
2504		struct cphy *phy = &pi->phy;
2505		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2506
2507		if (!phy->mdio_read)
2508			return (EOPNOTSUPP);
2509		if (is_10G(sc)) {
2510			mmd = mid->phy_id >> 8;
2511			if (!mmd)
2512				mmd = MDIO_DEV_PCS;
2513			else if (mmd > MDIO_DEV_VEND2)
2514				return (EINVAL);
2515
2516			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2517					     mid->reg_num, &val);
2518		} else
2519			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2520					     mid->reg_num & 0x1f, &val);
2521		if (error == 0)
2522			mid->val_out = val;
2523		break;
2524	}
2525	case CHELSIO_SET_MIIREG: {
2526		struct cphy *phy = &pi->phy;
2527		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2528
2529		if (!phy->mdio_write)
2530			return (EOPNOTSUPP);
2531		if (is_10G(sc)) {
2532			mmd = mid->phy_id >> 8;
2533			if (!mmd)
2534				mmd = MDIO_DEV_PCS;
2535			else if (mmd > MDIO_DEV_VEND2)
2536				return (EINVAL);
2537
2538			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2539					      mmd, mid->reg_num, mid->val_in);
2540		} else
2541			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2542					      mid->reg_num & 0x1f,
2543					      mid->val_in);
2544		break;
2545	}
2546	case CHELSIO_SETREG: {
2547		struct ch_reg *edata = (struct ch_reg *)data;
2548		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2549			return (EFAULT);
2550		t3_write_reg(sc, edata->addr, edata->val);
2551		break;
2552	}
2553	case CHELSIO_GETREG: {
2554		struct ch_reg *edata = (struct ch_reg *)data;
2555		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2556			return (EFAULT);
2557		edata->val = t3_read_reg(sc, edata->addr);
2558		break;
2559	}
2560	case CHELSIO_GET_SGE_CONTEXT: {
2561		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2562		mtx_lock_spin(&sc->sge.reg_lock);
2563		switch (ecntxt->cntxt_type) {
2564		case CNTXT_TYPE_EGRESS:
2565			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2566			    ecntxt->data);
2567			break;
2568		case CNTXT_TYPE_FL:
2569			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2570			    ecntxt->data);
2571			break;
2572		case CNTXT_TYPE_RSP:
2573			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2574			    ecntxt->data);
2575			break;
2576		case CNTXT_TYPE_CQ:
2577			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2578			    ecntxt->data);
2579			break;
2580		default:
2581			error = EINVAL;
2582			break;
2583		}
2584		mtx_unlock_spin(&sc->sge.reg_lock);
2585		break;
2586	}
2587	case CHELSIO_GET_SGE_DESC: {
2588		struct ch_desc *edesc = (struct ch_desc *)data;
2589		int ret;
2590		if (edesc->queue_num >= SGE_QSETS * 6)
2591			return (EINVAL);
2592		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2593		    edesc->queue_num % 6, edesc->idx, edesc->data);
2594		if (ret < 0)
2595			return (EINVAL);
2596		edesc->size = ret;
2597		break;
2598	}
2599	case CHELSIO_GET_QSET_PARAMS: {
2600		struct qset_params *q;
2601		struct ch_qset_params *t = (struct ch_qset_params *)data;
2602		int q1 = pi->first_qset;
2603		int nqsets = pi->nqsets;
2604		int i;
2605
2606		if (t->qset_idx >= nqsets)
2607			return (EINVAL);
2608
2609		i = q1 + t->qset_idx;
2610		q = &sc->params.sge.qset[i];
2611		t->rspq_size   = q->rspq_size;
2612		t->txq_size[0] = q->txq_size[0];
2613		t->txq_size[1] = q->txq_size[1];
2614		t->txq_size[2] = q->txq_size[2];
2615		t->fl_size[0]  = q->fl_size;
2616		t->fl_size[1]  = q->jumbo_size;
2617		t->polling     = q->polling;
2618		t->lro         = q->lro;
2619		t->intr_lat    = q->coalesce_usecs;
2620		t->cong_thres  = q->cong_thres;
2621		t->qnum        = i;
2622
2623		if ((sc->flags & FULL_INIT_DONE) == 0)
2624			t->vector = 0;
2625		else if (sc->flags & USING_MSIX)
2626			t->vector = rman_get_start(sc->msix_irq_res[i]);
2627		else
2628			t->vector = rman_get_start(sc->irq_res);
2629
2630		break;
2631	}
2632	case CHELSIO_GET_QSET_NUM: {
2633		struct ch_reg *edata = (struct ch_reg *)data;
2634		edata->val = pi->nqsets;
2635		break;
2636	}
2637	case CHELSIO_LOAD_FW: {
2638		uint8_t *fw_data;
2639		uint32_t vers;
2640		struct ch_mem_range *t = (struct ch_mem_range *)data;
2641
2642		/*
2643		 * Firmware may be loaded only before FULL_INIT_DONE is set.
2644		 *
2645		 * FW_UPTODATE is also set so the rest of the initialization
2646		 * will not overwrite what was loaded here.  This gives you the
2647		 * flexibility to load any firmware (and maybe shoot yourself in
2648		 * the foot).
2649		 */
2650
2651		ADAPTER_LOCK(sc);
2652		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2653			ADAPTER_UNLOCK(sc);
2654			return (EBUSY);
2655		}
2656
2657		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2658		if (!fw_data)
2659			error = ENOMEM;
2660		else
2661			error = copyin(t->buf, fw_data, t->len);
2662
2663		if (!error)
2664			error = -t3_load_fw(sc, fw_data, t->len);
2665
2666		if (t3_get_fw_version(sc, &vers) == 0) {
2667			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2668			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2669			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2670		}
2671
2672		if (!error)
2673			sc->flags |= FW_UPTODATE;
2674
2675		free(fw_data, M_DEVBUF);
2676		ADAPTER_UNLOCK(sc);
2677		break;
2678	}
2679	case CHELSIO_LOAD_BOOT: {
2680		uint8_t *boot_data;
2681		struct ch_mem_range *t = (struct ch_mem_range *)data;
2682
2683		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2684		if (!boot_data)
2685			return (ENOMEM);
2686
2687		error = copyin(t->buf, boot_data, t->len);
2688		if (!error)
2689			error = -t3_load_boot(sc, boot_data, t->len);
2690
2691		free(boot_data, M_DEVBUF);
2692		break;
2693	}
2694	case CHELSIO_GET_PM: {
2695		struct ch_pm *m = (struct ch_pm *)data;
2696		struct tp_params *p = &sc->params.tp;
2697
2698		if (!is_offload(sc))
2699			return (EOPNOTSUPP);
2700
2701		m->tx_pg_sz = p->tx_pg_size;
2702		m->tx_num_pg = p->tx_num_pgs;
2703		m->rx_pg_sz  = p->rx_pg_size;
2704		m->rx_num_pg = p->rx_num_pgs;
2705		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2706
2707		break;
2708	}
2709	case CHELSIO_SET_PM: {
2710		struct ch_pm *m = (struct ch_pm *)data;
2711		struct tp_params *p = &sc->params.tp;
2712
2713		if (!is_offload(sc))
2714			return (EOPNOTSUPP);
2715		if (sc->flags & FULL_INIT_DONE)
2716			return (EBUSY);
2717
2718		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2719		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2720			return (EINVAL);	/* not power of 2 */
2721		if (!(m->rx_pg_sz & 0x14000))
2722			return (EINVAL);	/* not 16KB or 64KB */
2723		if (!(m->tx_pg_sz & 0x1554000))
2724			return (EINVAL);	/* not a supported tx page size */
2725		if (m->tx_num_pg == -1)
2726			m->tx_num_pg = p->tx_num_pgs;
2727		if (m->rx_num_pg == -1)
2728			m->rx_num_pg = p->rx_num_pgs;
2729		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2730			return (EINVAL);
2731		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2732		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2733			return (EINVAL);
2734
2735		p->rx_pg_size = m->rx_pg_sz;
2736		p->tx_pg_size = m->tx_pg_sz;
2737		p->rx_num_pgs = m->rx_num_pg;
2738		p->tx_num_pgs = m->tx_num_pg;
2739		break;
2740	}
2741	case CHELSIO_SETMTUTAB: {
2742		struct ch_mtus *m = (struct ch_mtus *)data;
2743		int i;
2744
2745		if (!is_offload(sc))
2746			return (EOPNOTSUPP);
2747		if (offload_running(sc))
2748			return (EBUSY);
2749		if (m->nmtus != NMTUS)
2750			return (EINVAL);
2751		if (m->mtus[0] < 81)         /* accommodate SACK */
2752			return (EINVAL);
2753
2754		/*
2755		 * MTUs must be in ascending order
2756		 */
2757		for (i = 1; i < NMTUS; ++i)
2758			if (m->mtus[i] < m->mtus[i - 1])
2759				return (EINVAL);
2760
2761		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2762		break;
2763	}
2764	case CHELSIO_GETMTUTAB: {
2765		struct ch_mtus *m = (struct ch_mtus *)data;
2766
2767		if (!is_offload(sc))
2768			return (EOPNOTSUPP);
2769
2770		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2771		m->nmtus = NMTUS;
2772		break;
2773	}
2774	case CHELSIO_GET_MEM: {
2775		struct ch_mem_range *t = (struct ch_mem_range *)data;
2776		struct mc7 *mem;
2777		uint8_t *useraddr;
2778		u64 buf[32];
2779
2780		/*
2781		 * Use these to avoid modifying len/addr in the return
2782		 * struct
2783		 */
2784		uint32_t len = t->len, addr = t->addr;
2785
2786		if (!is_offload(sc))
2787			return (EOPNOTSUPP);
2788		if (!(sc->flags & FULL_INIT_DONE))
2789			return (EIO);         /* need the memory controllers */
2790		if ((addr & 0x7) || (len & 0x7))
2791			return (EINVAL);
2792		if (t->mem_id == MEM_CM)
2793			mem = &sc->cm;
2794		else if (t->mem_id == MEM_PMRX)
2795			mem = &sc->pmrx;
2796		else if (t->mem_id == MEM_PMTX)
2797			mem = &sc->pmtx;
2798		else
2799			return (EINVAL);
2800
2801		/*
2802		 * Version scheme:
2803		 * bits 0..9: chip version
2804		 * bits 10..15: chip revision
2805		 */
2806		t->version = 3 | (sc->params.rev << 10);
2807
2808		/*
2809		 * Read 256 bytes at a time as len can be large and we don't
2810		 * want to use huge intermediate buffers.
2811		 */
2812		useraddr = (uint8_t *)t->buf;
2813		while (len) {
2814			unsigned int chunk = min(len, sizeof(buf));
2815
2816			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2817			if (error)
2818				return (-error);
2819			if (copyout(buf, useraddr, chunk))
2820				return (EFAULT);
2821			useraddr += chunk;
2822			addr += chunk;
2823			len -= chunk;
2824		}
2825		break;
2826	}
2827	case CHELSIO_READ_TCAM_WORD: {
2828		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2829
2830		if (!is_offload(sc))
2831			return (EOPNOTSUPP);
2832		if (!(sc->flags & FULL_INIT_DONE))
2833			return (EIO);         /* need MC5 */
2834		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2835		break;
2836	}
2837	case CHELSIO_SET_TRACE_FILTER: {
2838		struct ch_trace *t = (struct ch_trace *)data;
2839		const struct trace_params *tp;
2840
2841		tp = (const struct trace_params *)&t->sip;
2842		if (t->config_tx)
2843			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2844					       t->trace_tx);
2845		if (t->config_rx)
2846			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2847					       t->trace_rx);
2848		break;
2849	}
2850	case CHELSIO_SET_PKTSCHED: {
2851		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2852		if (sc->open_device_map == 0)
2853			return (EAGAIN);
2854		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2855		    p->binding);
2856		break;
2857	}
2858	case CHELSIO_IFCONF_GETREGS: {
2859		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2860		int reglen = cxgb_get_regs_len();
2861		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2862		if (buf == NULL) {
2863			return (ENOMEM);
2864		}
2865		if (regs->len > reglen)
2866			regs->len = reglen;
2867		else if (regs->len < reglen)
2868			error = ENOBUFS;
2869
2870		if (!error) {
2871			cxgb_get_regs(sc, regs, buf);
2872			error = copyout(buf, regs->data, reglen);
2873		}
2874		free(buf, M_DEVBUF);
2875
2876		break;
2877	}
2878	case CHELSIO_SET_HW_SCHED: {
2879		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2880		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2881
2882		if ((sc->flags & FULL_INIT_DONE) == 0)
2883			return (EAGAIN);       /* need TP to be initialized */
2884		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2885		    !in_range(t->channel, 0, 1) ||
2886		    !in_range(t->kbps, 0, 10000000) ||
2887		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2888		    !in_range(t->flow_ipg, 0,
2889			      dack_ticks_to_usec(sc, 0x7ff)))
2890			return (EINVAL);
2891
2892		if (t->kbps >= 0) {
2893			error = t3_config_sched(sc, t->kbps, t->sched);
2894			if (error < 0)
2895				return (-error);
2896		}
2897		if (t->class_ipg >= 0)
2898			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2899		if (t->flow_ipg >= 0) {
2900			t->flow_ipg *= 1000;     /* us -> ns */
2901			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2902		}
2903		if (t->mode >= 0) {
2904			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2905
2906			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2907					 bit, t->mode ? bit : 0);
2908		}
2909		if (t->channel >= 0)
2910			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2911					 1 << t->sched, t->channel << t->sched);
2912		break;
2913	}
2914	case CHELSIO_GET_EEPROM: {
2915		int i;
2916		struct ch_eeprom *e = (struct ch_eeprom *)data;
2917		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2918
2919		if (buf == NULL) {
2920			return (ENOMEM);
2921		}
2922		e->magic = EEPROM_MAGIC;
2923		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2924			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2925
2926		if (!error)
2927			error = copyout(buf + e->offset, e->data, e->len);
2928
2929		free(buf, M_DEVBUF);
2930		break;
2931	}
2932	case CHELSIO_CLEAR_STATS: {
2933		if (!(sc->flags & FULL_INIT_DONE))
2934			return (EAGAIN);
2935
2936		PORT_LOCK(pi);
2937		t3_mac_update_stats(&pi->mac);
2938		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2939		PORT_UNLOCK(pi);
2940		break;
2941	}
2942	case CHELSIO_GET_UP_LA: {
2943		struct ch_up_la *la = (struct ch_up_la *)data;
2944		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2945		if (buf == NULL) {
2946			return (ENOMEM);
2947		}
2948		if (la->bufsize < LA_BUFSIZE)
2949			error = ENOBUFS;
2950
2951		if (!error)
2952			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2953					      &la->bufsize, buf);
2954		if (!error)
2955			error = copyout(buf, la->data, la->bufsize);
2956
2957		free(buf, M_DEVBUF);
2958		break;
2959	}
2960	case CHELSIO_GET_UP_IOQS: {
2961		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2962		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2963		uint32_t *v;
2964
2965		if (buf == NULL) {
2966			return (ENOMEM);
2967		}
2968		if (ioqs->bufsize < IOQS_BUFSIZE)
2969			error = ENOBUFS;
2970
2971		if (!error)
2972			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2973
2974		if (!error) {
2975			v = (uint32_t *)buf;
2976
2977			ioqs->ioq_rx_enable = *v++;
2978			ioqs->ioq_tx_enable = *v++;
2979			ioqs->ioq_rx_status = *v++;
2980			ioqs->ioq_tx_status = *v++;
2981
2982			error = copyout(v, ioqs->data, ioqs->bufsize);
2983		}
2984
2985		free(buf, M_DEVBUF);
2986		break;
2987	}
2988	case CHELSIO_SET_FILTER: {
2989		struct ch_filter *f = (struct ch_filter *)data;
2990		struct filter_info *p;
2991		unsigned int nfilters = sc->params.mc5.nfilters;
2992
2993		if (!is_offload(sc))
2994			return (EOPNOTSUPP);	/* No TCAM */
2995		if (!(sc->flags & FULL_INIT_DONE))
2996			return (EAGAIN);	/* mc5 not setup yet */
2997		if (nfilters == 0)
2998			return (EBUSY);		/* TOE will use TCAM */
2999
3000		/* sanity checks */
3001		if (f->filter_id >= nfilters ||
3002		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3003		    (f->val.sport && f->mask.sport != 0xffff) ||
3004		    (f->val.dport && f->mask.dport != 0xffff) ||
3005		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3006		    (f->val.vlan_prio &&
3007			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3008		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3009		    f->qset >= SGE_QSETS ||
3010		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3011			return (EINVAL);
3012
3013		/* Was allocated with M_WAITOK */
3014		KASSERT(sc->filters, ("filter table NULL\n"));
3015
3016		p = &sc->filters[f->filter_id];
3017		if (p->locked)
3018			return (EPERM);
3019
3020		bzero(p, sizeof(*p));
3021		p->sip = f->val.sip;
3022		p->sip_mask = f->mask.sip;
3023		p->dip = f->val.dip;
3024		p->sport = f->val.sport;
3025		p->dport = f->val.dport;
3026		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3027		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3028		    FILTER_NO_VLAN_PRI;
3029		p->mac_hit = f->mac_hit;
3030		p->mac_vld = f->mac_addr_idx != 0xffff;
3031		p->mac_idx = f->mac_addr_idx;
3032		p->pkt_type = f->proto;
3033		p->report_filter_id = f->want_filter_id;
3034		p->pass = f->pass;
3035		p->rss = f->rss;
3036		p->qset = f->qset;
3037
3038		error = set_filter(sc, f->filter_id, p);
3039		if (error == 0)
3040			p->valid = 1;
3041		break;
3042	}
3043	case CHELSIO_DEL_FILTER: {
3044		struct ch_filter *f = (struct ch_filter *)data;
3045		struct filter_info *p;
3046		unsigned int nfilters = sc->params.mc5.nfilters;
3047
3048		if (!is_offload(sc))
3049			return (EOPNOTSUPP);
3050		if (!(sc->flags & FULL_INIT_DONE))
3051			return (EAGAIN);
3052		if (nfilters == 0 || sc->filters == NULL)
3053			return (EINVAL);
3054		if (f->filter_id >= nfilters)
3055			return (EINVAL);
3056
3057		p = &sc->filters[f->filter_id];
3058		if (p->locked)
3059			return (EPERM);
3060		if (!p->valid)
3061			return (EFAULT); /* Read "Bad address" as "Bad index" */
3062
3063		bzero(p, sizeof(*p));
3064		p->sip = p->sip_mask = 0xffffffff;
3065		p->vlan = 0xfff;
3066		p->vlan_prio = FILTER_NO_VLAN_PRI;
3067		p->pkt_type = 1;
3068		error = set_filter(sc, f->filter_id, p);
3069		break;
3070	}
3071	case CHELSIO_GET_FILTER: {
3072		struct ch_filter *f = (struct ch_filter *)data;
3073		struct filter_info *p;
3074		unsigned int i, nfilters = sc->params.mc5.nfilters;
3075
3076		if (!is_offload(sc))
3077			return (EOPNOTSUPP);
3078		if (!(sc->flags & FULL_INIT_DONE))
3079			return (EAGAIN);
3080		if (nfilters == 0 || sc->filters == NULL)
3081			return (EINVAL);
3082
3083		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3084		for (; i < nfilters; i++) {
3085			p = &sc->filters[i];
3086			if (!p->valid)
3087				continue;
3088
3089			bzero(f, sizeof(*f));
3090
3091			f->filter_id = i;
3092			f->val.sip = p->sip;
3093			f->mask.sip = p->sip_mask;
3094			f->val.dip = p->dip;
3095			f->mask.dip = p->dip ? 0xffffffff : 0;
3096			f->val.sport = p->sport;
3097			f->mask.sport = p->sport ? 0xffff : 0;
3098			f->val.dport = p->dport;
3099			f->mask.dport = p->dport ? 0xffff : 0;
3100			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3101			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3102			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3103			    0 : p->vlan_prio;
3104			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3105			    0 : FILTER_NO_VLAN_PRI;
3106			f->mac_hit = p->mac_hit;
3107			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3108			f->proto = p->pkt_type;
3109			f->want_filter_id = p->report_filter_id;
3110			f->pass = p->pass;
3111			f->rss = p->rss;
3112			f->qset = p->qset;
3113
3114			break;
3115		}
3116
3117		if (i == nfilters)
3118			f->filter_id = 0xffffffff;
3119		break;
3120	}
3121	default:
3122		return (EOPNOTSUPP);
3123		break;
3124	}
3125
3126	return (error);
3127}
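
/*
 * Userland usage sketch (illustrative only, not part of the driver): the
 * CHELSIO_* ioctls above are issued by a privileged process against the
 * port's character device.  The device node name and the header providing
 * the CHELSIO_* definitions are assumptions and may differ on a system.
 *
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include "cxgb_ioctl.h"		// assumed location of CHELSIO_* defs
 *
 *	struct ch_reg r = { .addr = A_PL_INT_ENABLE0 };	// any 4-byte aligned reg
 *	int fd = open("/dev/cxgb0", O_RDWR);		// assumed device node
 *	if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("reg 0x%x = 0x%x\n", r.addr, r.val);
 */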
3128
3129static __inline void
3130reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3131    unsigned int end)
3132{
3133	uint32_t *p = (uint32_t *)(buf + start);
3134
3135	for ( ; start <= end; start += sizeof(uint32_t))
3136		*p++ = t3_read_reg(ap, start);
3137}
3138
3139#define T3_REGMAP_SIZE (3 * 1024)
3140static int
3141cxgb_get_regs_len(void)
3142{
3143	return T3_REGMAP_SIZE;
3144}
3145
3146static void
3147cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3148{
3149
3150	/*
3151	 * Version scheme:
3152	 * bits 0..9: chip version
3153	 * bits 10..15: chip revision
3154	 * bit 31: set for PCIe cards
3155	 */
3156	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
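	/*
	 * e.g. a rev-2 PCIe adapter reports 3 | (2 << 10) | (1U << 31) ==
	 * 0x80000803, assuming is_pcie() returns 0 or 1.
	 */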
3157
3158	/*
3159	 * We skip the MAC statistics registers because they are clear-on-read.
3160	 * Also reading multi-register stats would need to synchronize with the
3161	 * periodic mac stats accumulation.  Hard to justify the complexity.
3162	 */
3163	memset(buf, 0, cxgb_get_regs_len());
3164	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3165	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3166	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3167	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3168	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3169	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3170		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3171	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3172		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3173}
3174
3175static int
3176alloc_filters(struct adapter *sc)
3177{
3178	struct filter_info *p;
3179	unsigned int nfilters = sc->params.mc5.nfilters;
3180
3181	if (nfilters == 0)
3182		return (0);
3183
3184	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3185	sc->filters = p;
3186
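	/*
	 * The last entry is reserved as a locked default: it does not match on
	 * VLAN, passes traffic, and lets RSS choose the queue.  The
	 * CHELSIO_SET_FILTER/DEL_FILTER ioctls refuse to touch locked entries
	 * (EPERM); setup_hw_filters() writes them to the TCAM at init time.
	 */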
3187	p = &sc->filters[nfilters - 1];
3188	p->vlan = 0xfff;
3189	p->vlan_prio = FILTER_NO_VLAN_PRI;
3190	p->pass = p->rss = p->valid = p->locked = 1;
3191
3192	return (0);
3193}
3194
3195static int
3196setup_hw_filters(struct adapter *sc)
3197{
3198	int i, rc;
3199	unsigned int nfilters = sc->params.mc5.nfilters;
3200
3201	if (!sc->filters)
3202		return (0);
3203
3204	t3_enable_filters(sc);
3205
3206	for (i = rc = 0; i < nfilters && !rc; i++) {
3207		if (sc->filters[i].locked)
3208			rc = set_filter(sc, i, &sc->filters[i]);
3209	}
3210
3211	return (rc);
3212}
3213
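/*
 * Program one TCAM filter.  The filter is written by sending the firmware a
 * BYPASS work request carrying a CPL_PASS_OPEN_REQ (the match tuple)
 * followed by two CPL_SET_TCB_FIELDs that set the filter's disposition bits.
 * When a filter passes traffic without RSS, an extra CPL_SET_TCB_FIELD is
 * sent to steer matching packets to the requested queue set.
 */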
3214static int
3215set_filter(struct adapter *sc, int id, const struct filter_info *f)
3216{
3217	int len;
3218	struct mbuf *m;
3219	struct ulp_txpkt *txpkt;
3220	struct work_request_hdr *wr;
3221	struct cpl_pass_open_req *oreq;
3222	struct cpl_set_tcb_field *sreq;
3223
3224	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3225	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3226
3227	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3228	      sc->params.mc5.nfilters;
3229
3230	m = m_gethdr(M_WAITOK, MT_DATA);
3231	m->m_len = m->m_pkthdr.len = len;
3232	bzero(mtod(m, char *), len);
3233
3234	wr = mtod(m, struct work_request_hdr *);
3235	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3236
3237	oreq = (struct cpl_pass_open_req *)(wr + 1);
3238	txpkt = (struct ulp_txpkt *)oreq;
3239	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3240	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3241	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3242	oreq->local_port = htons(f->dport);
3243	oreq->peer_port = htons(f->sport);
3244	oreq->local_ip = htonl(f->dip);
3245	oreq->peer_ip = htonl(f->sip);
3246	oreq->peer_netmask = htonl(f->sip_mask);
3247	oreq->opt0h = 0;
3248	oreq->opt0l = htonl(F_NO_OFFLOAD);
3249	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3250			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3251			 V_VLAN_PRI(f->vlan_prio >> 1) |
3252			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3253			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3254			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3255
3256	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3257	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3258			  (f->report_filter_id << 15) | (1 << 23) |
3259			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3260	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3261	t3_mgmt_tx(sc, m);
3262
3263	if (f->pass && !f->rss) {
3264		len = sizeof(*sreq);
3265		m = m_gethdr(M_WAITOK, MT_DATA);
3266		m->m_len = m->m_pkthdr.len = len;
3267		bzero(mtod(m, char *), len);
3268		sreq = mtod(m, struct cpl_set_tcb_field *);
3269		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3270		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3271				 (u64)sc->rrss_map[f->qset] << 19);
3272		t3_mgmt_tx(sc, m);
3273	}
3274	return (0);
3275}
3276
3277static inline void
3278mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3279    unsigned int word, u64 mask, u64 val)
3280{
3281	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3282	req->reply = V_NO_REPLY(1);
3283	req->cpu_idx = 0;
3284	req->word = htons(word);
3285	req->mask = htobe64(mask);
3286	req->val = htobe64(val);
3287}
3288
3289static inline void
3290set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3291    unsigned int word, u64 mask, u64 val)
3292{
3293	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3294
3295	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3296	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3297	mk_set_tcb_field(req, tid, word, mask, val);
3298}
3299
3300void
3301t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3302{
3303	struct adapter *sc;
3304
3305	mtx_lock(&t3_list_lock);
3306	SLIST_FOREACH(sc, &t3_list, link) {
3307		/*
3308		 * func should not make any assumptions about what state sc is
3309		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3310		 */
3311		func(sc, arg);
3312	}
3313	mtx_unlock(&t3_list_lock);
3314}
3315
3316#ifdef TCP_OFFLOAD
3317static int
3318toe_capability(struct port_info *pi, int enable)
3319{
3320	int rc;
3321	struct adapter *sc = pi->adapter;
3322
3323	ADAPTER_LOCK_ASSERT_OWNED(sc);
3324
3325	if (!is_offload(sc))
3326		return (ENODEV);
3327
3328	if (enable) {
3329		if (!(sc->flags & FULL_INIT_DONE)) {
3330			log(LOG_WARNING,
3331			    "You must enable a cxgb interface first\n");
3332			return (EAGAIN);
3333		}
3334
3335		if (isset(&sc->offload_map, pi->port_id))
3336			return (0);
3337
3338		if (!(sc->flags & TOM_INIT_DONE)) {
3339			rc = t3_activate_uld(sc, ULD_TOM);
3340			if (rc == EAGAIN) {
3341				log(LOG_WARNING,
3342				    "You must kldload t3_tom.ko before trying "
3343				    "to enable TOE on a cxgb interface.\n");
3344			}
3345			if (rc != 0)
3346				return (rc);
3347			KASSERT(sc->tom_softc != NULL,
3348			    ("%s: TOM activated but softc NULL", __func__));
3349			KASSERT(sc->flags & TOM_INIT_DONE,
3350			    ("%s: TOM activated but flag not set", __func__));
3351		}
3352
3353		setbit(&sc->offload_map, pi->port_id);
3354
3355		/*
3356		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3357		 * enabled on any port.  Need to figure out how to enable,
3358		 * disable, load, and unload iWARP cleanly.
3359		 */
3360		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3361		    t3_activate_uld(sc, ULD_IWARP) == 0)
3362			setbit(&sc->offload_map, MAX_NPORTS);
3363	} else {
3364		if (!isset(&sc->offload_map, pi->port_id))
3365			return (0);
3366
3367		KASSERT(sc->flags & TOM_INIT_DONE,
3368		    ("%s: TOM never initialized?", __func__));
3369		clrbit(&sc->offload_map, pi->port_id);
3370	}
3371
3372	return (0);
3373}
3374
3375/*
3376 * Add an upper layer driver to the global list.
3377 */
3378int
3379t3_register_uld(struct uld_info *ui)
3380{
3381	int rc = 0;
3382	struct uld_info *u;
3383
3384	mtx_lock(&t3_uld_list_lock);
3385	SLIST_FOREACH(u, &t3_uld_list, link) {
3386		if (u->uld_id == ui->uld_id) {
3387			rc = EEXIST;
3388			goto done;
3389		}
3390	}
3391
3392	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3393	ui->refcount = 0;
3394done:
3395	mtx_unlock(&t3_uld_list_lock);
3396	return (rc);
3397}
3398
3399int
3400t3_unregister_uld(struct uld_info *ui)
3401{
3402	int rc = EINVAL;
3403	struct uld_info *u;
3404
3405	mtx_lock(&t3_uld_list_lock);
3406
3407	SLIST_FOREACH(u, &t3_uld_list, link) {
3408		if (u == ui) {
3409			if (ui->refcount > 0) {
3410				rc = EBUSY;
3411				goto done;
3412			}
3413
3414			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3415			rc = 0;
3416			goto done;
3417		}
3418	}
3419done:
3420	mtx_unlock(&t3_uld_list_lock);
3421	return (rc);
3422}
3423
3424int
3425t3_activate_uld(struct adapter *sc, int id)
3426{
3427	int rc = EAGAIN;
3428	struct uld_info *ui;
3429
3430	mtx_lock(&t3_uld_list_lock);
3431
3432	SLIST_FOREACH(ui, &t3_uld_list, link) {
3433		if (ui->uld_id == id) {
3434			rc = ui->activate(sc);
3435			if (rc == 0)
3436				ui->refcount++;
3437			goto done;
3438		}
3439	}
3440done:
3441	mtx_unlock(&t3_uld_list_lock);
3442
3443	return (rc);
3444}
3445
3446int
3447t3_deactivate_uld(struct adapter *sc, int id)
3448{
3449	int rc = EINVAL;
3450	struct uld_info *ui;
3451
3452	mtx_lock(&t3_uld_list_lock);
3453
3454	SLIST_FOREACH(ui, &t3_uld_list, link) {
3455		if (ui->uld_id == id) {
3456			rc = ui->deactivate(sc);
3457			if (rc == 0)
3458				ui->refcount--;
3459			goto done;
3460		}
3461	}
3462done:
3463	mtx_unlock(&t3_uld_list_lock);
3464
3465	return (rc);
3466}
3467
3468static int
3469cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3470    struct mbuf *m)
3471{
3472	m_freem(m);
3473	return (EDOOFUS);
3474}
3475
3476int
3477t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3478{
3479	uintptr_t *loc, new;
3480
3481	if (opcode >= NUM_CPL_HANDLERS)
3482		return (EINVAL);
3483
3484	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3485	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3486	atomic_store_rel_ptr(loc, new);
3487
3488	return (0);
3489}
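
/*
 * Sketch of how a ULD (e.g. t3_tom) would hook an opcode, using the same
 * handler signature as cpl_not_handled() above.  The handler name and the
 * opcode chosen here are illustrative only.
 *
 *	static int
 *	my_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 *	{
 *		// ... the handler owns and must consume or free m ...
 *		return (0);
 *	}
 *
 *	t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, my_act_establish);
 *
 * Passing a NULL handler restores cpl_not_handled for that opcode.
 */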
3490#endif
3491
3492static int
3493cxgbc_mod_event(module_t mod, int cmd, void *arg)
3494{
3495	int rc = 0;
3496
3497	switch (cmd) {
3498	case MOD_LOAD:
3499		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3500		SLIST_INIT(&t3_list);
3501#ifdef TCP_OFFLOAD
3502		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3503		SLIST_INIT(&t3_uld_list);
3504#endif
3505		break;
3506
3507	case MOD_UNLOAD:
3508#ifdef TCP_OFFLOAD
3509		mtx_lock(&t3_uld_list_lock);
3510		if (!SLIST_EMPTY(&t3_uld_list)) {
3511			rc = EBUSY;
3512			mtx_unlock(&t3_uld_list_lock);
3513			break;
3514		}
3515		mtx_unlock(&t3_uld_list_lock);
3516		mtx_destroy(&t3_uld_list_lock);
3517#endif
3518		mtx_lock(&t3_list_lock);
3519		if (!SLIST_EMPTY(&t3_list)) {
3520			rc = EBUSY;
3521			mtx_unlock(&t3_list_lock);
3522			break;
3523		}
3524		mtx_unlock(&t3_list_lock);
3525		mtx_destroy(&t3_list_lock);
3526		break;
3527	}
3528
3529	return (rc);
3530}
3531