cxgb_main.c revision 271856
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 271856 2014-09-19 09:01:19Z glebius $");
32
33#include "opt_inet.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/module.h>
40#include <sys/pciio.h>
41#include <sys/conf.h>
42#include <machine/bus.h>
43#include <machine/resource.h>
44#include <sys/bus_dma.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_var.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67#include <net/if_types.h>
68#include <net/if_vlan_var.h>
69
70#include <netinet/in_systm.h>
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
73#include <netinet/ip.h>
75#include <netinet/tcp.h>
76#include <netinet/udp.h>
77
78#include <dev/pci/pcireg.h>
79#include <dev/pci/pcivar.h>
80#include <dev/pci/pci_private.h>
81
82#include <cxgb_include.h>
83
84#ifdef PRIV_SUPPORTED
85#include <sys/priv.h>
86#endif
87
88static int cxgb_setup_interrupts(adapter_t *);
89static void cxgb_teardown_interrupts(adapter_t *);
90static void cxgb_init(void *);
91static int cxgb_init_locked(struct port_info *);
92static int cxgb_uninit_locked(struct port_info *);
93static int cxgb_uninit_synchronized(struct port_info *);
94static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
95static int cxgb_media_change(struct ifnet *);
96static int cxgb_ifm_type(int);
97static void cxgb_build_medialist(struct port_info *);
98static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
99static int setup_sge_qsets(adapter_t *);
100static void cxgb_async_intr(void *);
101static void cxgb_tick_handler(void *, int);
102static void cxgb_tick(void *);
103static void link_check_callout(void *);
104static void check_link_status(void *, int);
105static void setup_rss(adapter_t *sc);
106static int alloc_filters(struct adapter *);
107static int setup_hw_filters(struct adapter *);
108static int set_filter(struct adapter *, int, const struct filter_info *);
109static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
110    unsigned int, u64, u64);
111static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
112    unsigned int, u64, u64);
113#ifdef TCP_OFFLOAD
114static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
115#endif
116
117/* Attachment glue for the PCI controller end of the device.  Each port of
118 * the device is attached separately, as defined later.
119 */
120static int cxgb_controller_probe(device_t);
121static int cxgb_controller_attach(device_t);
122static int cxgb_controller_detach(device_t);
123static void cxgb_free(struct adapter *);
124static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
125    unsigned int end);
126static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
127static int cxgb_get_regs_len(void);
128static void touch_bars(device_t dev);
129static void cxgb_update_mac_settings(struct port_info *p);
130#ifdef TCP_OFFLOAD
131static int toe_capability(struct port_info *, int);
132#endif
133
134static device_method_t cxgb_controller_methods[] = {
135	DEVMETHOD(device_probe,		cxgb_controller_probe),
136	DEVMETHOD(device_attach,	cxgb_controller_attach),
137	DEVMETHOD(device_detach,	cxgb_controller_detach),
138
139	DEVMETHOD_END
140};
141
142static driver_t cxgb_controller_driver = {
143	"cxgbc",
144	cxgb_controller_methods,
145	sizeof(struct adapter)
146};
147
148static int cxgbc_mod_event(module_t, int, void *);
149static devclass_t	cxgb_controller_devclass;
150DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
151    cxgbc_mod_event, 0);
152MODULE_VERSION(cxgbc, 1);
153MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
154
155/*
156 * Attachment glue for the ports.  Attachment is done directly to the
157 * controller device.
158 */
159static int cxgb_port_probe(device_t);
160static int cxgb_port_attach(device_t);
161static int cxgb_port_detach(device_t);
162
163static device_method_t cxgb_port_methods[] = {
164	DEVMETHOD(device_probe,		cxgb_port_probe),
165	DEVMETHOD(device_attach,	cxgb_port_attach),
166	DEVMETHOD(device_detach,	cxgb_port_detach),
167	{ 0, 0 }
168};
169
170static driver_t cxgb_port_driver = {
171	"cxgb",
172	cxgb_port_methods,
173	0
174};
175
176static d_ioctl_t cxgb_extension_ioctl;
177static d_open_t cxgb_extension_open;
178static d_close_t cxgb_extension_close;
179
180static struct cdevsw cxgb_cdevsw = {
181       .d_version =    D_VERSION,
182       .d_flags =      0,
183       .d_open =       cxgb_extension_open,
184       .d_close =      cxgb_extension_close,
185       .d_ioctl =      cxgb_extension_ioctl,
186       .d_name =       "cxgb",
187};
188
189static devclass_t	cxgb_port_devclass;
190DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
191MODULE_VERSION(cxgb, 1);
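/*
 * Usage note (illustrative; module naming assumed): when built as a module,
 * the controller and port drivers registered above are typically loaded
 * together with
 *
 *	kldload if_cxgb
 *
 * The firmware image ("cxgb_t3fw") is a separate module and is fetched at
 * run time through firmware(9) by upgrade_fw() below.
 */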
192
193static struct mtx t3_list_lock;
194static SLIST_HEAD(, adapter) t3_list;
195#ifdef TCP_OFFLOAD
196static struct mtx t3_uld_list_lock;
197static SLIST_HEAD(, uld_info) t3_uld_list;
198#endif
199
200/*
201 * The driver uses the best interrupt scheme available on a platform in the
202 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
203 * of these schemes the driver may consider as follows:
204 *
205 * msi = 2: choose from among all three options
206 * msi = 1: only consider MSI and pin interrupts
207 * msi = 0: force pin interrupts
208 */
209static int msi_allowed = 2;
210
211SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
212SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
213    "MSI-X, MSI, INTx selector");
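/*
 * Example (illustrative): because this is a CTLFLAG_RDTUN tunable, it is
 * normally set from /boot/loader.conf before the module is loaded, e.g.
 *
 *	hw.cxgb.msi_allowed="1"		# consider MSI and INTx only
 *	hw.cxgb.msi_allowed="0"		# force legacy pin interrupts
 *
 * The other hw.cxgb.* tunables declared below follow the same pattern.
 */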
214
215/*
216 * The driver uses an auto-queue algorithm by default.
217 * To disable it and force a single queue-set per port, use multiq = 0
218 */
219static int multiq = 1;
220SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
221    "use min(ncpus/ports, 8) queue-sets per port");
222
223/*
224 * By default the driver will not update the firmware unless
225 * it was compiled against a newer version.
226 */
228static int force_fw_update = 0;
229SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
230    "update firmware even if up to date");
231
232int cxgb_use_16k_clusters = -1;
233SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
234    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
235
236static int nfilters = -1;
237SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
238    &nfilters, 0, "max number of entries in the filter table");
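/*
 * Interpretation (see cxgb_up() below): on offload-capable adapters,
 * nfilters = -1 sizes the filter table to the largest value the MC5 TCAM
 * supports, nfilters = 0 skips the filter-table setup entirely, and any
 * other value is clamped to the hardware maximum.
 */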
239
240enum {
241	MAX_TXQ_ENTRIES      = 16384,
242	MAX_CTRL_TXQ_ENTRIES = 1024,
243	MAX_RSPQ_ENTRIES     = 16384,
244	MAX_RX_BUFFERS       = 16384,
245	MAX_RX_JUMBO_BUFFERS = 16384,
246	MIN_TXQ_ENTRIES      = 4,
247	MIN_CTRL_TXQ_ENTRIES = 4,
248	MIN_RSPQ_ENTRIES     = 32,
249	MIN_FL_ENTRIES       = 32,
250	MIN_FL_JUMBO_ENTRIES = 32
251};
252
253struct filter_info {
254	u32 sip;
255	u32 sip_mask;
256	u32 dip;
257	u16 sport;
258	u16 dport;
259	u32 vlan:12;
260	u32 vlan_prio:3;
261	u32 mac_hit:1;
262	u32 mac_idx:4;
263	u32 mac_vld:1;
264	u32 pkt_type:2;
265	u32 report_filter_id:1;
266	u32 pass:1;
267	u32 rss:1;
268	u32 qset:3;
269	u32 locked:1;
270	u32 valid:1;
271};
272
273enum { FILTER_NO_VLAN_PRI = 7 };
274
275#define EEPROM_MAGIC 0x38E2F10C
276
277#define PORT_MASK ((1 << MAX_NPORTS) - 1)
278
279/* Table for probing the cards.  The desc field isn't actually used */
280struct cxgb_ident {
281	uint16_t	vendor;
282	uint16_t	device;
283	int		index;
284	char		*desc;
285} cxgb_identifiers[] = {
286	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
287	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
288	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
289	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
290	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
291	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
300	{0, 0, 0, NULL}
301};
302
303static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
304
305
306static __inline char
307t3rev2char(struct adapter *adapter)
308{
309	char rev = 'z';
310
311	switch(adapter->params.rev) {
312	case T3_REV_A:
313		rev = 'a';
314		break;
315	case T3_REV_B:
316	case T3_REV_B2:
317		rev = 'b';
318		break;
319	case T3_REV_C:
320		rev = 'c';
321		break;
322	}
323	return rev;
324}
325
326static struct cxgb_ident *
327cxgb_get_ident(device_t dev)
328{
329	struct cxgb_ident *id;
330
331	for (id = cxgb_identifiers; id->desc != NULL; id++) {
332		if ((id->vendor == pci_get_vendor(dev)) &&
333		    (id->device == pci_get_device(dev))) {
334			return (id);
335		}
336	}
337	return (NULL);
338}
339
340static const struct adapter_info *
341cxgb_get_adapter_info(device_t dev)
342{
343	struct cxgb_ident *id;
344	const struct adapter_info *ai;
345
346	id = cxgb_get_ident(dev);
347	if (id == NULL)
348		return (NULL);
349
350	ai = t3_get_adapter_info(id->index);
351
352	return (ai);
353}
354
355static int
356cxgb_controller_probe(device_t dev)
357{
358	const struct adapter_info *ai;
359	char *ports, buf[80];
360	int nports;
361
362	ai = cxgb_get_adapter_info(dev);
363	if (ai == NULL)
364		return (ENXIO);
365
366	nports = ai->nports0 + ai->nports1;
367	if (nports == 1)
368		ports = "port";
369	else
370		ports = "ports";
371
372	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
373	device_set_desc_copy(dev, buf);
374	return (BUS_PROBE_DEFAULT);
375}
376
377#define FW_FNAME "cxgb_t3fw"
378#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
379#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
380
381static int
382upgrade_fw(adapter_t *sc)
383{
384	const struct firmware *fw;
385	int status;
386	u32 vers;
387
388	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
389		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
390		return (ENOENT);
391	} else
392		device_printf(sc->dev, "installing firmware on card\n");
393	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
394
395	if (status != 0) {
396		device_printf(sc->dev, "failed to install firmware: %d\n",
397		    status);
398	} else {
399		t3_get_fw_version(sc, &vers);
400		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
401		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
402		    G_FW_VERSION_MICRO(vers));
403	}
404
405	firmware_put(fw, FIRMWARE_UNLOAD);
406
407	return (status);
408}
409
410/*
411 * The cxgb_controller_attach function is responsible for the initial
412 * bringup of the device.  Its responsibilities include:
413 *
414 *  1. Determine if the device supports MSI or MSI-X.
415 *  2. Allocate bus resources so that we can access the Base Address Register
416 *  3. Create and initialize mutexes for the controller and its control
417 *     logic such as SGE and MDIO.
418 *  4. Call hardware specific setup routine for the adapter as a whole.
419 *  5. Allocate the BAR for doing MSI-X.
420 *  6. Set up the line interrupt iff MSI-X is not supported.
421 *  7. Create the driver's taskq.
422 *  8. Start one task queue service thread.
423 *  9. Check if the firmware and SRAM are up-to-date.  They will be
424 *     auto-updated later (before FULL_INIT_DONE), if required.
425 * 10. Create a child device for each MAC (port)
426 * 11. Initialize T3 private state.
427 * 12. Trigger the LED
428 * 13. Set up offload iff supported.
429 * 14. Reset/restart the tick callout.
430 * 15. Attach sysctls
431 *
432 * NOTE: Any modification or deviation from this list MUST be reflected in
433 * the above comment.  Failure to do so will result in problems on various
434 * error conditions including link flapping.
435 */
436static int
437cxgb_controller_attach(device_t dev)
438{
439	device_t child;
440	const struct adapter_info *ai;
441	struct adapter *sc;
442	int i, error = 0;
443	uint32_t vers;
444	int port_qsets = 1;
445	int msi_needed, reg;
446	char buf[80];
447
448	sc = device_get_softc(dev);
449	sc->dev = dev;
450	sc->msi_count = 0;
451	ai = cxgb_get_adapter_info(dev);
452
453	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
454	    device_get_unit(dev));
455	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
456
457	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
458	    device_get_unit(dev));
459	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
460	    device_get_unit(dev));
461	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
462	    device_get_unit(dev));
463
464	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
465	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
466	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
467
468	mtx_lock(&t3_list_lock);
469	SLIST_INSERT_HEAD(&t3_list, sc, link);
470	mtx_unlock(&t3_list_lock);
471
472	/* find the PCIe link width and set max read request to 4KB */
473	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
474		uint16_t lnk;
475
476		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
477		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
478		if (sc->link_width < 8 &&
479		    (ai->caps & SUPPORTED_10000baseT_Full)) {
480			device_printf(sc->dev,
481			    "PCIe x%d Link, expect reduced performance\n",
482			    sc->link_width);
483		}
484
485		pci_set_max_read_req(dev, 4096);
486	}
487
488	touch_bars(dev);
489	pci_enable_busmaster(dev);
490	/*
491	 * Allocate the registers and make them available to the driver.
492	 * The registers that we care about for NIC mode are in BAR 0
493	 */
494	sc->regs_rid = PCIR_BAR(0);
495	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
496	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
497		device_printf(dev, "Cannot allocate BAR region 0\n");
498		error = ENXIO;
499		goto out;
500	}
501
502	sc->bt = rman_get_bustag(sc->regs_res);
503	sc->bh = rman_get_bushandle(sc->regs_res);
504	sc->mmio_len = rman_get_size(sc->regs_res);
505
506	for (i = 0; i < MAX_NPORTS; i++)
507		sc->port[i].adapter = sc;
508
509	if (t3_prep_adapter(sc, ai, 1) < 0) {
510		printf("prep adapter failed\n");
511		error = ENODEV;
512		goto out;
513	}
514
515	sc->udbs_rid = PCIR_BAR(2);
516	sc->udbs_res = NULL;
517	if (is_offload(sc) &&
518	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
519		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
520		device_printf(dev, "Cannot allocate BAR region 2\n");
521		error = ENXIO;
522		goto out;
523	}
524
525        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
526	 * enough messages for the queue sets.  If that fails, try falling
527	 * back to MSI.  If that fails, then try falling back to the legacy
528	 * interrupt pin model.
529	 */
530	sc->msix_regs_rid = 0x20;
531	if ((msi_allowed >= 2) &&
532	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
533	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
534
535		if (multiq)
536			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
537		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
538
539		if (pci_msix_count(dev) == 0 ||
540		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
541		    sc->msi_count != msi_needed) {
542			device_printf(dev, "alloc msix failed - "
543				      "msi_count=%d, msi_needed=%d, err=%d; "
544				      "will try MSI\n", sc->msi_count,
545				      msi_needed, error);
546			sc->msi_count = 0;
547			port_qsets = 1;
548			pci_release_msi(dev);
549			bus_release_resource(dev, SYS_RES_MEMORY,
550			    sc->msix_regs_rid, sc->msix_regs_res);
551			sc->msix_regs_res = NULL;
552		} else {
553			sc->flags |= USING_MSIX;
554			sc->cxgb_intr = cxgb_async_intr;
555			device_printf(dev,
556				      "using MSI-X interrupts (%u vectors)\n",
557				      sc->msi_count);
558		}
559	}
560
561	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
562		sc->msi_count = 1;
563		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
564			device_printf(dev, "alloc msi failed - "
565				      "err=%d; will try INTx\n", error);
566			sc->msi_count = 0;
567			port_qsets = 1;
568			pci_release_msi(dev);
569		} else {
570			sc->flags |= USING_MSI;
571			sc->cxgb_intr = t3_intr_msi;
572			device_printf(dev, "using MSI interrupts\n");
573		}
574	}
575	if (sc->msi_count == 0) {
576		device_printf(dev, "using line interrupts\n");
577		sc->cxgb_intr = t3b_intr;
578	}
579
580	/* Create a private taskqueue thread for handling driver events */
581	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
582	    taskqueue_thread_enqueue, &sc->tq);
583	if (sc->tq == NULL) {
584		device_printf(dev, "failed to allocate controller task queue\n");
585		goto out;
586	}
587
588	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
589	    device_get_nameunit(dev));
590	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591
592
593	/* Create a periodic callout for checking adapter status */
594	callout_init(&sc->cxgb_tick_ch, TRUE);
595
596	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597		/*
598		 * Warn user that a firmware update will be attempted in init.
599		 */
600		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602		sc->flags &= ~FW_UPTODATE;
603	} else {
604		sc->flags |= FW_UPTODATE;
605	}
606
607	if (t3_check_tpsram_version(sc) < 0) {
608		/*
609		 * Warn user that a firmware update will be attempted in init.
610		 */
611		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613		sc->flags &= ~TPS_UPTODATE;
614	} else {
615		sc->flags |= TPS_UPTODATE;
616	}
617
618	/*
619	 * Create a child device for each MAC.  The ethernet attachment
620	 * will be done in these children.
621	 */
622	for (i = 0; i < (sc)->params.nports; i++) {
623		struct port_info *pi;
624
625		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626			device_printf(dev, "failed to add child port\n");
627			error = EINVAL;
628			goto out;
629		}
630		pi = &sc->port[i];
631		pi->adapter = sc;
632		pi->nqsets = port_qsets;
633		pi->first_qset = i*port_qsets;
634		pi->port_id = i;
635		pi->tx_chan = i >= ai->nports0;
636		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637		sc->rxpkt_map[pi->txpkt_intf] = i;
638		sc->port[i].tx_chan = i >= ai->nports0;
639		sc->portdev[i] = child;
640		device_set_softc(child, pi);
641	}
642	if ((error = bus_generic_attach(dev)) != 0)
643		goto out;
644
645	/* initialize sge private state */
646	t3_sge_init_adapter(sc);
647
648	t3_led_ready(sc);
649
650	error = t3_get_fw_version(sc, &vers);
651	if (error)
652		goto out;
653
654	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
655	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
656	    G_FW_VERSION_MICRO(vers));
657
658	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
659		 ai->desc, is_offload(sc) ? "R" : "",
660		 sc->params.vpd.ec, sc->params.vpd.sn);
661	device_set_desc_copy(dev, buf);
662
663	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
664		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
665		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
666
667	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
668	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
669	t3_add_attach_sysctls(sc);
670
671#ifdef TCP_OFFLOAD
672	for (i = 0; i < NUM_CPL_HANDLERS; i++)
673		sc->cpl_handler[i] = cpl_not_handled;
674#endif
675
676	t3_intr_clear(sc);
677	error = cxgb_setup_interrupts(sc);
678out:
679	if (error)
680		cxgb_free(sc);
681
682	return (error);
683}
684
685/*
686 * The cxgb_controller_detach routine is called when the device is
687 * unloaded from the system.
688 */
689
690static int
691cxgb_controller_detach(device_t dev)
692{
693	struct adapter *sc;
694
695	sc = device_get_softc(dev);
696
697	cxgb_free(sc);
698
699	return (0);
700}
701
702/*
703 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
704 * down the structures that were built up in cxgb_controller_attach(), and
705 * should be the final piece of work done when fully unloading the driver.
706 * It is responsible for:
707 *
709 *  1. Shutting down the threads started by the cxgb_controller_attach()
710 *     routine.
711 *  2. Stopping the lower-level device and all callouts (cxgb_down()).
712 *  3. Detaching all of the port devices created during the
713 *     cxgb_controller_attach() routine.
714 *  4. Removing the device children created via cxgb_controller_attach().
715 *  5. Releasing PCI resources associated with the device.
716 *  6. Turning off the offload support, iff it was turned on.
717 *  7. Destroying the mutexes created in cxgb_controller_attach().
718 *
719 */
720static void
721cxgb_free(struct adapter *sc)
722{
723	int i, nqsets = 0;
724
725	ADAPTER_LOCK(sc);
726	sc->flags |= CXGB_SHUTDOWN;
727	ADAPTER_UNLOCK(sc);
728
729	/*
730	 * Make sure all child devices are gone.
731	 */
732	bus_generic_detach(sc->dev);
733	for (i = 0; i < (sc)->params.nports; i++) {
734		if (sc->portdev[i] &&
735		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
736			device_printf(sc->dev, "failed to delete child port\n");
737		nqsets += sc->port[i].nqsets;
738	}
739
740	/*
741	 * At this point, it is as if cxgb_port_detach has run on all ports, and
742	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
743	 * all open devices have been closed.
744	 */
745	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
746					   __func__, sc->open_device_map));
747	for (i = 0; i < sc->params.nports; i++) {
748		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
749						  __func__, i));
750	}
751
752	/*
753	 * Finish off the adapter's callouts.
754	 */
755	callout_drain(&sc->cxgb_tick_ch);
756	callout_drain(&sc->sge_timer_ch);
757
758	/*
759	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
760	 * sysctls are cleaned up by the kernel linker.
761	 */
762	if (sc->flags & FULL_INIT_DONE) {
763 		t3_free_sge_resources(sc, nqsets);
764 		sc->flags &= ~FULL_INIT_DONE;
765 	}
766
767	/*
768	 * Release all interrupt resources.
769	 */
770	cxgb_teardown_interrupts(sc);
771	if (sc->flags & (USING_MSI | USING_MSIX)) {
772		device_printf(sc->dev, "releasing msi message(s)\n");
773		pci_release_msi(sc->dev);
774	} else {
775		device_printf(sc->dev, "no msi message to release\n");
776	}
777
778	if (sc->msix_regs_res != NULL) {
779		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
780		    sc->msix_regs_res);
781	}
782
783	/*
784	 * Free the adapter's taskqueue.
785	 */
786	if (sc->tq != NULL) {
787		taskqueue_free(sc->tq);
788		sc->tq = NULL;
789	}
790
791	free(sc->filters, M_DEVBUF);
792	t3_sge_free(sc);
793
794	if (sc->udbs_res != NULL)
795		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
796		    sc->udbs_res);
797
798	if (sc->regs_res != NULL)
799		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
800		    sc->regs_res);
801
802	MTX_DESTROY(&sc->mdio_lock);
803	MTX_DESTROY(&sc->sge.reg_lock);
804	MTX_DESTROY(&sc->elmer_lock);
805	mtx_lock(&t3_list_lock);
806	SLIST_REMOVE(&t3_list, sc, adapter, link);
807	mtx_unlock(&t3_list_lock);
808	ADAPTER_LOCK_DEINIT(sc);
809}
810
811/**
812 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
813 *	@sc: the controller softc
814 *
815 *	Determines how many sets of SGE queues to use and initializes them.
816 *	We support multiple queue sets per port if we have MSI-X, otherwise
817 *	just one queue set per port.
818 */
819static int
820setup_sge_qsets(adapter_t *sc)
821{
822	int i, j, err, irq_idx = 0, qset_idx = 0;
823	u_int ntxq = SGE_TXQ_PER_SET;
824
825	if ((err = t3_sge_alloc(sc)) != 0) {
826		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
827		return (err);
828	}
829
830	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
831		irq_idx = -1;
832
833	for (i = 0; i < (sc)->params.nports; i++) {
834		struct port_info *pi = &sc->port[i];
835
836		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
837			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
838			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
839			    &sc->params.sge.qset[qset_idx], ntxq, pi);
840			if (err) {
841				t3_free_sge_resources(sc, qset_idx);
842				device_printf(sc->dev,
843				    "t3_sge_alloc_qset failed with %d\n", err);
844				return (err);
845			}
846		}
847	}
848
849	return (0);
850}
851
852static void
853cxgb_teardown_interrupts(adapter_t *sc)
854{
855	int i;
856
857	for (i = 0; i < SGE_QSETS; i++) {
858		if (sc->msix_intr_tag[i] == NULL) {
859
860			/* Should have been set up fully or not at all */
861			KASSERT(sc->msix_irq_res[i] == NULL &&
862				sc->msix_irq_rid[i] == 0,
863				("%s: half-done interrupt (%d).", __func__, i));
864
865			continue;
866		}
867
868		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
869				  sc->msix_intr_tag[i]);
870		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
871				     sc->msix_irq_res[i]);
872
873		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
874		sc->msix_irq_rid[i] = 0;
875	}
876
877	if (sc->intr_tag) {
878		KASSERT(sc->irq_res != NULL,
879			("%s: half-done interrupt.", __func__));
880
881		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
882		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
883				     sc->irq_res);
884
885		sc->irq_res = sc->intr_tag = NULL;
886		sc->irq_rid = 0;
887	}
888}
889
890static int
891cxgb_setup_interrupts(adapter_t *sc)
892{
893	struct resource *res;
894	void *tag;
895	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
896
897	sc->irq_rid = intr_flag ? 1 : 0;
898	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
899					     RF_SHAREABLE | RF_ACTIVE);
900	if (sc->irq_res == NULL) {
901		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
902			      intr_flag, sc->irq_rid);
903		err = EINVAL;
904		sc->irq_rid = 0;
905	} else {
906		err = bus_setup_intr(sc->dev, sc->irq_res,
907		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
908		    sc->cxgb_intr, sc, &sc->intr_tag);
909
910		if (err) {
911			device_printf(sc->dev,
912				      "Cannot set up interrupt (%x, %u, %d)\n",
913				      intr_flag, sc->irq_rid, err);
914			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
915					     sc->irq_res);
916			sc->irq_res = sc->intr_tag = NULL;
917			sc->irq_rid = 0;
918		}
919	}
920
921	/* That's all for INTx or MSI */
922	if (!(intr_flag & USING_MSIX) || err)
923		return (err);
924
925	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
926	for (i = 0; i < sc->msi_count - 1; i++) {
927		rid = i + 2;
928		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
929					     RF_SHAREABLE | RF_ACTIVE);
930		if (res == NULL) {
931			device_printf(sc->dev, "Cannot allocate interrupt "
932				      "for message %d\n", rid);
933			err = EINVAL;
934			break;
935		}
936
937		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
938				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
939		if (err) {
940			device_printf(sc->dev, "Cannot set up interrupt "
941				      "for message %d (%d)\n", rid, err);
942			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
943			break;
944		}
945
946		sc->msix_irq_rid[i] = rid;
947		sc->msix_irq_res[i] = res;
948		sc->msix_intr_tag[i] = tag;
949		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
950	}
951
952	if (err)
953		cxgb_teardown_interrupts(sc);
954
955	return (err);
956}
957
958
959static int
960cxgb_port_probe(device_t dev)
961{
962	struct port_info *p;
963	char buf[80];
964	const char *desc;
965
966	p = device_get_softc(dev);
967	desc = p->phy.desc;
968	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
969	device_set_desc_copy(dev, buf);
970	return (0);
971}
972
973
974static int
975cxgb_makedev(struct port_info *pi)
976{
977
978	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
979	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
980
981	if (pi->port_cdev == NULL)
982		return (ENOMEM);
983
984	pi->port_cdev->si_drv1 = (void *)pi;
985
986	return (0);
987}
988
989#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
990    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
991    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
992#define CXGB_CAP_ENABLE CXGB_CAP
993
994static int
995cxgb_port_attach(device_t dev)
996{
997	struct port_info *p;
998	struct ifnet *ifp;
999	int err;
1000	struct adapter *sc;
1001
1002	p = device_get_softc(dev);
1003	sc = p->adapter;
1004	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1005	    device_get_unit(device_get_parent(dev)), p->port_id);
1006	PORT_LOCK_INIT(p, p->lockbuf);
1007
1008	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1009	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1010
1011	/* Allocate an ifnet object and set it up */
1012	ifp = p->ifp = if_alloc(IFT_ETHER);
1013	if (ifp == NULL) {
1014		device_printf(dev, "Cannot allocate ifnet\n");
1015		return (ENOMEM);
1016	}
1017
1018	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1019	ifp->if_init = cxgb_init;
1020	ifp->if_softc = p;
1021	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1022	ifp->if_ioctl = cxgb_ioctl;
1023	ifp->if_transmit = cxgb_transmit;
1024	ifp->if_qflush = cxgb_qflush;
1025
1026	ifp->if_capabilities = CXGB_CAP;
1027#ifdef TCP_OFFLOAD
1028	if (is_offload(sc))
1029		ifp->if_capabilities |= IFCAP_TOE4;
1030#endif
1031	ifp->if_capenable = CXGB_CAP_ENABLE;
1032	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1033	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1034
1035	/*
1036	 * Disable TSO on 4-port - it isn't supported by the firmware.
1037	 */
1038	if (sc->params.nports > 2) {
1039		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1041		ifp->if_hwassist &= ~CSUM_TSO;
1042	}
1043
1044	ether_ifattach(ifp, p->hw_addr);
1045
1046#ifdef DEFAULT_JUMBO
1047	if (sc->params.nports <= 2)
1048		ifp->if_mtu = ETHERMTU_JUMBO;
1049#endif
1050	if ((err = cxgb_makedev(p)) != 0) {
1051		printf("makedev failed %d\n", err);
1052		return (err);
1053	}
1054
1055	/* Create a list of media supported by this port */
1056	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1057	    cxgb_media_status);
1058	cxgb_build_medialist(p);
1059
1060	t3_sge_init_port(p);
1061
1062	return (err);
1063}
1064
1065/*
1066 * cxgb_port_detach() is called via the device_detach method when
1067 * cxgb_free() calls bus_generic_detach().  It is responsible for
1068 * removing the device from the view of the kernel, i.e. from all
1069 * interface lists, etc.  This routine is only called when the driver is
1070 * being unloaded, not when the link goes down.
1071 */
1072static int
1073cxgb_port_detach(device_t dev)
1074{
1075	struct port_info *p;
1076	struct adapter *sc;
1077	int i;
1078
1079	p = device_get_softc(dev);
1080	sc = p->adapter;
1081
1082	/* Tell cxgb_ioctl and if_init that the port is going away */
1083	ADAPTER_LOCK(sc);
1084	SET_DOOMED(p);
1085	wakeup(&sc->flags);
1086	while (IS_BUSY(sc))
1087		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1088	SET_BUSY(sc);
1089	ADAPTER_UNLOCK(sc);
1090
1091	if (p->port_cdev != NULL)
1092		destroy_dev(p->port_cdev);
1093
1094	cxgb_uninit_synchronized(p);
1095	ether_ifdetach(p->ifp);
1096
1097	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1098		struct sge_qset *qs = &sc->sge.qs[i];
1099		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1100
1101		callout_drain(&txq->txq_watchdog);
1102		callout_drain(&txq->txq_timer);
1103	}
1104
1105	PORT_LOCK_DEINIT(p);
1106	if_free(p->ifp);
1107	p->ifp = NULL;
1108
1109	ADAPTER_LOCK(sc);
1110	CLR_BUSY(sc);
1111	wakeup_one(&sc->flags);
1112	ADAPTER_UNLOCK(sc);
1113	return (0);
1114}
1115
1116void
1117t3_fatal_err(struct adapter *sc)
1118{
1119	u_int fw_status[4];
1120
1121	if (sc->flags & FULL_INIT_DONE) {
1122		t3_sge_stop(sc);
1123		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1124		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1125		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1126		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1127		t3_intr_disable(sc);
1128	}
1129	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1130	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1131		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1132		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1133}
1134
1135int
1136t3_os_find_pci_capability(adapter_t *sc, int cap)
1137{
1138	device_t dev;
1139	struct pci_devinfo *dinfo;
1140	pcicfgregs *cfg;
1141	uint32_t status;
1142	uint8_t ptr;
1143
1144	dev = sc->dev;
1145	dinfo = device_get_ivars(dev);
1146	cfg = &dinfo->cfg;
1147
1148	status = pci_read_config(dev, PCIR_STATUS, 2);
1149	if (!(status & PCIM_STATUS_CAPPRESENT))
1150		return (0);
1151
1152	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1153	case 0:
1154	case 1:
1155		ptr = PCIR_CAP_PTR;
1156		break;
1157	case 2:
1158		ptr = PCIR_CAP_PTR_2;
1159		break;
1160	default:
1161		return (0);
1162		break;
1163	}
1164	ptr = pci_read_config(dev, ptr, 1);
1165
1166	while (ptr != 0) {
1167		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1168			return (ptr);
1169		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1170	}
1171
1172	return (0);
1173}
1174
1175int
1176t3_os_pci_save_state(struct adapter *sc)
1177{
1178	device_t dev;
1179	struct pci_devinfo *dinfo;
1180
1181	dev = sc->dev;
1182	dinfo = device_get_ivars(dev);
1183
1184	pci_cfg_save(dev, dinfo, 0);
1185	return (0);
1186}
1187
1188int
1189t3_os_pci_restore_state(struct adapter *sc)
1190{
1191	device_t dev;
1192	struct pci_devinfo *dinfo;
1193
1194	dev = sc->dev;
1195	dinfo = device_get_ivars(dev);
1196
1197	pci_cfg_restore(dev, dinfo);
1198	return (0);
1199}
1200
1201/**
1202 *	t3_os_link_changed - handle link status changes
1203 *	@sc: the adapter associated with the link change
1204 *	@port_id: the port index whose link status has changed
1205 *	@link_status: the new status of the link
1206 *	@speed: the new speed setting
1207 *	@duplex: the new duplex setting
1208 *	@fc: the new flow-control setting
1209 *
1210 *	This is the OS-dependent handler for link status changes.  The OS
1211 *	neutral handler takes care of most of the processing for these events,
1212 *	then calls this handler for any OS-specific processing.
1213 */
1214void
1215t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1216     int duplex, int fc, int mac_was_reset)
1217{
1218	struct port_info *pi = &adapter->port[port_id];
1219	struct ifnet *ifp = pi->ifp;
1220
1221	/* no race with detach, so ifp should always be good */
1222	KASSERT(ifp, ("%s: if detached.", __func__));
1223
1224	/* Reapply mac settings if they were lost due to a reset */
1225	if (mac_was_reset) {
1226		PORT_LOCK(pi);
1227		cxgb_update_mac_settings(pi);
1228		PORT_UNLOCK(pi);
1229	}
1230
1231	if (link_status) {
1232		ifp->if_baudrate = IF_Mbps(speed);
1233		if_link_state_change(ifp, LINK_STATE_UP);
1234	} else
1235		if_link_state_change(ifp, LINK_STATE_DOWN);
1236}
1237
1238/**
1239 *	t3_os_phymod_changed - handle PHY module changes
1240 *	@adap: the adapter whose PHY module changed
1241 *	@port_id: the port index of the affected PHY module
1242 *
1243 *	This is the OS-dependent handler for PHY module changes.  It is
1244 *	invoked when a PHY module is removed or inserted for any OS-specific
1245 *	processing.
1246 */
1247void t3_os_phymod_changed(struct adapter *adap, int port_id)
1248{
1249	static const char *mod_str[] = {
1250		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1251	};
1252	struct port_info *pi = &adap->port[port_id];
1253	int mod = pi->phy.modtype;
1254
1255	if (mod != pi->media.ifm_cur->ifm_data)
1256		cxgb_build_medialist(pi);
1257
1258	if (mod == phy_modtype_none)
1259		if_printf(pi->ifp, "PHY module unplugged\n");
1260	else {
1261		KASSERT(mod < ARRAY_SIZE(mod_str),
1262			("invalid PHY module type %d", mod));
1263		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1264	}
1265}
1266
1267void
1268t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1269{
1270
1271	/*
1272	 * The ifnet might not be allocated before this gets called, as this
1273	 * is called early on in attach by t3_prep_adapter, so just save the
1274	 * address off in the port structure.
1275	 */
1276	if (cxgb_debug)
1277		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1278	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1279}
1280
1281/*
1282 * Programs the XGMAC based on the settings in the ifnet.  These settings
1283 * include MTU, MAC address, mcast addresses, etc.
1284 */
1285static void
1286cxgb_update_mac_settings(struct port_info *p)
1287{
1288	struct ifnet *ifp = p->ifp;
1289	struct t3_rx_mode rm;
1290	struct cmac *mac = &p->mac;
1291	int mtu, hwtagging;
1292
1293	PORT_LOCK_ASSERT_OWNED(p);
1294
1295	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1296
1297	mtu = ifp->if_mtu;
1298	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1299		mtu += ETHER_VLAN_ENCAP_LEN;
1300
1301	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1302
1303	t3_mac_set_mtu(mac, mtu);
1304	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1305	t3_mac_set_address(mac, 0, p->hw_addr);
1306	t3_init_rx_mode(&rm, p);
1307	t3_mac_set_rx_mode(mac, &rm);
1308}
1309
1310
1311static int
1312await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1313			      unsigned long n)
1314{
1315	int attempts = 5;
1316
1317	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1318		if (!--attempts)
1319			return (ETIMEDOUT);
1320		t3_os_sleep(10);
1321	}
1322	return 0;
1323}
1324
1325static int
1326init_tp_parity(struct adapter *adap)
1327{
1328	int i;
1329	struct mbuf *m;
1330	struct cpl_set_tcb_field *greq;
1331	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1332
1333	t3_tp_set_offload_mode(adap, 1);
1334
1335	for (i = 0; i < 16; i++) {
1336		struct cpl_smt_write_req *req;
1337
1338		m = m_gethdr(M_WAITOK, MT_DATA);
1339		req = mtod(m, struct cpl_smt_write_req *);
1340		m->m_len = m->m_pkthdr.len = sizeof(*req);
1341		memset(req, 0, sizeof(*req));
1342		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1343		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1344		req->iff = i;
1345		t3_mgmt_tx(adap, m);
1346	}
1347
1348	for (i = 0; i < 2048; i++) {
1349		struct cpl_l2t_write_req *req;
1350
1351		m = m_gethdr(M_WAITOK, MT_DATA);
1352		req = mtod(m, struct cpl_l2t_write_req *);
1353		m->m_len = m->m_pkthdr.len = sizeof(*req);
1354		memset(req, 0, sizeof(*req));
1355		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1356		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1357		req->params = htonl(V_L2T_W_IDX(i));
1358		t3_mgmt_tx(adap, m);
1359	}
1360
1361	for (i = 0; i < 2048; i++) {
1362		struct cpl_rte_write_req *req;
1363
1364		m = m_gethdr(M_WAITOK, MT_DATA);
1365		req = mtod(m, struct cpl_rte_write_req *);
1366		m->m_len = m->m_pkthdr.len = sizeof(*req);
1367		memset(req, 0, sizeof(*req));
1368		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1369		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1370		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1371		t3_mgmt_tx(adap, m);
1372	}
1373
1374	m = m_gethdr(M_WAITOK, MT_DATA);
1375	greq = mtod(m, struct cpl_set_tcb_field *);
1376	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1377	memset(greq, 0, sizeof(*greq));
1378	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1379	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1380	greq->mask = htobe64(1);
1381	t3_mgmt_tx(adap, m);
1382
1383	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1384	t3_tp_set_offload_mode(adap, 0);
1385	return (i);
1386}
1387
1388/**
1389 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1390 *	@adap: the adapter
1391 *
1392 *	Sets up RSS to distribute packets to multiple receive queues.  We
1393 *	configure the RSS CPU lookup table to distribute to the number of HW
1394 *	receive queues, and the response queue lookup table to narrow that
1395 *	down to the response queues actually configured for each port.
1396 *	We always configure the RSS mapping for two ports since the mapping
1397 *	table has plenty of entries.
1398 */
1399static void
1400setup_rss(adapter_t *adap)
1401{
1402	int i;
1403	u_int nq[2];
1404	uint8_t cpus[SGE_QSETS + 1];
1405	uint16_t rspq_map[RSS_TABLE_SIZE];
1406
1407	for (i = 0; i < SGE_QSETS; ++i)
1408		cpus[i] = i;
1409	cpus[SGE_QSETS] = 0xff;
1410
1411	nq[0] = nq[1] = 0;
1412	for_each_port(adap, i) {
1413		const struct port_info *pi = adap2pinfo(adap, i);
1414
1415		nq[pi->tx_chan] += pi->nqsets;
1416	}
1417	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1418		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1419		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1420	}
1421
1422	/* Calculate the reverse RSS map table */
1423	for (i = 0; i < SGE_QSETS; ++i)
1424		adap->rrss_map[i] = 0xff;
1425	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1426		if (adap->rrss_map[rspq_map[i]] == 0xff)
1427			adap->rrss_map[rspq_map[i]] = i;
1428
1429	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1430		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1431	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1432	              cpus, rspq_map);
1433
1434}
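/*
 * Worked example for setup_rss() above (illustrative): with one port per
 * channel and 3 queue sets per port, nq[0] = nq[1] = 3, so the first half
 * of rspq_map[] cycles through queue sets 0, 1, 2 and the second half
 * through queue sets 3, 4, 5.  rrss_map[] records, for each queue set, the
 * first RSS table index that maps to it.
 */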
1435static void
1436send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1437			      int hi, int port)
1438{
1439	struct mbuf *m;
1440	struct mngt_pktsched_wr *req;
1441
1442	m = m_gethdr(M_NOWAIT, MT_DATA);
1443	if (m) {
1444		req = mtod(m, struct mngt_pktsched_wr *);
1445		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1446		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1447		req->sched = sched;
1448		req->idx = qidx;
1449		req->min = lo;
1450		req->max = hi;
1451		req->binding = port;
1452		m->m_len = m->m_pkthdr.len = sizeof(*req);
1453		t3_mgmt_tx(adap, m);
1454	}
1455}
1456
1457static void
1458bind_qsets(adapter_t *sc)
1459{
1460	int i, j;
1461
1462	for (i = 0; i < (sc)->params.nports; ++i) {
1463		const struct port_info *pi = adap2pinfo(sc, i);
1464
1465		for (j = 0; j < pi->nqsets; ++j) {
1466			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1467					  -1, pi->tx_chan);
1468
1469		}
1470	}
1471}
1472
1473static void
1474update_tpeeprom(struct adapter *adap)
1475{
1476	const struct firmware *tpeeprom;
1477
1478	uint32_t version;
1479	unsigned int major, minor;
1480	int ret, len;
1481	char rev, name[32];
1482
1483	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1484
1485	major = G_TP_VERSION_MAJOR(version);
1486	minor = G_TP_VERSION_MINOR(version);
1487	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1488		return;
1489
1490	rev = t3rev2char(adap);
1491	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1492
1493	tpeeprom = firmware_get(name);
1494	if (tpeeprom == NULL) {
1495		device_printf(adap->dev,
1496			      "could not load TP EEPROM: unable to load %s\n",
1497			      name);
1498		return;
1499	}
1500
1501	len = tpeeprom->datasize - 4;
1502
1503	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1504	if (ret)
1505		goto release_tpeeprom;
1506
1507	if (len != TP_SRAM_LEN) {
1508		device_printf(adap->dev,
1509			      "%s length is wrong len=%d expected=%d\n", name,
1510			      len, TP_SRAM_LEN);
1511		goto release_tpeeprom;
1512	}
1513
1514	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1515	    TP_SRAM_OFFSET);
1516
1517	if (!ret) {
1518		device_printf(adap->dev,
1519			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1520			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1521	} else
1522		device_printf(adap->dev,
1523			      "Protocol SRAM image update in EEPROM failed\n");
1524
1525release_tpeeprom:
1526	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1527
1528	return;
1529}
1530
1531static int
1532update_tpsram(struct adapter *adap)
1533{
1534	const struct firmware *tpsram;
1535	int ret;
1536	char rev, name[32];
1537
1538	rev = t3rev2char(adap);
1539	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1540
1541	update_tpeeprom(adap);
1542
1543	tpsram = firmware_get(name);
1544	if (tpsram == NULL){
1545		device_printf(adap->dev, "could not load TP SRAM\n");
1546		return (EINVAL);
1547	} else
1548		device_printf(adap->dev, "updating TP SRAM\n");
1549
1550	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1551	if (ret)
1552		goto release_tpsram;
1553
1554	ret = t3_set_proto_sram(adap, tpsram->data);
1555	if (ret)
1556		device_printf(adap->dev, "loading protocol SRAM failed\n");
1557
1558release_tpsram:
1559	firmware_put(tpsram, FIRMWARE_UNLOAD);
1560
1561	return ret;
1562}
1563
1564/**
1565 *	cxgb_up - enable the adapter
1566 *	@adap: adapter being enabled
1567 *
1568 *	Called when the first port is enabled, this function performs the
1569 *	actions necessary to make an adapter operational, such as completing
1570 *	the initialization of HW modules, and enabling interrupts.
1571 */
1572static int
1573cxgb_up(struct adapter *sc)
1574{
1575	int err = 0;
1576	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1577
1578	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1579					   __func__, sc->open_device_map));
1580
1581	if ((sc->flags & FULL_INIT_DONE) == 0) {
1582
1583		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1584
1585		if ((sc->flags & FW_UPTODATE) == 0)
1586			if ((err = upgrade_fw(sc)))
1587				goto out;
1588
1589		if ((sc->flags & TPS_UPTODATE) == 0)
1590			if ((err = update_tpsram(sc)))
1591				goto out;
1592
1593		if (is_offload(sc) && nfilters != 0) {
1594			sc->params.mc5.nservers = 0;
1595
1596			if (nfilters < 0)
1597				sc->params.mc5.nfilters = mxf;
1598			else
1599				sc->params.mc5.nfilters = min(nfilters, mxf);
1600		}
1601
1602		err = t3_init_hw(sc, 0);
1603		if (err)
1604			goto out;
1605
1606		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1607		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1608
1609		err = setup_sge_qsets(sc);
1610		if (err)
1611			goto out;
1612
1613		alloc_filters(sc);
1614		setup_rss(sc);
1615
1616		t3_add_configured_sysctls(sc);
1617		sc->flags |= FULL_INIT_DONE;
1618	}
1619
1620	t3_intr_clear(sc);
1621	t3_sge_start(sc);
1622	t3_intr_enable(sc);
1623
1624	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1625	    is_offload(sc) && init_tp_parity(sc) == 0)
1626		sc->flags |= TP_PARITY_INIT;
1627
1628	if (sc->flags & TP_PARITY_INIT) {
1629		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1630		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1631	}
1632
1633	if (!(sc->flags & QUEUES_BOUND)) {
1634		bind_qsets(sc);
1635		setup_hw_filters(sc);
1636		sc->flags |= QUEUES_BOUND;
1637	}
1638
1639	t3_sge_reset_adapter(sc);
1640out:
1641	return (err);
1642}
1643
1644/*
1645 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1646 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1647 * during controller_detach, not here.
1648 */
1649static void
1650cxgb_down(struct adapter *sc)
1651{
1652	t3_sge_stop(sc);
1653	t3_intr_disable(sc);
1654}
1655
1656/*
1657 * if_init for cxgb ports.
1658 */
1659static void
1660cxgb_init(void *arg)
1661{
1662	struct port_info *p = arg;
1663	struct adapter *sc = p->adapter;
1664
1665	ADAPTER_LOCK(sc);
1666	cxgb_init_locked(p); /* releases adapter lock */
1667	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1668}
1669
1670static int
1671cxgb_init_locked(struct port_info *p)
1672{
1673	struct adapter *sc = p->adapter;
1674	struct ifnet *ifp = p->ifp;
1675	struct cmac *mac = &p->mac;
1676	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1677
1678	ADAPTER_LOCK_ASSERT_OWNED(sc);
1679
1680	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1681		gave_up_lock = 1;
1682		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1683			rc = EINTR;
1684			goto done;
1685		}
1686	}
1687	if (IS_DOOMED(p)) {
1688		rc = ENXIO;
1689		goto done;
1690	}
1691	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1692
1693	/*
1694	 * The code that runs during one-time adapter initialization can sleep
1695	 * so it's important not to hold any locks across it.
1696	 */
1697	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1698
1699	if (may_sleep) {
1700		SET_BUSY(sc);
1701		gave_up_lock = 1;
1702		ADAPTER_UNLOCK(sc);
1703	}
1704
1705	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1706			goto done;
1707
1708	PORT_LOCK(p);
1709	if (isset(&sc->open_device_map, p->port_id) &&
1710	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1711		PORT_UNLOCK(p);
1712		goto done;
1713	}
1714	t3_port_intr_enable(sc, p->port_id);
1715	if (!mac->multiport)
1716		t3_mac_init(mac);
1717	cxgb_update_mac_settings(p);
1718	t3_link_start(&p->phy, mac, &p->link_config);
1719	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1720	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1721	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1722	PORT_UNLOCK(p);
1723
1724	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1725		struct sge_qset *qs = &sc->sge.qs[i];
1726		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1727
1728		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1729				 txq->txq_watchdog.c_cpu);
1730	}
1731
1732	/* all ok */
1733	setbit(&sc->open_device_map, p->port_id);
1734	callout_reset(&p->link_check_ch,
1735	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1736	    link_check_callout, p);
1737
1738done:
1739	if (may_sleep) {
1740		ADAPTER_LOCK(sc);
1741		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1742		CLR_BUSY(sc);
1743	}
1744	if (gave_up_lock)
1745		wakeup_one(&sc->flags);
1746	ADAPTER_UNLOCK(sc);
1747	return (rc);
1748}
1749
1750static int
1751cxgb_uninit_locked(struct port_info *p)
1752{
1753	struct adapter *sc = p->adapter;
1754	int rc;
1755
1756	ADAPTER_LOCK_ASSERT_OWNED(sc);
1757
1758	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1759		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1760			rc = EINTR;
1761			goto done;
1762		}
1763	}
1764	if (IS_DOOMED(p)) {
1765		rc = ENXIO;
1766		goto done;
1767	}
1768	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1769	SET_BUSY(sc);
1770	ADAPTER_UNLOCK(sc);
1771
1772	rc = cxgb_uninit_synchronized(p);
1773
1774	ADAPTER_LOCK(sc);
1775	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1776	CLR_BUSY(sc);
1777	wakeup_one(&sc->flags);
1778done:
1779	ADAPTER_UNLOCK(sc);
1780	return (rc);
1781}
1782
1783/*
1784 * Called on "ifconfig down", and from port_detach
1785 */
1786static int
1787cxgb_uninit_synchronized(struct port_info *pi)
1788{
1789	struct adapter *sc = pi->adapter;
1790	struct ifnet *ifp = pi->ifp;
1791
1792	/*
1793	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1794	 */
1795	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1796
1797	/*
1798	 * Clear this port's bit from the open device map, and then drain all
1799	 * the tasks that can access/manipulate this port's port_info or ifp.
1800	 * We disable this port's interrupts here and so the slow/ext
1801	 * interrupt tasks won't be enqueued.  The tick task will continue to
1802	 * be enqueued every second but the runs after this drain will not see
1803	 * this port in the open device map.
1804	 *
1805	 * A well behaved task must take open_device_map into account and ignore
1806	 * ports that are not open.
1807	 */
1808	clrbit(&sc->open_device_map, pi->port_id);
1809	t3_port_intr_disable(sc, pi->port_id);
1810	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1811	taskqueue_drain(sc->tq, &sc->tick_task);
1812
1813	callout_drain(&pi->link_check_ch);
1814	taskqueue_drain(sc->tq, &pi->link_check_task);
1815
1816	PORT_LOCK(pi);
1817	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1818
1819	/* disable pause frames */
1820	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1821
1822	/* Reset RX FIFO HWM */
1823	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1824			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1825
1826	DELAY(100 * 1000);
1827
1828	/* Wait for TXFIFO empty */
1829	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1830			F_TXFIFO_EMPTY, 1, 20, 5);
1831
1832	DELAY(100 * 1000);
1833	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1834
1835	pi->phy.ops->power_down(&pi->phy, 1);
1836
1837	PORT_UNLOCK(pi);
1838
1839	pi->link_config.link_ok = 0;
1840	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1841
1842	if (sc->open_device_map == 0)
1843		cxgb_down(pi->adapter);
1844
1845	return (0);
1846}
1847
1848/*
1849 * Mark lro enabled or disabled in all qsets for this port
1850 */
1851static int
1852cxgb_set_lro(struct port_info *p, int enabled)
1853{
1854	int i;
1855	struct adapter *adp = p->adapter;
1856	struct sge_qset *q;
1857
1858	for (i = 0; i < p->nqsets; i++) {
1859		q = &adp->sge.qs[p->first_qset + i];
1860		q->lro.enabled = (enabled != 0);
1861	}
1862	return (0);
1863}
1864
1865static int
1866cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1867{
1868	struct port_info *p = ifp->if_softc;
1869	struct adapter *sc = p->adapter;
1870	struct ifreq *ifr = (struct ifreq *)data;
1871	int flags, error = 0, mtu;
1872	uint32_t mask;
1873
1874	switch (command) {
1875	case SIOCSIFMTU:
1876		ADAPTER_LOCK(sc);
1877		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1878		if (error) {
1879fail:
1880			ADAPTER_UNLOCK(sc);
1881			return (error);
1882		}
1883
1884		mtu = ifr->ifr_mtu;
1885		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1886			error = EINVAL;
1887		} else {
1888			ifp->if_mtu = mtu;
1889			PORT_LOCK(p);
1890			cxgb_update_mac_settings(p);
1891			PORT_UNLOCK(p);
1892		}
1893		ADAPTER_UNLOCK(sc);
1894		break;
1895	case SIOCSIFFLAGS:
1896		ADAPTER_LOCK(sc);
1897		if (IS_DOOMED(p)) {
1898			error = ENXIO;
1899			goto fail;
1900		}
1901		if (ifp->if_flags & IFF_UP) {
1902			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1903				flags = p->if_flags;
1904				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1905				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1906					if (IS_BUSY(sc)) {
1907						error = EBUSY;
1908						goto fail;
1909					}
1910					PORT_LOCK(p);
1911					cxgb_update_mac_settings(p);
1912					PORT_UNLOCK(p);
1913				}
1914				ADAPTER_UNLOCK(sc);
1915			} else
1916				error = cxgb_init_locked(p);
1917			p->if_flags = ifp->if_flags;
1918		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1919			error = cxgb_uninit_locked(p);
1920		else
1921			ADAPTER_UNLOCK(sc);
1922
1923		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1924		break;
1925	case SIOCADDMULTI:
1926	case SIOCDELMULTI:
1927		ADAPTER_LOCK(sc);
1928		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1929		if (error)
1930			goto fail;
1931
1932		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1933			PORT_LOCK(p);
1934			cxgb_update_mac_settings(p);
1935			PORT_UNLOCK(p);
1936		}
1937		ADAPTER_UNLOCK(sc);
1938
1939		break;
1940	case SIOCSIFCAP:
1941		ADAPTER_LOCK(sc);
1942		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1943		if (error)
1944			goto fail;
1945
1946		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1947		if (mask & IFCAP_TXCSUM) {
1948			ifp->if_capenable ^= IFCAP_TXCSUM;
1949			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1950
1951			if (IFCAP_TSO4 & ifp->if_capenable &&
1952			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1953				ifp->if_capenable &= ~IFCAP_TSO4;
1954				if_printf(ifp,
1955				    "tso4 disabled due to -txcsum.\n");
1956			}
1957		}
1958		if (mask & IFCAP_TXCSUM_IPV6) {
1959			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1960			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1961
1962			if (IFCAP_TSO6 & ifp->if_capenable &&
1963			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1964				ifp->if_capenable &= ~IFCAP_TSO6;
1965				if_printf(ifp,
1966				    "tso6 disabled due to -txcsum6.\n");
1967			}
1968		}
1969		if (mask & IFCAP_RXCSUM)
1970			ifp->if_capenable ^= IFCAP_RXCSUM;
1971		if (mask & IFCAP_RXCSUM_IPV6)
1972			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1973
1974		/*
1975		 * Note that we leave CSUM_TSO alone (it is always set).  The
1976		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1977		 * sending a TSO request our way, so it's sufficient to toggle
1978		 * IFCAP_TSOx only.
1979		 */
1980		if (mask & IFCAP_TSO4) {
1981			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1982			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1983				if_printf(ifp, "enable txcsum first.\n");
1984				error = EAGAIN;
1985				goto fail;
1986			}
1987			ifp->if_capenable ^= IFCAP_TSO4;
1988		}
1989		if (mask & IFCAP_TSO6) {
1990			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1991			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1992				if_printf(ifp, "enable txcsum6 first.\n");
1993				error = EAGAIN;
1994				goto fail;
1995			}
1996			ifp->if_capenable ^= IFCAP_TSO6;
1997		}
1998		if (mask & IFCAP_LRO) {
1999			ifp->if_capenable ^= IFCAP_LRO;
2000
2001			/* Safe to do this even if cxgb_up not called yet */
2002			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2003		}
2004#ifdef TCP_OFFLOAD
2005		if (mask & IFCAP_TOE4) {
2006			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2007
2008			error = toe_capability(p, enable);
2009			if (error == 0)
2010				ifp->if_capenable ^= mask;
2011		}
2012#endif
2013		if (mask & IFCAP_VLAN_HWTAGGING) {
2014			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2015			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2016				PORT_LOCK(p);
2017				cxgb_update_mac_settings(p);
2018				PORT_UNLOCK(p);
2019			}
2020		}
2021		if (mask & IFCAP_VLAN_MTU) {
2022			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2023			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2024				PORT_LOCK(p);
2025				cxgb_update_mac_settings(p);
2026				PORT_UNLOCK(p);
2027			}
2028		}
2029		if (mask & IFCAP_VLAN_HWTSO)
2030			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2031		if (mask & IFCAP_VLAN_HWCSUM)
2032			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2033
2034#ifdef VLAN_CAPABILITIES
2035		VLAN_CAPABILITIES(ifp);
2036#endif
2037		ADAPTER_UNLOCK(sc);
2038		break;
2039	case SIOCSIFMEDIA:
2040	case SIOCGIFMEDIA:
2041		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2042		break;
2043	default:
2044		error = ether_ioctl(ifp, command, data);
2045	}
2046
2047	return (error);
2048}
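
/*
 * Illustrative sketch, not part of the driver: how the SIOCSIFCAP handling in
 * cxgb_ioctl() above uses "mask".  The XOR of the requested and currently
 * enabled capabilities leaves only the bits the caller asked to change, so
 * each toggle in the handler acts exactly once per request.  The values here
 * are hypothetical.
 */
static void
example_capability_mask(void)
{
	uint32_t capenable = IFCAP_TXCSUM | IFCAP_RXCSUM;	/* enabled now */
	uint32_t reqcap = IFCAP_RXCSUM | IFCAP_TSO4;		/* requested */
	uint32_t mask = reqcap ^ capenable;

	/* mask == IFCAP_TXCSUM | IFCAP_TSO4: turn TXCSUM off and TSO4 on. */
	(void)mask;
}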
2049
2050static int
2051cxgb_media_change(struct ifnet *ifp)
2052{
2053	return (EOPNOTSUPP);
2054}
2055
2056/*
2057 * Translates phy->modtype to the correct Ethernet media subtype.
2058 */
2059static int
2060cxgb_ifm_type(int mod)
2061{
2062	switch (mod) {
2063	case phy_modtype_sr:
2064		return (IFM_10G_SR);
2065	case phy_modtype_lr:
2066		return (IFM_10G_LR);
2067	case phy_modtype_lrm:
2068		return (IFM_10G_LRM);
2069	case phy_modtype_twinax:
2070		return (IFM_10G_TWINAX);
2071	case phy_modtype_twinax_long:
2072		return (IFM_10G_TWINAX_LONG);
2073	case phy_modtype_none:
2074		return (IFM_NONE);
2075	case phy_modtype_unknown:
2076		return (IFM_UNKNOWN);
2077	}
2078
2079	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2080	return (IFM_UNKNOWN);
2081}
2082
2083/*
2084 * Rebuilds the ifmedia list for this port, and sets the current media.
2085 */
2086static void
2087cxgb_build_medialist(struct port_info *p)
2088{
2089	struct cphy *phy = &p->phy;
2090	struct ifmedia *media = &p->media;
2091	int mod = phy->modtype;
2092	int m = IFM_ETHER | IFM_FDX;
2093
2094	PORT_LOCK(p);
2095
2096	ifmedia_removeall(media);
2097	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2098		/* Copper (RJ45) */
2099
2100		if (phy->caps & SUPPORTED_10000baseT_Full)
2101			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2102
2103		if (phy->caps & SUPPORTED_1000baseT_Full)
2104			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2105
2106		if (phy->caps & SUPPORTED_100baseT_Full)
2107			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2108
2109		if (phy->caps & SUPPORTED_10baseT_Full)
2110			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2111
2112		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2113		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2114
2115	} else if (phy->caps & SUPPORTED_TP) {
2116		/* Copper (CX4) */
2117
2118		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2119			("%s: unexpected cap 0x%x", __func__, phy->caps));
2120
2121		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2122		ifmedia_set(media, m | IFM_10G_CX4);
2123
2124	} else if (phy->caps & SUPPORTED_FIBRE &&
2125		   phy->caps & SUPPORTED_10000baseT_Full) {
2126		/* 10G optical (but includes SFP+ twinax) */
2127
2128		m |= cxgb_ifm_type(mod);
2129		if (IFM_SUBTYPE(m) == IFM_NONE)
2130			m &= ~IFM_FDX;
2131
2132		ifmedia_add(media, m, mod, NULL);
2133		ifmedia_set(media, m);
2134
2135	} else if (phy->caps & SUPPORTED_FIBRE &&
2136		   phy->caps & SUPPORTED_1000baseT_Full) {
2137		/* 1G optical */
2138
2139		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2140		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2141		ifmedia_set(media, m | IFM_1000_SX);
2142
2143	} else {
2144		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2145			    phy->caps));
2146	}
2147
2148	PORT_UNLOCK(p);
2149}
2150
2151static void
2152cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2153{
2154	struct port_info *p = ifp->if_softc;
2155	struct ifmedia_entry *cur = p->media.ifm_cur;
2156	int speed = p->link_config.speed;
2157
2158	if (cur->ifm_data != p->phy.modtype) {
2159		cxgb_build_medialist(p);
2160		cur = p->media.ifm_cur;
2161	}
2162
2163	ifmr->ifm_status = IFM_AVALID;
2164	if (!p->link_config.link_ok)
2165		return;
2166
2167	ifmr->ifm_status |= IFM_ACTIVE;
2168
2169	/*
2170	 * active and current will differ iff current media is autoselect.  That
2171	 * can happen only for copper RJ45.
2172	 */
2173	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2174		return;
2175	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2176		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2177
2178	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2179	if (speed == SPEED_10000)
2180		ifmr->ifm_active |= IFM_10G_T;
2181	else if (speed == SPEED_1000)
2182		ifmr->ifm_active |= IFM_1000_T;
2183	else if (speed == SPEED_100)
2184		ifmr->ifm_active |= IFM_100_TX;
2185	else if (speed == SPEED_10)
2186		ifmr->ifm_active |= IFM_10_T;
2187	else
2188		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2189			    speed));
2190}
2191
2192static void
2193cxgb_async_intr(void *data)
2194{
2195	adapter_t *sc = data;
2196
2197	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2198	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2199	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2200}
2201
2202static void
2203link_check_callout(void *arg)
2204{
2205	struct port_info *pi = arg;
2206	struct adapter *sc = pi->adapter;
2207
2208	if (!isset(&sc->open_device_map, pi->port_id))
2209		return;
2210
2211	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2212}
2213
2214static void
2215check_link_status(void *arg, int pending)
2216{
2217	struct port_info *pi = arg;
2218	struct adapter *sc = pi->adapter;
2219
2220	if (!isset(&sc->open_device_map, pi->port_id))
2221		return;
2222
2223	t3_link_changed(sc, pi->port_id);
2224
2225	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2226		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2227}
2228
2229void
2230t3_os_link_intr(struct port_info *pi)
2231{
2232	/*
2233	 * Schedule a link check in the near future.  If the link is flapping
2234	 * rapidly we'll keep resetting the callout and delaying the check until
2235	 * things stabilize a bit.
2236	 */
2237	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2238}
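
/*
 * Illustrative sketch, not part of the driver: because callout_reset()
 * re-arms a pending callout, back-to-back link interrupts within hz/4 ticks
 * collapse into a single deferred check, which is the debouncing described
 * in t3_os_link_intr() above.  example_link_debounce is a hypothetical name.
 */
static void
example_link_debounce(struct port_info *pi)
{
	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
	/*
	 * A second interrupt before the callout fires just pushes it out
	 * another hz/4; link_check_callout still runs only once.
	 */
	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
}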
2239
2240static void
2241check_t3b2_mac(struct adapter *sc)
2242{
2243	int i;
2244
2245	if (sc->flags & CXGB_SHUTDOWN)
2246		return;
2247
2248	for_each_port(sc, i) {
2249		struct port_info *p = &sc->port[i];
2250		int status;
2251#ifdef INVARIANTS
2252		struct ifnet *ifp = p->ifp;
2253#endif
2254
2255		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2256		    !p->link_config.link_ok)
2257			continue;
2258
2259		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2260			("%s: state mismatch (drv_flags %x, device_map %x)",
2261			 __func__, ifp->if_drv_flags, sc->open_device_map));
2262
2263		PORT_LOCK(p);
2264		status = t3b2_mac_watchdog_task(&p->mac);
2265		if (status == 1)
2266			p->mac.stats.num_toggled++;
2267		else if (status == 2) {
2268			struct cmac *mac = &p->mac;
2269
2270			cxgb_update_mac_settings(p);
2271			t3_link_start(&p->phy, mac, &p->link_config);
2272			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2273			t3_port_intr_enable(sc, p->port_id);
2274			p->mac.stats.num_resets++;
2275		}
2276		PORT_UNLOCK(p);
2277	}
2278}
2279
2280static void
2281cxgb_tick(void *arg)
2282{
2283	adapter_t *sc = (adapter_t *)arg;
2284
2285	if (sc->flags & CXGB_SHUTDOWN)
2286		return;
2287
2288	taskqueue_enqueue(sc->tq, &sc->tick_task);
2289	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2290}
2291
2292static void
2293cxgb_tick_handler(void *arg, int count)
2294{
2295	adapter_t *sc = (adapter_t *)arg;
2296	const struct adapter_params *p = &sc->params;
2297	int i;
2298	uint32_t cause, reset;
2299
2300	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2301		return;
2302
2303	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2304		check_t3b2_mac(sc);
2305
2306	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2307	if (cause) {
2308		struct sge_qset *qs = &sc->sge.qs[0];
2309		uint32_t mask, v;
2310
2311		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2312
2313		mask = 1;
2314		for (i = 0; i < SGE_QSETS; i++) {
2315			if (v & mask)
2316				qs[i].rspq.starved++;
2317			mask <<= 1;
2318		}
2319
2320		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2321
2322		for (i = 0; i < SGE_QSETS * 2; i++) {
2323			if (v & mask) {
2324				qs[i / 2].fl[i % 2].empty++;
2325			}
2326			mask <<= 1;
2327		}
2328
2329		/* clear */
2330		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2331		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2332	}
2333
2334	for (i = 0; i < sc->params.nports; i++) {
2335		struct port_info *pi = &sc->port[i];
2336		struct ifnet *ifp = pi->ifp;
2337		struct cmac *mac = &pi->mac;
2338		struct mac_stats *mstats = &mac->stats;
2339		int drops, j;
2340
2341		if (!isset(&sc->open_device_map, pi->port_id))
2342			continue;
2343
2344		PORT_LOCK(pi);
2345		t3_mac_update_stats(mac);
2346		PORT_UNLOCK(pi);
2347
2348		ifp->if_opackets = mstats->tx_frames;
2349		ifp->if_ipackets = mstats->rx_frames;
2350		ifp->if_obytes = mstats->tx_octets;
2351		ifp->if_ibytes = mstats->rx_octets;
2352		ifp->if_omcasts = mstats->tx_mcast_frames;
2353		ifp->if_imcasts = mstats->rx_mcast_frames;
2354		ifp->if_collisions = mstats->tx_total_collisions;
2355		ifp->if_iqdrops = mstats->rx_cong_drops;
2356
2357		drops = 0;
2358		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2359			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2360		ifp->if_oqdrops = drops;
2361
2362		ifp->if_oerrors =
2363		    mstats->tx_excess_collisions +
2364		    mstats->tx_underrun +
2365		    mstats->tx_len_errs +
2366		    mstats->tx_mac_internal_errs +
2367		    mstats->tx_excess_deferral +
2368		    mstats->tx_fcs_errs;
2369		ifp->if_ierrors =
2370		    mstats->rx_jabber +
2371		    mstats->rx_data_errs +
2372		    mstats->rx_sequence_errs +
2373		    mstats->rx_runt +
2374		    mstats->rx_too_long +
2375		    mstats->rx_mac_internal_errs +
2376		    mstats->rx_short +
2377		    mstats->rx_fcs_errs;
2378
2379		if (mac->multiport)
2380			continue;
2381
2382		/* Count rx fifo overflows, once per second */
2383		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2384		reset = 0;
2385		if (cause & F_RXFIFO_OVERFLOW) {
2386			mac->stats.rx_fifo_ovfl++;
2387			reset |= F_RXFIFO_OVERFLOW;
2388		}
2389		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2390	}
2391}
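
/*
 * Illustrative sketch, not part of the driver: the layout of the
 * A_SG_RSPQ_FL_STATUS word decoded in cxgb_tick_handler() above.  The low
 * SGE_QSETS bits flag starved response queues, the next SGE_QSETS bits
 * (RSPQXDISABLED, masked off with ~0xff00 for SGE_QSETS == 8) are skipped,
 * and the following 2 * SGE_QSETS bits flag empty free lists, two per qset.
 * example_decode_fl_status is a hypothetical helper.
 */
static void
example_decode_fl_status(uint32_t v, int qset)
{
	int starved = (v >> qset) & 1;
	int fl0_empty = (v >> (2 * SGE_QSETS + 2 * qset)) & 1;
	int fl1_empty = (v >> (2 * SGE_QSETS + 2 * qset + 1)) & 1;

	(void)starved;
	(void)fl0_empty;
	(void)fl1_empty;
}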
2392
2393static void
2394touch_bars(device_t dev)
2395{
2396	/*
2397	 * Don't enable yet
2398	 */
2399#if !defined(__LP64__) && 0
2400	u32 v;
2401
2402	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2403	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2404	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2405	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2406	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2407	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2408#endif
2409}
2410
2411static int
2412set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2413{
2414	uint8_t *buf;
2415	int err = 0;
2416	u32 aligned_offset, aligned_len, *p;
2417	struct adapter *adapter = pi->adapter;
2418
2419
2420	aligned_offset = offset & ~3;
2421	aligned_len = (len + (offset & 3) + 3) & ~3;
2422
2423	if (aligned_offset != offset || aligned_len != len) {
2424		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2425		if (!buf)
2426			return (ENOMEM);
2427		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2428		if (!err && aligned_len > 4)
2429			err = t3_seeprom_read(adapter,
2430					      aligned_offset + aligned_len - 4,
2431					      (u32 *)&buf[aligned_len - 4]);
2432		if (err)
2433			goto out;
2434		memcpy(buf + (offset & 3), data, len);
2435	} else
2436		buf = (uint8_t *)(uintptr_t)data;
2437
2438	err = t3_seeprom_wp(adapter, 0);
2439	if (err)
2440		goto out;
2441
2442	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2443		err = t3_seeprom_write(adapter, aligned_offset, *p);
2444		aligned_offset += 4;
2445	}
2446
2447	if (!err)
2448		err = t3_seeprom_wp(adapter, 1);
2449out:
2450	if (buf != data)
2451		free(buf, M_DEVBUF);
2452	return (err);
2453}
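
/*
 * Worked example, not part of the driver, of the alignment arithmetic in
 * set_eeprom() above: a write of len = 5 bytes at offset = 6 becomes
 * aligned_offset = 6 & ~3 = 4 and aligned_len = (5 + 2 + 3) & ~3 = 8, i.e.
 * two 32-bit words covering bytes 4-11.  Bytes 4, 5, and 11 are read back
 * first so the read-modify-write preserves them.
 */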
2454
2455
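/*
 * Range check used by the ioctl handlers below: a negative value means the
 * parameter was not specified and always passes, letting callers leave a
 * field at -1 to keep the current setting.
 */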
2456static int
2457in_range(int val, int lo, int hi)
2458{
2459	return (val < 0 || (val <= hi && val >= lo));
2460}
2461
2462static int
2463cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2464{
2465	return (0);
2466}
2467
2468static int
2469cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2470{
2471	return (0);
2472}
2473
2474static int
2475cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2476    int fflag, struct thread *td)
2477{
2478	int mmd, error = 0;
2479	struct port_info *pi = dev->si_drv1;
2480	adapter_t *sc = pi->adapter;
2481
2482#ifdef PRIV_SUPPORTED
2483	if (priv_check(td, PRIV_DRIVER)) {
2484		if (cxgb_debug)
2485			printf("user does not have access to privileged ioctls\n");
2486		return (EPERM);
2487	}
2488#else
2489	if (suser(td)) {
2490		if (cxgb_debug)
2491			printf("user does not have access to privileged ioctls\n");
2492		return (EPERM);
2493	}
2494#endif
2495
2496	switch (cmd) {
2497	case CHELSIO_GET_MIIREG: {
2498		uint32_t val;
2499		struct cphy *phy = &pi->phy;
2500		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2501
2502		if (!phy->mdio_read)
2503			return (EOPNOTSUPP);
2504		if (is_10G(sc)) {
2505			mmd = mid->phy_id >> 8;
2506			if (!mmd)
2507				mmd = MDIO_DEV_PCS;
2508			else if (mmd > MDIO_DEV_VEND2)
2509				return (EINVAL);
2510
2511			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2512					     mid->reg_num, &val);
2513		} else
2514			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2515					     mid->reg_num & 0x1f, &val);
2516		if (error == 0)
2517			mid->val_out = val;
2518		break;
2519	}
2520	case CHELSIO_SET_MIIREG: {
2521		struct cphy *phy = &pi->phy;
2522		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2523
2524		if (!phy->mdio_write)
2525			return (EOPNOTSUPP);
2526		if (is_10G(sc)) {
2527			mmd = mid->phy_id >> 8;
2528			if (!mmd)
2529				mmd = MDIO_DEV_PCS;
2530			else if (mmd > MDIO_DEV_VEND2)
2531				return (EINVAL);
2532
2533			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2534					      mmd, mid->reg_num, mid->val_in);
2535		} else
2536			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2537					      mid->reg_num & 0x1f,
2538					      mid->val_in);
2539		break;
2540	}
2541	case CHELSIO_SETREG: {
2542		struct ch_reg *edata = (struct ch_reg *)data;
2543		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2544			return (EFAULT);
2545		t3_write_reg(sc, edata->addr, edata->val);
2546		break;
2547	}
2548	case CHELSIO_GETREG: {
2549		struct ch_reg *edata = (struct ch_reg *)data;
2550		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2551			return (EFAULT);
2552		edata->val = t3_read_reg(sc, edata->addr);
2553		break;
2554	}
2555	case CHELSIO_GET_SGE_CONTEXT: {
2556		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2557		mtx_lock_spin(&sc->sge.reg_lock);
2558		switch (ecntxt->cntxt_type) {
2559		case CNTXT_TYPE_EGRESS:
2560			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2561			    ecntxt->data);
2562			break;
2563		case CNTXT_TYPE_FL:
2564			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2565			    ecntxt->data);
2566			break;
2567		case CNTXT_TYPE_RSP:
2568			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2569			    ecntxt->data);
2570			break;
2571		case CNTXT_TYPE_CQ:
2572			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2573			    ecntxt->data);
2574			break;
2575		default:
2576			error = EINVAL;
2577			break;
2578		}
2579		mtx_unlock_spin(&sc->sge.reg_lock);
2580		break;
2581	}
2582	case CHELSIO_GET_SGE_DESC: {
2583		struct ch_desc *edesc = (struct ch_desc *)data;
2584		int ret;
2585		if (edesc->queue_num >= SGE_QSETS * 6)
2586			return (EINVAL);
2587		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2588		    edesc->queue_num % 6, edesc->idx, edesc->data);
2589		if (ret < 0)
2590			return (EINVAL);
2591		edesc->size = ret;
2592		break;
2593	}
2594	case CHELSIO_GET_QSET_PARAMS: {
2595		struct qset_params *q;
2596		struct ch_qset_params *t = (struct ch_qset_params *)data;
2597		int q1 = pi->first_qset;
2598		int nqsets = pi->nqsets;
2599		int i;
2600
2601		if (t->qset_idx >= nqsets)
2602			return (EINVAL);
2603
2604		i = q1 + t->qset_idx;
2605		q = &sc->params.sge.qset[i];
2606		t->rspq_size   = q->rspq_size;
2607		t->txq_size[0] = q->txq_size[0];
2608		t->txq_size[1] = q->txq_size[1];
2609		t->txq_size[2] = q->txq_size[2];
2610		t->fl_size[0]  = q->fl_size;
2611		t->fl_size[1]  = q->jumbo_size;
2612		t->polling     = q->polling;
2613		t->lro         = q->lro;
2614		t->intr_lat    = q->coalesce_usecs;
2615		t->cong_thres  = q->cong_thres;
2616		t->qnum        = i;
2617
2618		if ((sc->flags & FULL_INIT_DONE) == 0)
2619			t->vector = 0;
2620		else if (sc->flags & USING_MSIX)
2621			t->vector = rman_get_start(sc->msix_irq_res[i]);
2622		else
2623			t->vector = rman_get_start(sc->irq_res);
2624
2625		break;
2626	}
2627	case CHELSIO_GET_QSET_NUM: {
2628		struct ch_reg *edata = (struct ch_reg *)data;
2629		edata->val = pi->nqsets;
2630		break;
2631	}
2632	case CHELSIO_LOAD_FW: {
2633		uint8_t *fw_data;
2634		uint32_t vers;
2635		struct ch_mem_range *t = (struct ch_mem_range *)data;
2636
2637		/*
2638		 * You're allowed to load firmware only before FULL_INIT_DONE.
2639		 *
2640		 * FW_UPTODATE is also set so the rest of the initialization
2641		 * will not overwrite what was loaded here.  This gives you the
2642		 * flexibility to load any firmware (and maybe shoot yourself in
2643		 * the foot).
2644		 */
2645
2646		ADAPTER_LOCK(sc);
2647		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2648			ADAPTER_UNLOCK(sc);
2649			return (EBUSY);
2650		}
2651
2652		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2653		if (!fw_data)
2654			error = ENOMEM;
2655		else
2656			error = copyin(t->buf, fw_data, t->len);
2657
2658		if (!error)
2659			error = -t3_load_fw(sc, fw_data, t->len);
2660
2661		if (t3_get_fw_version(sc, &vers) == 0) {
2662			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2663			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2664			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2665		}
2666
2667		if (!error)
2668			sc->flags |= FW_UPTODATE;
2669
2670		free(fw_data, M_DEVBUF);
2671		ADAPTER_UNLOCK(sc);
2672		break;
2673	}
2674	case CHELSIO_LOAD_BOOT: {
2675		uint8_t *boot_data;
2676		struct ch_mem_range *t = (struct ch_mem_range *)data;
2677
2678		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2679		if (!boot_data)
2680			return (ENOMEM);
2681
2682		error = copyin(t->buf, boot_data, t->len);
2683		if (!error)
2684			error = -t3_load_boot(sc, boot_data, t->len);
2685
2686		free(boot_data, M_DEVBUF);
2687		break;
2688	}
2689	case CHELSIO_GET_PM: {
2690		struct ch_pm *m = (struct ch_pm *)data;
2691		struct tp_params *p = &sc->params.tp;
2692
2693		if (!is_offload(sc))
2694			return (EOPNOTSUPP);
2695
2696		m->tx_pg_sz = p->tx_pg_size;
2697		m->tx_num_pg = p->tx_num_pgs;
2698		m->rx_pg_sz  = p->rx_pg_size;
2699		m->rx_num_pg = p->rx_num_pgs;
2700		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2701
2702		break;
2703	}
2704	case CHELSIO_SET_PM: {
2705		struct ch_pm *m = (struct ch_pm *)data;
2706		struct tp_params *p = &sc->params.tp;
2707
2708		if (!is_offload(sc))
2709			return (EOPNOTSUPP);
2710		if (sc->flags & FULL_INIT_DONE)
2711			return (EBUSY);
2712
2713		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2714		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2715			return (EINVAL);	/* not power of 2 */
2716		if (!(m->rx_pg_sz & 0x14000))
2717			return (EINVAL);	/* not 16KB or 64KB */
2718		if (!(m->tx_pg_sz & 0x1554000))
2719			return (EINVAL);	/* not a power-of-4 size from 16KB to 16MB */
2720		if (m->tx_num_pg == -1)
2721			m->tx_num_pg = p->tx_num_pgs;
2722		if (m->rx_num_pg == -1)
2723			m->rx_num_pg = p->rx_num_pgs;
2724		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2725			return (EINVAL);
2726		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2727		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2728			return (EINVAL);
2729
2730		p->rx_pg_size = m->rx_pg_sz;
2731		p->tx_pg_size = m->tx_pg_sz;
2732		p->rx_num_pgs = m->rx_num_pg;
2733		p->tx_num_pgs = m->tx_num_pg;
2734		break;
2735	}
2736	case CHELSIO_SETMTUTAB: {
2737		struct ch_mtus *m = (struct ch_mtus *)data;
2738		int i;
2739
2740		if (!is_offload(sc))
2741			return (EOPNOTSUPP);
2742		if (offload_running(sc))
2743			return (EBUSY);
2744		if (m->nmtus != NMTUS)
2745			return (EINVAL);
2746		if (m->mtus[0] < 81)         /* accommodate SACK */
2747			return (EINVAL);
2748
2749		/*
2750		 * MTUs must be in ascending order
2751		 */
2752		for (i = 1; i < NMTUS; ++i)
2753			if (m->mtus[i] < m->mtus[i - 1])
2754				return (EINVAL);
2755
2756		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2757		break;
2758	}
2759	case CHELSIO_GETMTUTAB: {
2760		struct ch_mtus *m = (struct ch_mtus *)data;
2761
2762		if (!is_offload(sc))
2763			return (EOPNOTSUPP);
2764
2765		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2766		m->nmtus = NMTUS;
2767		break;
2768	}
2769	case CHELSIO_GET_MEM: {
2770		struct ch_mem_range *t = (struct ch_mem_range *)data;
2771		struct mc7 *mem;
2772		uint8_t *useraddr;
2773		u64 buf[32];
2774
2775		/*
2776		 * Use these to avoid modifying len/addr in the return
2777		 * struct
2778		 */
2779		uint32_t len = t->len, addr = t->addr;
2780
2781		if (!is_offload(sc))
2782			return (EOPNOTSUPP);
2783		if (!(sc->flags & FULL_INIT_DONE))
2784			return (EIO);         /* need the memory controllers */
2785		if ((addr & 0x7) || (len & 0x7))
2786			return (EINVAL);
2787		if (t->mem_id == MEM_CM)
2788			mem = &sc->cm;
2789		else if (t->mem_id == MEM_PMRX)
2790			mem = &sc->pmrx;
2791		else if (t->mem_id == MEM_PMTX)
2792			mem = &sc->pmtx;
2793		else
2794			return (EINVAL);
2795
2796		/*
2797		 * Version scheme:
2798		 * bits 0..9: chip version
2799		 * bits 10..15: chip revision
2800		 */
2801		t->version = 3 | (sc->params.rev << 10);
2802
2803		/*
2804		 * Read 256 bytes at a time as len can be large and we don't
2805		 * want to use huge intermediate buffers.
2806		 */
2807		useraddr = (uint8_t *)t->buf;
2808		while (len) {
2809			unsigned int chunk = min(len, sizeof(buf));
2810
2811			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2812			if (error)
2813				return (-error);
2814			if (copyout(buf, useraddr, chunk))
2815				return (EFAULT);
2816			useraddr += chunk;
2817			addr += chunk;
2818			len -= chunk;
2819		}
2820		break;
2821	}
2822	case CHELSIO_READ_TCAM_WORD: {
2823		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2824
2825		if (!is_offload(sc))
2826			return (EOPNOTSUPP);
2827		if (!(sc->flags & FULL_INIT_DONE))
2828			return (EIO);         /* need MC5 */
2829		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2830		break;
2831	}
2832	case CHELSIO_SET_TRACE_FILTER: {
2833		struct ch_trace *t = (struct ch_trace *)data;
2834		const struct trace_params *tp;
2835
2836		tp = (const struct trace_params *)&t->sip;
2837		if (t->config_tx)
2838			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2839					       t->trace_tx);
2840		if (t->config_rx)
2841			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2842					       t->trace_rx);
2843		break;
2844	}
2845	case CHELSIO_SET_PKTSCHED: {
2846		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2847		if (sc->open_device_map == 0)
2848			return (EAGAIN);
2849		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2850		    p->binding);
2851		break;
2852	}
2853	case CHELSIO_IFCONF_GETREGS: {
2854		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2855		int reglen = cxgb_get_regs_len();
2856		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2857		if (buf == NULL) {
2858			return (ENOMEM);
2859		}
2860		if (regs->len > reglen)
2861			regs->len = reglen;
2862		else if (regs->len < reglen)
2863			error = ENOBUFS;
2864
2865		if (!error) {
2866			cxgb_get_regs(sc, regs, buf);
2867			error = copyout(buf, regs->data, reglen);
2868		}
2869		free(buf, M_DEVBUF);
2870
2871		break;
2872	}
2873	case CHELSIO_SET_HW_SCHED: {
2874		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2875		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2876
2877		if ((sc->flags & FULL_INIT_DONE) == 0)
2878			return (EAGAIN);       /* need TP to be initialized */
2879		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2880		    !in_range(t->channel, 0, 1) ||
2881		    !in_range(t->kbps, 0, 10000000) ||
2882		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2883		    !in_range(t->flow_ipg, 0,
2884			      dack_ticks_to_usec(sc, 0x7ff)))
2885			return (EINVAL);
2886
2887		if (t->kbps >= 0) {
2888			error = t3_config_sched(sc, t->kbps, t->sched);
2889			if (error < 0)
2890				return (-error);
2891		}
2892		if (t->class_ipg >= 0)
2893			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2894		if (t->flow_ipg >= 0) {
2895			t->flow_ipg *= 1000;     /* us -> ns */
2896			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2897		}
2898		if (t->mode >= 0) {
2899			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2900
2901			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2902					 bit, t->mode ? bit : 0);
2903		}
2904		if (t->channel >= 0)
2905			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2906					 1 << t->sched, t->channel << t->sched);
2907		break;
2908	}
2909	case CHELSIO_GET_EEPROM: {
2910		int i;
2911		struct ch_eeprom *e = (struct ch_eeprom *)data;
2912		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2913
2914		if (buf == NULL) {
2915			return (ENOMEM);
2916		}
2917		e->magic = EEPROM_MAGIC;
2918		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2919			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2920
2921		if (!error)
2922			error = copyout(buf + e->offset, e->data, e->len);
2923
2924		free(buf, M_DEVBUF);
2925		break;
2926	}
2927	case CHELSIO_CLEAR_STATS: {
2928		if (!(sc->flags & FULL_INIT_DONE))
2929			return EAGAIN;
2930			return (EAGAIN);
2931		PORT_LOCK(pi);
2932		t3_mac_update_stats(&pi->mac);
2933		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2934		PORT_UNLOCK(pi);
2935		break;
2936	}
2937	case CHELSIO_GET_UP_LA: {
2938		struct ch_up_la *la = (struct ch_up_la *)data;
2939		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2940		if (buf == NULL) {
2941			return (ENOMEM);
2942		}
2943		if (la->bufsize < LA_BUFSIZE)
2944			error = ENOBUFS;
2945
2946		if (!error)
2947			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2948					      &la->bufsize, buf);
2949		if (!error)
2950			error = copyout(buf, la->data, la->bufsize);
2951
2952		free(buf, M_DEVBUF);
2953		break;
2954	}
2955	case CHELSIO_GET_UP_IOQS: {
2956		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
2957		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
2958		uint32_t *v;
2959
2960		if (buf == NULL) {
2961			return (ENOMEM);
2962		}
2963		if (ioqs->bufsize < IOQS_BUFSIZE)
2964			error = ENOBUFS;
2965
2966		if (!error)
2967			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
2968
2969		if (!error) {
2970			v = (uint32_t *)buf;
2971
2972			ioqs->ioq_rx_enable = *v++;
2973			ioqs->ioq_tx_enable = *v++;
2974			ioqs->ioq_rx_status = *v++;
2975			ioqs->ioq_tx_status = *v++;
2976
2977			error = copyout(v, ioqs->data, ioqs->bufsize);
2978		}
2979
2980		free(buf, M_DEVBUF);
2981		break;
2982	}
2983	case CHELSIO_SET_FILTER: {
2984		struct ch_filter *f = (struct ch_filter *)data;
2985		struct filter_info *p;
2986		unsigned int nfilters = sc->params.mc5.nfilters;
2987
2988		if (!is_offload(sc))
2989			return (EOPNOTSUPP);	/* No TCAM */
2990		if (!(sc->flags & FULL_INIT_DONE))
2991			return (EAGAIN);	/* mc5 not setup yet */
2992		if (nfilters == 0)
2993			return (EBUSY);		/* TOE will use TCAM */
2994
2995		/* sanity checks */
2996		if (f->filter_id >= nfilters ||
2997		    (f->val.dip && f->mask.dip != 0xffffffff) ||
2998		    (f->val.sport && f->mask.sport != 0xffff) ||
2999		    (f->val.dport && f->mask.dport != 0xffff) ||
3000		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3001		    (f->val.vlan_prio &&
3002			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3003		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3004		    f->qset >= SGE_QSETS ||
3005		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3006			return (EINVAL);
3007
3008		/* Was allocated with M_WAITOK */
3009		KASSERT(sc->filters, ("filter table NULL\n"));
3010
3011		p = &sc->filters[f->filter_id];
3012		if (p->locked)
3013			return (EPERM);
3014
3015		bzero(p, sizeof(*p));
3016		p->sip = f->val.sip;
3017		p->sip_mask = f->mask.sip;
3018		p->dip = f->val.dip;
3019		p->sport = f->val.sport;
3020		p->dport = f->val.dport;
3021		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3022		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3023		    FILTER_NO_VLAN_PRI;
3024		p->mac_hit = f->mac_hit;
3025		p->mac_vld = f->mac_addr_idx != 0xffff;
3026		p->mac_idx = f->mac_addr_idx;
3027		p->pkt_type = f->proto;
3028		p->report_filter_id = f->want_filter_id;
3029		p->pass = f->pass;
3030		p->rss = f->rss;
3031		p->qset = f->qset;
3032
3033		error = set_filter(sc, f->filter_id, p);
3034		if (error == 0)
3035			p->valid = 1;
3036		break;
3037	}
3038	case CHELSIO_DEL_FILTER: {
3039		struct ch_filter *f = (struct ch_filter *)data;
3040		struct filter_info *p;
3041		unsigned int nfilters = sc->params.mc5.nfilters;
3042
3043		if (!is_offload(sc))
3044			return (EOPNOTSUPP);
3045		if (!(sc->flags & FULL_INIT_DONE))
3046			return (EAGAIN);
3047		if (nfilters == 0 || sc->filters == NULL)
3048			return (EINVAL);
3049		if (f->filter_id >= nfilters)
3050			return (EINVAL);
3051
3052		p = &sc->filters[f->filter_id];
3053		if (p->locked)
3054			return (EPERM);
3055		if (!p->valid)
3056			return (EFAULT); /* Read "Bad address" as "Bad index" */
3057
3058		bzero(p, sizeof(*p));
3059		p->sip = p->sip_mask = 0xffffffff;
3060		p->vlan = 0xfff;
3061		p->vlan_prio = FILTER_NO_VLAN_PRI;
3062		p->pkt_type = 1;
3063		error = set_filter(sc, f->filter_id, p);
3064		break;
3065	}
3066	case CHELSIO_GET_FILTER: {
3067		struct ch_filter *f = (struct ch_filter *)data;
3068		struct filter_info *p;
3069		unsigned int i, nfilters = sc->params.mc5.nfilters;
3070
3071		if (!is_offload(sc))
3072			return (EOPNOTSUPP);
3073		if (!(sc->flags & FULL_INIT_DONE))
3074			return (EAGAIN);
3075		if (nfilters == 0 || sc->filters == NULL)
3076			return (EINVAL);
3077
3078		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3079		for (; i < nfilters; i++) {
3080			p = &sc->filters[i];
3081			if (!p->valid)
3082				continue;
3083
3084			bzero(f, sizeof(*f));
3085
3086			f->filter_id = i;
3087			f->val.sip = p->sip;
3088			f->mask.sip = p->sip_mask;
3089			f->val.dip = p->dip;
3090			f->mask.dip = p->dip ? 0xffffffff : 0;
3091			f->val.sport = p->sport;
3092			f->mask.sport = p->sport ? 0xffff : 0;
3093			f->val.dport = p->dport;
3094			f->mask.dport = p->dport ? 0xffff : 0;
3095			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3096			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3097			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3098			    0 : p->vlan_prio;
3099			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3100			    0 : FILTER_NO_VLAN_PRI;
3101			f->mac_hit = p->mac_hit;
3102			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3103			f->proto = p->pkt_type;
3104			f->want_filter_id = p->report_filter_id;
3105			f->pass = p->pass;
3106			f->rss = p->rss;
3107			f->qset = p->qset;
3108
3109			break;
3110		}
3111
3112		if (i == nfilters)
3113			f->filter_id = 0xffffffff;
3114		break;
3115	}
3116	default:
3117		return (EOPNOTSUPP);
3118		break;
3119	}
3120
3121	return (error);
3122}
3123
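/*
 * Copy registers [start, end] into the dump buffer.  The buffer is indexed
 * by register address, so each block lands at its natural offset and ranges
 * that are never dumped stay zeroed.
 */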
3124static __inline void
3125reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3126    unsigned int end)
3127{
3128	uint32_t *p = (uint32_t *)(buf + start);
3129
3130	for ( ; start <= end; start += sizeof(uint32_t))
3131		*p++ = t3_read_reg(ap, start);
3132}
3133
3134#define T3_REGMAP_SIZE (3 * 1024)
3135static int
3136cxgb_get_regs_len(void)
3137{
3138	return (T3_REGMAP_SIZE);
3139}
3140
3141static void
3142cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3143{
3144
3145	/*
3146	 * Version scheme:
3147	 * bits 0..9: chip version
3148	 * bits 10..15: chip revision
3149	 * bit 31: set for PCIe cards
3150	 */
3151	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3152
3153	/*
3154	 * We skip the MAC statistics registers because they are clear-on-read.
3155	 * Also reading multi-register stats would need to synchronize with the
3156	 * periodic mac stats accumulation.  Hard to justify the complexity.
3157	 */
3158	memset(buf, 0, cxgb_get_regs_len());
3159	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3160	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3161	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3162	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3163	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3164	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3165		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3166	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3167		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3168}
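
/*
 * Illustrative sketch, not part of the driver: decoding the version word
 * written by cxgb_get_regs() above (and, without the PCIe bit, by the
 * CHELSIO_GET_MEM ioctl) according to the scheme in the comments.
 * example_decode_version is a hypothetical helper.
 */
static void
example_decode_version(uint32_t version)
{
	unsigned int chip = version & 0x3ff;		/* bits 0..9 */
	unsigned int rev = (version >> 10) & 0x3f;	/* bits 10..15 */
	int pcie = (version >> 31) & 1;			/* bit 31 */

	(void)chip;
	(void)rev;
	(void)pcie;
}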
3169
3170static int
3171alloc_filters(struct adapter *sc)
3172{
3173	struct filter_info *p;
3174	unsigned int nfilters = sc->params.mc5.nfilters;
3175
3176	if (nfilters == 0)
3177		return (0);
3178
3179	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3180	sc->filters = p;
3181
3182	p = &sc->filters[nfilters - 1];
3183	p->vlan = 0xfff;
3184	p->vlan_prio = FILTER_NO_VLAN_PRI;
3185	p->pass = p->rss = p->valid = p->locked = 1;
3186
3187	return (0);
3188}
3189
3190static int
3191setup_hw_filters(struct adapter *sc)
3192{
3193	int i, rc;
3194	unsigned int nfilters = sc->params.mc5.nfilters;
3195
3196	if (!sc->filters)
3197		return (0);
3198
3199	t3_enable_filters(sc);
3200
3201	for (i = rc = 0; i < nfilters && !rc; i++) {
3202		if (sc->filters[i].locked)
3203			rc = set_filter(sc, i, &sc->filters[i]);
3204	}
3205
3206	return (rc);
3207}
3208
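/*
 * set_filter() below programs one TCAM filter by sending an atomic
 * management work request carrying a CPL_PASS_OPEN_REQ (the match fields)
 * and two CPL_SET_TCB_FIELD commands (the filter's action bits), each
 * wrapped as a ULP_TXPKT.  When the filter passes packets without RSS, a
 * standalone CPL_SET_TCB_FIELD follows to steer matches to the chosen
 * qset's response queue.
 */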
3209static int
3210set_filter(struct adapter *sc, int id, const struct filter_info *f)
3211{
3212	int len;
3213	struct mbuf *m;
3214	struct ulp_txpkt *txpkt;
3215	struct work_request_hdr *wr;
3216	struct cpl_pass_open_req *oreq;
3217	struct cpl_set_tcb_field *sreq;
3218
3219	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3220	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3221
3222	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3223	      sc->params.mc5.nfilters;
3224
3225	m = m_gethdr(M_WAITOK, MT_DATA);
3226	m->m_len = m->m_pkthdr.len = len;
3227	bzero(mtod(m, char *), len);
3228
3229	wr = mtod(m, struct work_request_hdr *);
3230	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3231
3232	oreq = (struct cpl_pass_open_req *)(wr + 1);
3233	txpkt = (struct ulp_txpkt *)oreq;
3234	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3235	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3236	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3237	oreq->local_port = htons(f->dport);
3238	oreq->peer_port = htons(f->sport);
3239	oreq->local_ip = htonl(f->dip);
3240	oreq->peer_ip = htonl(f->sip);
3241	oreq->peer_netmask = htonl(f->sip_mask);
3242	oreq->opt0h = 0;
3243	oreq->opt0l = htonl(F_NO_OFFLOAD);
3244	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3245			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3246			 V_VLAN_PRI(f->vlan_prio >> 1) |
3247			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3248			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3249			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3250
3251	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3252	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3253			  (f->report_filter_id << 15) | (1 << 23) |
3254			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3255	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3256	t3_mgmt_tx(sc, m);
3257
3258	if (f->pass && !f->rss) {
3259		len = sizeof(*sreq);
3260		m = m_gethdr(M_WAITOK, MT_DATA);
3261		m->m_len = m->m_pkthdr.len = len;
3262		bzero(mtod(m, char *), len);
3263		sreq = mtod(m, struct cpl_set_tcb_field *);
3264		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3265		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3266				 (u64)sc->rrss_map[f->qset] << 19);
3267		t3_mgmt_tx(sc, m);
3268	}
3269	return (0);
3270}
3271
3272static inline void
3273mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3274    unsigned int word, u64 mask, u64 val)
3275{
3276	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3277	req->reply = V_NO_REPLY(1);
3278	req->cpu_idx = 0;
3279	req->word = htons(word);
3280	req->mask = htobe64(mask);
3281	req->val = htobe64(val);
3282}
3283
3284static inline void
3285set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3286    unsigned int word, u64 mask, u64 val)
3287{
3288	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3289
3290	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3291	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3292	mk_set_tcb_field(req, tid, word, mask, val);
3293}
3294
3295void
3296t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3297{
3298	struct adapter *sc;
3299
3300	mtx_lock(&t3_list_lock);
3301	SLIST_FOREACH(sc, &t3_list, link) {
3302		/*
3303		 * func should not make any assumptions about what state sc is
3304		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3305		 */
3306		func(sc, arg);
3307	}
3308	mtx_unlock(&t3_list_lock);
3309}
3310
3311#ifdef TCP_OFFLOAD
3312static int
3313toe_capability(struct port_info *pi, int enable)
3314{
3315	int rc;
3316	struct adapter *sc = pi->adapter;
3317
3318	ADAPTER_LOCK_ASSERT_OWNED(sc);
3319
3320	if (!is_offload(sc))
3321		return (ENODEV);
3322
3323	if (enable) {
3324		if (!(sc->flags & FULL_INIT_DONE)) {
3325			log(LOG_WARNING,
3326			    "You must enable a cxgb interface first\n");
3327			return (EAGAIN);
3328		}
3329
3330		if (isset(&sc->offload_map, pi->port_id))
3331			return (0);
3332
3333		if (!(sc->flags & TOM_INIT_DONE)) {
3334			rc = t3_activate_uld(sc, ULD_TOM);
3335			if (rc == EAGAIN) {
3336				log(LOG_WARNING,
3337				    "You must kldload t3_tom.ko before trying "
3338				    "to enable TOE on a cxgb interface.\n");
3339			}
3340			if (rc != 0)
3341				return (rc);
3342			KASSERT(sc->tom_softc != NULL,
3343			    ("%s: TOM activated but softc NULL", __func__));
3344			KASSERT(sc->flags & TOM_INIT_DONE,
3345			    ("%s: TOM activated but flag not set", __func__));
3346		}
3347
3348		setbit(&sc->offload_map, pi->port_id);
3349
3350		/*
3351		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3352		 * enabled on any port.  Need to figure out how to enable,
3353		 * disable, load, and unload iWARP cleanly.
3354		 */
3355		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3356		    t3_activate_uld(sc, ULD_IWARP) == 0)
3357			setbit(&sc->offload_map, MAX_NPORTS);
3358	} else {
3359		if (!isset(&sc->offload_map, pi->port_id))
3360			return (0);
3361
3362		KASSERT(sc->flags & TOM_INIT_DONE,
3363		    ("%s: TOM never initialized?", __func__));
3364		clrbit(&sc->offload_map, pi->port_id);
3365	}
3366
3367	return (0);
3368}
3369
3370/*
3371 * Add an upper layer driver to the global list.
3372 */
3373int
3374t3_register_uld(struct uld_info *ui)
3375{
3376	int rc = 0;
3377	struct uld_info *u;
3378
3379	mtx_lock(&t3_uld_list_lock);
3380	SLIST_FOREACH(u, &t3_uld_list, link) {
3381		if (u->uld_id == ui->uld_id) {
3382			rc = EEXIST;
3383			goto done;
3384		}
3385	}
3386
3387	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3388	ui->refcount = 0;
3389done:
3390	mtx_unlock(&t3_uld_list_lock);
3391	return (rc);
3392}
3393
3394int
3395t3_unregister_uld(struct uld_info *ui)
3396{
3397	int rc = EINVAL;
3398	struct uld_info *u;
3399
3400	mtx_lock(&t3_uld_list_lock);
3401
3402	SLIST_FOREACH(u, &t3_uld_list, link) {
3403		if (u == ui) {
3404			if (ui->refcount > 0) {
3405				rc = EBUSY;
3406				goto done;
3407			}
3408
3409			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3410			rc = 0;
3411			goto done;
3412		}
3413	}
3414done:
3415	mtx_unlock(&t3_uld_list_lock);
3416	return (rc);
3417}
3418
3419int
3420t3_activate_uld(struct adapter *sc, int id)
3421{
3422	int rc = EAGAIN;
3423	struct uld_info *ui;
3424
3425	mtx_lock(&t3_uld_list_lock);
3426
3427	SLIST_FOREACH(ui, &t3_uld_list, link) {
3428		if (ui->uld_id == id) {
3429			rc = ui->activate(sc);
3430			if (rc == 0)
3431				ui->refcount++;
3432			goto done;
3433		}
3434	}
3435done:
3436	mtx_unlock(&t3_uld_list_lock);
3437
3438	return (rc);
3439}
3440
3441int
3442t3_deactivate_uld(struct adapter *sc, int id)
3443{
3444	int rc = EINVAL;
3445	struct uld_info *ui;
3446
3447	mtx_lock(&t3_uld_list_lock);
3448
3449	SLIST_FOREACH(ui, &t3_uld_list, link) {
3450		if (ui->uld_id == id) {
3451			rc = ui->deactivate(sc);
3452			if (rc == 0)
3453				ui->refcount--;
3454			goto done;
3455		}
3456	}
3457done:
3458	mtx_unlock(&t3_uld_list_lock);
3459
3460	return (rc);
3461}
3462
3463static int
3464cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3465    struct mbuf *m)
3466{
3467	m_freem(m);
3468	return (EDOOFUS);
3469}
3470
3471int
3472t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3473{
3474	uintptr_t *loc, new;
3475
3476	if (opcode >= NUM_CPL_HANDLERS)
3477		return (EINVAL);
3478
3479	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3480	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3481	atomic_store_rel_ptr(loc, new);
3482
3483	return (0);
3484}
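
/*
 * Illustrative sketch, not part of the driver: how an upper layer driver
 * might install a handler for a CPL opcode it cares about.  The handler
 * signature mirrors cpl_not_handled() above; example_rx_handler and the
 * opcode in the usage note are hypothetical.
 */
static int
example_rx_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{

	/* ... examine the CPL message in m ... */
	m_freem(m);
	return (0);
}
/*
 * Typically called from a ULD's activate routine, e.g.:
 *	t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, example_rx_handler);
 */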
3485#endif
3486
3487static int
3488cxgbc_mod_event(module_t mod, int cmd, void *arg)
3489{
3490	int rc = 0;
3491
3492	switch (cmd) {
3493	case MOD_LOAD:
3494		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3495		SLIST_INIT(&t3_list);
3496#ifdef TCP_OFFLOAD
3497		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3498		SLIST_INIT(&t3_uld_list);
3499#endif
3500		break;
3501
3502	case MOD_UNLOAD:
3503#ifdef TCP_OFFLOAD
3504		mtx_lock(&t3_uld_list_lock);
3505		if (!SLIST_EMPTY(&t3_uld_list)) {
3506			rc = EBUSY;
3507			mtx_unlock(&t3_uld_list_lock);
3508			break;
3509		}
3510		mtx_unlock(&t3_uld_list_lock);
3511		mtx_destroy(&t3_uld_list_lock);
3512#endif
3513		mtx_lock(&t3_list_lock);
3514		if (!SLIST_EMPTY(&t3_list)) {
3515			rc = EBUSY;
3516			mtx_unlock(&t3_list_lock);
3517			break;
3518		}
3519		mtx_unlock(&t3_list_lock);
3520		mtx_destroy(&t3_list_lock);
3521		break;
3522	}
3523
3524	return (rc);
3525}
3526