1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: releng/11.0/sys/dev/cxgb/cxgb_main.c 283291 2015-05-22 17:05:21Z jkim $");
32
33#include "opt_inet.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/bus.h>
39#include <sys/module.h>
40#include <sys/pciio.h>
41#include <sys/conf.h>
42#include <machine/bus.h>
43#include <machine/resource.h>
44#include <sys/bus_dma.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_var.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67#include <net/if_types.h>
68#include <net/if_vlan_var.h>
69
70#include <netinet/in_systm.h>
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
73#include <netinet/ip.h>
74#include <netinet/ip.h>
75#include <netinet/tcp.h>
76#include <netinet/udp.h>
77
78#include <dev/pci/pcireg.h>
79#include <dev/pci/pcivar.h>
80#include <dev/pci/pci_private.h>
81
82#include <cxgb_include.h>
83
84#ifdef PRIV_SUPPORTED
85#include <sys/priv.h>
86#endif
87
88static int cxgb_setup_interrupts(adapter_t *);
89static void cxgb_teardown_interrupts(adapter_t *);
90static void cxgb_init(void *);
91static int cxgb_init_locked(struct port_info *);
92static int cxgb_uninit_locked(struct port_info *);
93static int cxgb_uninit_synchronized(struct port_info *);
94static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
95static int cxgb_media_change(struct ifnet *);
96static int cxgb_ifm_type(int);
97static void cxgb_build_medialist(struct port_info *);
98static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
99static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
100static int setup_sge_qsets(adapter_t *);
101static void cxgb_async_intr(void *);
102static void cxgb_tick_handler(void *, int);
103static void cxgb_tick(void *);
104static void link_check_callout(void *);
105static void check_link_status(void *, int);
106static void setup_rss(adapter_t *sc);
107static int alloc_filters(struct adapter *);
108static int setup_hw_filters(struct adapter *);
109static int set_filter(struct adapter *, int, const struct filter_info *);
110static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
111    unsigned int, u64, u64);
112static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
113    unsigned int, u64, u64);
114#ifdef TCP_OFFLOAD
115static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
116#endif
117
118/* Attachment glue for the PCI controller end of the device.  Each port of
119 * the device is attached separately, as defined later.
120 */
121static int cxgb_controller_probe(device_t);
122static int cxgb_controller_attach(device_t);
123static int cxgb_controller_detach(device_t);
124static void cxgb_free(struct adapter *);
125static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
126    unsigned int end);
127static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
128static int cxgb_get_regs_len(void);
129static void touch_bars(device_t dev);
130static void cxgb_update_mac_settings(struct port_info *p);
131#ifdef TCP_OFFLOAD
132static int toe_capability(struct port_info *, int);
133#endif
134
135static device_method_t cxgb_controller_methods[] = {
136	DEVMETHOD(device_probe,		cxgb_controller_probe),
137	DEVMETHOD(device_attach,	cxgb_controller_attach),
138	DEVMETHOD(device_detach,	cxgb_controller_detach),
139
140	DEVMETHOD_END
141};
142
143static driver_t cxgb_controller_driver = {
144	"cxgbc",
145	cxgb_controller_methods,
146	sizeof(struct adapter)
147};
148
149static int cxgbc_mod_event(module_t, int, void *);
150static devclass_t	cxgb_controller_devclass;
151DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
152    cxgbc_mod_event, 0);
153MODULE_VERSION(cxgbc, 1);
154MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
155
156/*
157 * Attachment glue for the ports.  Attachment is done directly to the
158 * controller device.
159 */
160static int cxgb_port_probe(device_t);
161static int cxgb_port_attach(device_t);
162static int cxgb_port_detach(device_t);
163
164static device_method_t cxgb_port_methods[] = {
165	DEVMETHOD(device_probe,		cxgb_port_probe),
166	DEVMETHOD(device_attach,	cxgb_port_attach),
167	DEVMETHOD(device_detach,	cxgb_port_detach),
168	{ 0, 0 }
169};
170
171static driver_t cxgb_port_driver = {
172	"cxgb",
173	cxgb_port_methods,
174	0
175};
176
177static d_ioctl_t cxgb_extension_ioctl;
178static d_open_t cxgb_extension_open;
179static d_close_t cxgb_extension_close;
180
181static struct cdevsw cxgb_cdevsw = {
182       .d_version =    D_VERSION,
183       .d_flags =      0,
184       .d_open =       cxgb_extension_open,
185       .d_close =      cxgb_extension_close,
186       .d_ioctl =      cxgb_extension_ioctl,
187       .d_name =       "cxgb",
188};
189
190static devclass_t	cxgb_port_devclass;
191DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
192MODULE_VERSION(cxgb, 1);
193
194static struct mtx t3_list_lock;
195static SLIST_HEAD(, adapter) t3_list;
196#ifdef TCP_OFFLOAD
197static struct mtx t3_uld_list_lock;
198static SLIST_HEAD(, uld_info) t3_uld_list;
199#endif
200
201/*
202 * The driver uses the best interrupt scheme available on a platform in the
203 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
204 * of these schemes the driver may consider as follows:
205 *
206 * msi = 2: choose from among all three options
207 * msi = 1 : only consider MSI and pin interrupts
208 * msi = 0: force pin interrupts
209 */
210static int msi_allowed = 2;
211
212SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
213SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
214    "MSI-X, MSI, INTx selector");
215
216/*
217 * The driver uses an auto-queue algorithm by default.
218 * To disable it and force a single queue-set per port, use multiq = 0
219 */
220static int multiq = 1;
221SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
222    "use min(ncpus/ports, 8) queue-sets per port");
223
224/*
225 * By default the driver will not update the firmware unless
226 * it was compiled against a newer version
227 *
228 */
229static int force_fw_update = 0;
230SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
231    "update firmware even if up to date");
232
233int cxgb_use_16k_clusters = -1;
234SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
235    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
236
237static int nfilters = -1;
238SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
239    &nfilters, 0, "max number of entries in the filter table");
240
241enum {
242	MAX_TXQ_ENTRIES      = 16384,
243	MAX_CTRL_TXQ_ENTRIES = 1024,
244	MAX_RSPQ_ENTRIES     = 16384,
245	MAX_RX_BUFFERS       = 16384,
246	MAX_RX_JUMBO_BUFFERS = 16384,
247	MIN_TXQ_ENTRIES      = 4,
248	MIN_CTRL_TXQ_ENTRIES = 4,
249	MIN_RSPQ_ENTRIES     = 32,
250	MIN_FL_ENTRIES       = 32,
251	MIN_FL_JUMBO_ENTRIES = 32
252};
253
254struct filter_info {
255	u32 sip;
256	u32 sip_mask;
257	u32 dip;
258	u16 sport;
259	u16 dport;
260	u32 vlan:12;
261	u32 vlan_prio:3;
262	u32 mac_hit:1;
263	u32 mac_idx:4;
264	u32 mac_vld:1;
265	u32 pkt_type:2;
266	u32 report_filter_id:1;
267	u32 pass:1;
268	u32 rss:1;
269	u32 qset:3;
270	u32 locked:1;
271	u32 valid:1;
272};
273
274enum { FILTER_NO_VLAN_PRI = 7 };
275
276#define EEPROM_MAGIC 0x38E2F10C
277
278#define PORT_MASK ((1 << MAX_NPORTS) - 1)
279
280/* Table for probing the cards.  The desc field isn't actually used */
281struct cxgb_ident {
282	uint16_t	vendor;
283	uint16_t	device;
284	int		index;
285	char		*desc;
286} cxgb_identifiers[] = {
287	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
288	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
289	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
290	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
291	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
300	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
301	{0, 0, 0, NULL}
302};
303
304static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
305
306
307static __inline char
308t3rev2char(struct adapter *adapter)
309{
310	char rev = 'z';
311
312	switch(adapter->params.rev) {
313	case T3_REV_A:
314		rev = 'a';
315		break;
316	case T3_REV_B:
317	case T3_REV_B2:
318		rev = 'b';
319		break;
320	case T3_REV_C:
321		rev = 'c';
322		break;
323	}
324	return rev;
325}
326
327static struct cxgb_ident *
328cxgb_get_ident(device_t dev)
329{
330	struct cxgb_ident *id;
331
332	for (id = cxgb_identifiers; id->desc != NULL; id++) {
333		if ((id->vendor == pci_get_vendor(dev)) &&
334		    (id->device == pci_get_device(dev))) {
335			return (id);
336		}
337	}
338	return (NULL);
339}
340
341static const struct adapter_info *
342cxgb_get_adapter_info(device_t dev)
343{
344	struct cxgb_ident *id;
345	const struct adapter_info *ai;
346
347	id = cxgb_get_ident(dev);
348	if (id == NULL)
349		return (NULL);
350
351	ai = t3_get_adapter_info(id->index);
352
353	return (ai);
354}
355
356static int
357cxgb_controller_probe(device_t dev)
358{
359	const struct adapter_info *ai;
360	char *ports, buf[80];
361	int nports;
362
363	ai = cxgb_get_adapter_info(dev);
364	if (ai == NULL)
365		return (ENXIO);
366
367	nports = ai->nports0 + ai->nports1;
368	if (nports == 1)
369		ports = "port";
370	else
371		ports = "ports";
372
373	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
374	device_set_desc_copy(dev, buf);
375	return (BUS_PROBE_DEFAULT);
376}
377
378#define FW_FNAME "cxgb_t3fw"
379#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
380#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
381
382static int
383upgrade_fw(adapter_t *sc)
384{
385	const struct firmware *fw;
386	int status;
387	u32 vers;
388
389	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
390		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
391		return (ENOENT);
392	} else
393		device_printf(sc->dev, "installing firmware on card\n");
394	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
395
396	if (status != 0) {
397		device_printf(sc->dev, "failed to install firmware: %d\n",
398		    status);
399	} else {
400		t3_get_fw_version(sc, &vers);
401		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
402		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
403		    G_FW_VERSION_MICRO(vers));
404	}
405
406	firmware_put(fw, FIRMWARE_UNLOAD);
407
408	return (status);
409}
410
411/*
412 * The cxgb_controller_attach function is responsible for the initial
413 * bringup of the device.  Its responsibilities include:
414 *
415 *  1. Determine if the device supports MSI or MSI-X.
416 *  2. Allocate bus resources so that we can access the Base Address Register
417 *  3. Create and initialize mutexes for the controller and its control
418 *     logic such as SGE and MDIO.
419 *  4. Call hardware specific setup routine for the adapter as a whole.
420 *  5. Allocate the BAR for doing MSI-X.
421 *  6. Setup the line interrupt iff MSI-X is not supported.
422 *  7. Create the driver's taskq.
423 *  8. Start one task queue service thread.
424 *  9. Check if the firmware and SRAM are up-to-date.  They will be
425 *     auto-updated later (before FULL_INIT_DONE), if required.
426 * 10. Create a child device for each MAC (port)
427 * 11. Initialize T3 private state.
428 * 12. Trigger the LED
429 * 13. Setup offload iff supported.
430 * 14. Reset/restart the tick callout.
431 * 15. Attach sysctls
432 *
433 * NOTE: Any modification or deviation from this list MUST be reflected in
434 * the above comment.  Failure to do so will result in problems on various
435 * error conditions including link flapping.
436 */
437static int
438cxgb_controller_attach(device_t dev)
439{
440	device_t child;
441	const struct adapter_info *ai;
442	struct adapter *sc;
443	int i, error = 0;
444	uint32_t vers;
445	int port_qsets = 1;
446	int msi_needed, reg;
447	char buf[80];
448
449	sc = device_get_softc(dev);
450	sc->dev = dev;
451	sc->msi_count = 0;
452	ai = cxgb_get_adapter_info(dev);
453
454	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
455	    device_get_unit(dev));
456	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
457
458	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
459	    device_get_unit(dev));
460	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
461	    device_get_unit(dev));
462	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
463	    device_get_unit(dev));
464
465	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
466	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
467	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
468
469	mtx_lock(&t3_list_lock);
470	SLIST_INSERT_HEAD(&t3_list, sc, link);
471	mtx_unlock(&t3_list_lock);
472
473	/* find the PCIe link width and set max read request to 4KB*/
474	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
475		uint16_t lnk;
476
477		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
478		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
479		if (sc->link_width < 8 &&
480		    (ai->caps & SUPPORTED_10000baseT_Full)) {
481			device_printf(sc->dev,
482			    "PCIe x%d Link, expect reduced performance\n",
483			    sc->link_width);
484		}
485
486		pci_set_max_read_req(dev, 4096);
487	}
488
489	touch_bars(dev);
490	pci_enable_busmaster(dev);
491	/*
492	 * Allocate the registers and make them available to the driver.
493	 * The registers that we care about for NIC mode are in BAR 0
494	 */
495	sc->regs_rid = PCIR_BAR(0);
496	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
497	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
498		device_printf(dev, "Cannot allocate BAR region 0\n");
499		error = ENXIO;
500		goto out;
501	}
502
503	sc->bt = rman_get_bustag(sc->regs_res);
504	sc->bh = rman_get_bushandle(sc->regs_res);
505	sc->mmio_len = rman_get_size(sc->regs_res);
506
507	for (i = 0; i < MAX_NPORTS; i++)
508		sc->port[i].adapter = sc;
509
510	if (t3_prep_adapter(sc, ai, 1) < 0) {
511		printf("prep adapter failed\n");
512		error = ENODEV;
513		goto out;
514	}
515
516	sc->udbs_rid = PCIR_BAR(2);
517	sc->udbs_res = NULL;
518	if (is_offload(sc) &&
519	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
520		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
521		device_printf(dev, "Cannot allocate BAR region 1\n");
522		error = ENXIO;
523		goto out;
524	}
525
526        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
527	 * enough messages for the queue sets.  If that fails, try falling
528	 * back to MSI.  If that fails, then try falling back to the legacy
529	 * interrupt pin model.
530	 */
531	sc->msix_regs_rid = 0x20;
532	if ((msi_allowed >= 2) &&
533	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
534	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
535
536		if (multiq)
537			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
538		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
539
540		if (pci_msix_count(dev) == 0 ||
541		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
542		    sc->msi_count != msi_needed) {
543			device_printf(dev, "alloc msix failed - "
544				      "msi_count=%d, msi_needed=%d, err=%d; "
545				      "will try MSI\n", sc->msi_count,
546				      msi_needed, error);
547			sc->msi_count = 0;
548			port_qsets = 1;
549			pci_release_msi(dev);
550			bus_release_resource(dev, SYS_RES_MEMORY,
551			    sc->msix_regs_rid, sc->msix_regs_res);
552			sc->msix_regs_res = NULL;
553		} else {
554			sc->flags |= USING_MSIX;
555			sc->cxgb_intr = cxgb_async_intr;
556			device_printf(dev,
557				      "using MSI-X interrupts (%u vectors)\n",
558				      sc->msi_count);
559		}
560	}
561
562	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
563		sc->msi_count = 1;
564		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
565			device_printf(dev, "alloc msi failed - "
566				      "err=%d; will try INTx\n", error);
567			sc->msi_count = 0;
568			port_qsets = 1;
569			pci_release_msi(dev);
570		} else {
571			sc->flags |= USING_MSI;
572			sc->cxgb_intr = t3_intr_msi;
573			device_printf(dev, "using MSI interrupts\n");
574		}
575	}
576	if (sc->msi_count == 0) {
577		device_printf(dev, "using line interrupts\n");
578		sc->cxgb_intr = t3b_intr;
579	}
580
581	/* Create a private taskqueue thread for handling driver events */
582	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
583	    taskqueue_thread_enqueue, &sc->tq);
584	if (sc->tq == NULL) {
585		device_printf(dev, "failed to allocate controller task queue\n");
586		goto out;
587	}
588
589	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
590	    device_get_nameunit(dev));
591	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
592
593
594	/* Create a periodic callout for checking adapter status */
595	callout_init(&sc->cxgb_tick_ch, 1);
596
597	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
598		/*
599		 * Warn user that a firmware update will be attempted in init.
600		 */
601		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
602		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
603		sc->flags &= ~FW_UPTODATE;
604	} else {
605		sc->flags |= FW_UPTODATE;
606	}
607
608	if (t3_check_tpsram_version(sc) < 0) {
609		/*
610		 * Warn user that a firmware update will be attempted in init.
611		 */
612		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
613		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
614		sc->flags &= ~TPS_UPTODATE;
615	} else {
616		sc->flags |= TPS_UPTODATE;
617	}
618
619	/*
620	 * Create a child device for each MAC.  The ethernet attachment
621	 * will be done in these children.
622	 */
623	for (i = 0; i < (sc)->params.nports; i++) {
624		struct port_info *pi;
625
626		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
627			device_printf(dev, "failed to add child port\n");
628			error = EINVAL;
629			goto out;
630		}
631		pi = &sc->port[i];
632		pi->adapter = sc;
633		pi->nqsets = port_qsets;
634		pi->first_qset = i*port_qsets;
635		pi->port_id = i;
636		pi->tx_chan = i >= ai->nports0;
637		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
638		sc->rxpkt_map[pi->txpkt_intf] = i;
639		sc->port[i].tx_chan = i >= ai->nports0;
640		sc->portdev[i] = child;
641		device_set_softc(child, pi);
642	}
643	if ((error = bus_generic_attach(dev)) != 0)
644		goto out;
645
646	/* initialize sge private state */
647	t3_sge_init_adapter(sc);
648
649	t3_led_ready(sc);
650
651	error = t3_get_fw_version(sc, &vers);
652	if (error)
653		goto out;
654
655	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
656	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
657	    G_FW_VERSION_MICRO(vers));
658
659	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
660		 ai->desc, is_offload(sc) ? "R" : "",
661		 sc->params.vpd.ec, sc->params.vpd.sn);
662	device_set_desc_copy(dev, buf);
663
664	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
665		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
666		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
667
668	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
669	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
670	t3_add_attach_sysctls(sc);
671
672#ifdef TCP_OFFLOAD
673	for (i = 0; i < NUM_CPL_HANDLERS; i++)
674		sc->cpl_handler[i] = cpl_not_handled;
675#endif
676
677	t3_intr_clear(sc);
678	error = cxgb_setup_interrupts(sc);
679out:
680	if (error)
681		cxgb_free(sc);
682
683	return (error);
684}
685
686/*
687 * The cxgb_controller_detach routine is called with the device is
688 * unloaded from the system.
689 */
690
691static int
692cxgb_controller_detach(device_t dev)
693{
694	struct adapter *sc;
695
696	sc = device_get_softc(dev);
697
698	cxgb_free(sc);
699
700	return (0);
701}
702
703/*
704 * The cxgb_free() is called by the cxgb_controller_detach() routine
705 * to tear down the structures that were built up in
706 * cxgb_controller_attach(), and should be the final piece of work
707 * done when fully unloading the driver.
708 *
709 *
710 *  1. Shutting down the threads started by the cxgb_controller_attach()
711 *     routine.
712 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
713 *  3. Detaching all of the port devices created during the
714 *     cxgb_controller_attach() routine.
715 *  4. Removing the device children created via cxgb_controller_attach().
716 *  5. Releasing PCI resources associated with the device.
717 *  6. Turning off the offload support, iff it was turned on.
718 *  7. Destroying the mutexes created in cxgb_controller_attach().
719 *
720 */
721static void
722cxgb_free(struct adapter *sc)
723{
724	int i, nqsets = 0;
725
726	ADAPTER_LOCK(sc);
727	sc->flags |= CXGB_SHUTDOWN;
728	ADAPTER_UNLOCK(sc);
729
730	/*
731	 * Make sure all child devices are gone.
732	 */
733	bus_generic_detach(sc->dev);
734	for (i = 0; i < (sc)->params.nports; i++) {
735		if (sc->portdev[i] &&
736		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
737			device_printf(sc->dev, "failed to delete child port\n");
738		nqsets += sc->port[i].nqsets;
739	}
740
741	/*
742	 * At this point, it is as if cxgb_port_detach has run on all ports, and
743	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
744	 * all open devices have been closed.
745	 */
746	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
747					   __func__, sc->open_device_map));
748	for (i = 0; i < sc->params.nports; i++) {
749		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
750						  __func__, i));
751	}
752
753	/*
754	 * Finish off the adapter's callouts.
755	 */
756	callout_drain(&sc->cxgb_tick_ch);
757	callout_drain(&sc->sge_timer_ch);
758
759	/*
760	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
761	 * sysctls are cleaned up by the kernel linker.
762	 */
763	if (sc->flags & FULL_INIT_DONE) {
764 		t3_free_sge_resources(sc, nqsets);
765 		sc->flags &= ~FULL_INIT_DONE;
766 	}
767
768	/*
769	 * Release all interrupt resources.
770	 */
771	cxgb_teardown_interrupts(sc);
772	if (sc->flags & (USING_MSI | USING_MSIX)) {
773		device_printf(sc->dev, "releasing msi message(s)\n");
774		pci_release_msi(sc->dev);
775	} else {
776		device_printf(sc->dev, "no msi message to release\n");
777	}
778
779	if (sc->msix_regs_res != NULL) {
780		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
781		    sc->msix_regs_res);
782	}
783
784	/*
785	 * Free the adapter's taskqueue.
786	 */
787	if (sc->tq != NULL) {
788		taskqueue_free(sc->tq);
789		sc->tq = NULL;
790	}
791
792	free(sc->filters, M_DEVBUF);
793	t3_sge_free(sc);
794
795	if (sc->udbs_res != NULL)
796		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
797		    sc->udbs_res);
798
799	if (sc->regs_res != NULL)
800		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
801		    sc->regs_res);
802
803	MTX_DESTROY(&sc->mdio_lock);
804	MTX_DESTROY(&sc->sge.reg_lock);
805	MTX_DESTROY(&sc->elmer_lock);
806	mtx_lock(&t3_list_lock);
807	SLIST_REMOVE(&t3_list, sc, adapter, link);
808	mtx_unlock(&t3_list_lock);
809	ADAPTER_LOCK_DEINIT(sc);
810}
811
812/**
813 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
814 *	@sc: the controller softc
815 *
816 *	Determines how many sets of SGE queues to use and initializes them.
817 *	We support multiple queue sets per port if we have MSI-X, otherwise
818 *	just one queue set per port.
819 */
820static int
821setup_sge_qsets(adapter_t *sc)
822{
823	int i, j, err, irq_idx = 0, qset_idx = 0;
824	u_int ntxq = SGE_TXQ_PER_SET;
825
826	if ((err = t3_sge_alloc(sc)) != 0) {
827		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
828		return (err);
829	}
830
831	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
832		irq_idx = -1;
833
834	for (i = 0; i < (sc)->params.nports; i++) {
835		struct port_info *pi = &sc->port[i];
836
837		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
838			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
839			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
840			    &sc->params.sge.qset[qset_idx], ntxq, pi);
841			if (err) {
842				t3_free_sge_resources(sc, qset_idx);
843				device_printf(sc->dev,
844				    "t3_sge_alloc_qset failed with %d\n", err);
845				return (err);
846			}
847		}
848	}
849
850	return (0);
851}
852
853static void
854cxgb_teardown_interrupts(adapter_t *sc)
855{
856	int i;
857
858	for (i = 0; i < SGE_QSETS; i++) {
859		if (sc->msix_intr_tag[i] == NULL) {
860
861			/* Should have been setup fully or not at all */
862			KASSERT(sc->msix_irq_res[i] == NULL &&
863				sc->msix_irq_rid[i] == 0,
864				("%s: half-done interrupt (%d).", __func__, i));
865
866			continue;
867		}
868
869		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
870				  sc->msix_intr_tag[i]);
871		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
872				     sc->msix_irq_res[i]);
873
874		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
875		sc->msix_irq_rid[i] = 0;
876	}
877
878	if (sc->intr_tag) {
879		KASSERT(sc->irq_res != NULL,
880			("%s: half-done interrupt.", __func__));
881
882		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
883		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
884				     sc->irq_res);
885
886		sc->irq_res = sc->intr_tag = NULL;
887		sc->irq_rid = 0;
888	}
889}
890
891static int
892cxgb_setup_interrupts(adapter_t *sc)
893{
894	struct resource *res;
895	void *tag;
896	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
897
898	sc->irq_rid = intr_flag ? 1 : 0;
899	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
900					     RF_SHAREABLE | RF_ACTIVE);
901	if (sc->irq_res == NULL) {
902		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
903			      intr_flag, sc->irq_rid);
904		err = EINVAL;
905		sc->irq_rid = 0;
906	} else {
907		err = bus_setup_intr(sc->dev, sc->irq_res,
908		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
909		    sc->cxgb_intr, sc, &sc->intr_tag);
910
911		if (err) {
912			device_printf(sc->dev,
913				      "Cannot set up interrupt (%x, %u, %d)\n",
914				      intr_flag, sc->irq_rid, err);
915			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
916					     sc->irq_res);
917			sc->irq_res = sc->intr_tag = NULL;
918			sc->irq_rid = 0;
919		}
920	}
921
922	/* That's all for INTx or MSI */
923	if (!(intr_flag & USING_MSIX) || err)
924		return (err);
925
926	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
927	for (i = 0; i < sc->msi_count - 1; i++) {
928		rid = i + 2;
929		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
930					     RF_SHAREABLE | RF_ACTIVE);
931		if (res == NULL) {
932			device_printf(sc->dev, "Cannot allocate interrupt "
933				      "for message %d\n", rid);
934			err = EINVAL;
935			break;
936		}
937
938		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
939				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
940		if (err) {
941			device_printf(sc->dev, "Cannot set up interrupt "
942				      "for message %d (%d)\n", rid, err);
943			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
944			break;
945		}
946
947		sc->msix_irq_rid[i] = rid;
948		sc->msix_irq_res[i] = res;
949		sc->msix_intr_tag[i] = tag;
950		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
951	}
952
953	if (err)
954		cxgb_teardown_interrupts(sc);
955
956	return (err);
957}
958
959
960static int
961cxgb_port_probe(device_t dev)
962{
963	struct port_info *p;
964	char buf[80];
965	const char *desc;
966
967	p = device_get_softc(dev);
968	desc = p->phy.desc;
969	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
970	device_set_desc_copy(dev, buf);
971	return (0);
972}
973
974
975static int
976cxgb_makedev(struct port_info *pi)
977{
978
979	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
980	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
981
982	if (pi->port_cdev == NULL)
983		return (ENOMEM);
984
985	pi->port_cdev->si_drv1 = (void *)pi;
986
987	return (0);
988}
989
990#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
991    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
992    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
993#define CXGB_CAP_ENABLE CXGB_CAP
994
995static int
996cxgb_port_attach(device_t dev)
997{
998	struct port_info *p;
999	struct ifnet *ifp;
1000	int err;
1001	struct adapter *sc;
1002
1003	p = device_get_softc(dev);
1004	sc = p->adapter;
1005	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1006	    device_get_unit(device_get_parent(dev)), p->port_id);
1007	PORT_LOCK_INIT(p, p->lockbuf);
1008
1009	callout_init(&p->link_check_ch, 1);
1010	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1011
1012	/* Allocate an ifnet object and set it up */
1013	ifp = p->ifp = if_alloc(IFT_ETHER);
1014	if (ifp == NULL) {
1015		device_printf(dev, "Cannot allocate ifnet\n");
1016		return (ENOMEM);
1017	}
1018
1019	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1020	ifp->if_init = cxgb_init;
1021	ifp->if_softc = p;
1022	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1023	ifp->if_ioctl = cxgb_ioctl;
1024	ifp->if_transmit = cxgb_transmit;
1025	ifp->if_qflush = cxgb_qflush;
1026	ifp->if_get_counter = cxgb_get_counter;
1027
1028	ifp->if_capabilities = CXGB_CAP;
1029#ifdef TCP_OFFLOAD
1030	if (is_offload(sc))
1031		ifp->if_capabilities |= IFCAP_TOE4;
1032#endif
1033	ifp->if_capenable = CXGB_CAP_ENABLE;
1034	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1035	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1036
1037	/*
1038	 * Disable TSO on 4-port - it isn't supported by the firmware.
1039	 */
1040	if (sc->params.nports > 2) {
1041		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1042		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1043		ifp->if_hwassist &= ~CSUM_TSO;
1044	}
1045
1046	ether_ifattach(ifp, p->hw_addr);
1047
1048#ifdef DEFAULT_JUMBO
1049	if (sc->params.nports <= 2)
1050		ifp->if_mtu = ETHERMTU_JUMBO;
1051#endif
1052	if ((err = cxgb_makedev(p)) != 0) {
1053		printf("makedev failed %d\n", err);
1054		return (err);
1055	}
1056
1057	/* Create a list of media supported by this port */
1058	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1059	    cxgb_media_status);
1060	cxgb_build_medialist(p);
1061
1062	t3_sge_init_port(p);
1063
1064	return (err);
1065}
1066
1067/*
1068 * cxgb_port_detach() is called via the device_detach methods when
1069 * cxgb_free() calls the bus_generic_detach.  It is responsible for
1070 * removing the device from the view of the kernel, i.e. from all
1071 * interfaces lists etc.  This routine is only called when the driver is
1072 * being unloaded, not when the link goes down.
1073 */
1074static int
1075cxgb_port_detach(device_t dev)
1076{
1077	struct port_info *p;
1078	struct adapter *sc;
1079	int i;
1080
1081	p = device_get_softc(dev);
1082	sc = p->adapter;
1083
1084	/* Tell cxgb_ioctl and if_init that the port is going away */
1085	ADAPTER_LOCK(sc);
1086	SET_DOOMED(p);
1087	wakeup(&sc->flags);
1088	while (IS_BUSY(sc))
1089		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1090	SET_BUSY(sc);
1091	ADAPTER_UNLOCK(sc);
1092
1093	if (p->port_cdev != NULL)
1094		destroy_dev(p->port_cdev);
1095
1096	cxgb_uninit_synchronized(p);
1097	ether_ifdetach(p->ifp);
1098
1099	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1100		struct sge_qset *qs = &sc->sge.qs[i];
1101		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1102
1103		callout_drain(&txq->txq_watchdog);
1104		callout_drain(&txq->txq_timer);
1105	}
1106
1107	PORT_LOCK_DEINIT(p);
1108	if_free(p->ifp);
1109	p->ifp = NULL;
1110
1111	ADAPTER_LOCK(sc);
1112	CLR_BUSY(sc);
1113	wakeup_one(&sc->flags);
1114	ADAPTER_UNLOCK(sc);
1115	return (0);
1116}
1117
1118void
1119t3_fatal_err(struct adapter *sc)
1120{
1121	u_int fw_status[4];
1122
1123	if (sc->flags & FULL_INIT_DONE) {
1124		t3_sge_stop(sc);
1125		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1126		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1127		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1128		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1129		t3_intr_disable(sc);
1130	}
1131	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1132	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1133		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1134		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1135}
1136
1137int
1138t3_os_find_pci_capability(adapter_t *sc, int cap)
1139{
1140	device_t dev;
1141	struct pci_devinfo *dinfo;
1142	pcicfgregs *cfg;
1143	uint32_t status;
1144	uint8_t ptr;
1145
1146	dev = sc->dev;
1147	dinfo = device_get_ivars(dev);
1148	cfg = &dinfo->cfg;
1149
1150	status = pci_read_config(dev, PCIR_STATUS, 2);
1151	if (!(status & PCIM_STATUS_CAPPRESENT))
1152		return (0);
1153
1154	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1155	case 0:
1156	case 1:
1157		ptr = PCIR_CAP_PTR;
1158		break;
1159	case 2:
1160		ptr = PCIR_CAP_PTR_2;
1161		break;
1162	default:
1163		return (0);
1164		break;
1165	}
1166	ptr = pci_read_config(dev, ptr, 1);
1167
1168	while (ptr != 0) {
1169		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1170			return (ptr);
1171		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1172	}
1173
1174	return (0);
1175}
1176
1177int
1178t3_os_pci_save_state(struct adapter *sc)
1179{
1180	device_t dev;
1181	struct pci_devinfo *dinfo;
1182
1183	dev = sc->dev;
1184	dinfo = device_get_ivars(dev);
1185
1186	pci_cfg_save(dev, dinfo, 0);
1187	return (0);
1188}
1189
1190int
1191t3_os_pci_restore_state(struct adapter *sc)
1192{
1193	device_t dev;
1194	struct pci_devinfo *dinfo;
1195
1196	dev = sc->dev;
1197	dinfo = device_get_ivars(dev);
1198
1199	pci_cfg_restore(dev, dinfo);
1200	return (0);
1201}
1202
1203/**
1204 *	t3_os_link_changed - handle link status changes
1205 *	@sc: the adapter associated with the link change
1206 *	@port_id: the port index whose link status has changed
1207 *	@link_status: the new status of the link
1208 *	@speed: the new speed setting
1209 *	@duplex: the new duplex setting
1210 *	@fc: the new flow-control setting
1211 *
1212 *	This is the OS-dependent handler for link status changes.  The OS
1213 *	neutral handler takes care of most of the processing for these events,
1214 *	then calls this handler for any OS-specific processing.
1215 */
1216void
1217t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1218     int duplex, int fc, int mac_was_reset)
1219{
1220	struct port_info *pi = &adapter->port[port_id];
1221	struct ifnet *ifp = pi->ifp;
1222
1223	/* no race with detach, so ifp should always be good */
1224	KASSERT(ifp, ("%s: if detached.", __func__));
1225
1226	/* Reapply mac settings if they were lost due to a reset */
1227	if (mac_was_reset) {
1228		PORT_LOCK(pi);
1229		cxgb_update_mac_settings(pi);
1230		PORT_UNLOCK(pi);
1231	}
1232
1233	if (link_status) {
1234		ifp->if_baudrate = IF_Mbps(speed);
1235		if_link_state_change(ifp, LINK_STATE_UP);
1236	} else
1237		if_link_state_change(ifp, LINK_STATE_DOWN);
1238}
1239
1240/**
1241 *	t3_os_phymod_changed - handle PHY module changes
1242 *	@phy: the PHY reporting the module change
1243 *	@mod_type: new module type
1244 *
1245 *	This is the OS-dependent handler for PHY module changes.  It is
1246 *	invoked when a PHY module is removed or inserted for any OS-specific
1247 *	processing.
1248 */
1249void t3_os_phymod_changed(struct adapter *adap, int port_id)
1250{
1251	static const char *mod_str[] = {
1252		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1253	};
1254	struct port_info *pi = &adap->port[port_id];
1255	int mod = pi->phy.modtype;
1256
1257	if (mod != pi->media.ifm_cur->ifm_data)
1258		cxgb_build_medialist(pi);
1259
1260	if (mod == phy_modtype_none)
1261		if_printf(pi->ifp, "PHY module unplugged\n");
1262	else {
1263		KASSERT(mod < ARRAY_SIZE(mod_str),
1264			("invalid PHY module type %d", mod));
1265		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1266	}
1267}
1268
1269void
1270t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1271{
1272
1273	/*
1274	 * The ifnet might not be allocated before this gets called,
1275	 * as this is called early on in attach by t3_prep_adapter
1276	 * save the address off in the port structure
1277	 */
1278	if (cxgb_debug)
1279		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1280	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1281}
1282
1283/*
1284 * Programs the XGMAC based on the settings in the ifnet.  These settings
1285 * include MTU, MAC address, mcast addresses, etc.
1286 */
1287static void
1288cxgb_update_mac_settings(struct port_info *p)
1289{
1290	struct ifnet *ifp = p->ifp;
1291	struct t3_rx_mode rm;
1292	struct cmac *mac = &p->mac;
1293	int mtu, hwtagging;
1294
1295	PORT_LOCK_ASSERT_OWNED(p);
1296
1297	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1298
1299	mtu = ifp->if_mtu;
1300	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1301		mtu += ETHER_VLAN_ENCAP_LEN;
1302
1303	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1304
1305	t3_mac_set_mtu(mac, mtu);
1306	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1307	t3_mac_set_address(mac, 0, p->hw_addr);
1308	t3_init_rx_mode(&rm, p);
1309	t3_mac_set_rx_mode(mac, &rm);
1310}
1311
1312
1313static int
1314await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1315			      unsigned long n)
1316{
1317	int attempts = 5;
1318
1319	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1320		if (!--attempts)
1321			return (ETIMEDOUT);
1322		t3_os_sleep(10);
1323	}
1324	return 0;
1325}
1326
1327static int
1328init_tp_parity(struct adapter *adap)
1329{
1330	int i;
1331	struct mbuf *m;
1332	struct cpl_set_tcb_field *greq;
1333	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1334
1335	t3_tp_set_offload_mode(adap, 1);
1336
1337	for (i = 0; i < 16; i++) {
1338		struct cpl_smt_write_req *req;
1339
1340		m = m_gethdr(M_WAITOK, MT_DATA);
1341		req = mtod(m, struct cpl_smt_write_req *);
1342		m->m_len = m->m_pkthdr.len = sizeof(*req);
1343		memset(req, 0, sizeof(*req));
1344		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1345		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1346		req->iff = i;
1347		t3_mgmt_tx(adap, m);
1348	}
1349
1350	for (i = 0; i < 2048; i++) {
1351		struct cpl_l2t_write_req *req;
1352
1353		m = m_gethdr(M_WAITOK, MT_DATA);
1354		req = mtod(m, struct cpl_l2t_write_req *);
1355		m->m_len = m->m_pkthdr.len = sizeof(*req);
1356		memset(req, 0, sizeof(*req));
1357		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1358		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1359		req->params = htonl(V_L2T_W_IDX(i));
1360		t3_mgmt_tx(adap, m);
1361	}
1362
1363	for (i = 0; i < 2048; i++) {
1364		struct cpl_rte_write_req *req;
1365
1366		m = m_gethdr(M_WAITOK, MT_DATA);
1367		req = mtod(m, struct cpl_rte_write_req *);
1368		m->m_len = m->m_pkthdr.len = sizeof(*req);
1369		memset(req, 0, sizeof(*req));
1370		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1371		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1372		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1373		t3_mgmt_tx(adap, m);
1374	}
1375
1376	m = m_gethdr(M_WAITOK, MT_DATA);
1377	greq = mtod(m, struct cpl_set_tcb_field *);
1378	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1379	memset(greq, 0, sizeof(*greq));
1380	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1381	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1382	greq->mask = htobe64(1);
1383	t3_mgmt_tx(adap, m);
1384
1385	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1386	t3_tp_set_offload_mode(adap, 0);
1387	return (i);
1388}
1389
1390/**
1391 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1392 *	@adap: the adapter
1393 *
1394 *	Sets up RSS to distribute packets to multiple receive queues.  We
1395 *	configure the RSS CPU lookup table to distribute to the number of HW
1396 *	receive queues, and the response queue lookup table to narrow that
1397 *	down to the response queues actually configured for each port.
1398 *	We always configure the RSS mapping for two ports since the mapping
1399 *	table has plenty of entries.
1400 */
1401static void
1402setup_rss(adapter_t *adap)
1403{
1404	int i;
1405	u_int nq[2];
1406	uint8_t cpus[SGE_QSETS + 1];
1407	uint16_t rspq_map[RSS_TABLE_SIZE];
1408
1409	for (i = 0; i < SGE_QSETS; ++i)
1410		cpus[i] = i;
1411	cpus[SGE_QSETS] = 0xff;
1412
1413	nq[0] = nq[1] = 0;
1414	for_each_port(adap, i) {
1415		const struct port_info *pi = adap2pinfo(adap, i);
1416
1417		nq[pi->tx_chan] += pi->nqsets;
1418	}
1419	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1420		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1421		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1422	}
1423
1424	/* Calculate the reverse RSS map table */
1425	for (i = 0; i < SGE_QSETS; ++i)
1426		adap->rrss_map[i] = 0xff;
1427	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1428		if (adap->rrss_map[rspq_map[i]] == 0xff)
1429			adap->rrss_map[rspq_map[i]] = i;
1430
1431	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1432		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1433	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1434	              cpus, rspq_map);
1435
1436}
1437static void
1438send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1439			      int hi, int port)
1440{
1441	struct mbuf *m;
1442	struct mngt_pktsched_wr *req;
1443
1444	m = m_gethdr(M_NOWAIT, MT_DATA);
1445	if (m) {
1446		req = mtod(m, struct mngt_pktsched_wr *);
1447		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1448		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1449		req->sched = sched;
1450		req->idx = qidx;
1451		req->min = lo;
1452		req->max = hi;
1453		req->binding = port;
1454		m->m_len = m->m_pkthdr.len = sizeof(*req);
1455		t3_mgmt_tx(adap, m);
1456	}
1457}
1458
1459static void
1460bind_qsets(adapter_t *sc)
1461{
1462	int i, j;
1463
1464	for (i = 0; i < (sc)->params.nports; ++i) {
1465		const struct port_info *pi = adap2pinfo(sc, i);
1466
1467		for (j = 0; j < pi->nqsets; ++j) {
1468			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1469					  -1, pi->tx_chan);
1470
1471		}
1472	}
1473}
1474
1475static void
1476update_tpeeprom(struct adapter *adap)
1477{
1478	const struct firmware *tpeeprom;
1479
1480	uint32_t version;
1481	unsigned int major, minor;
1482	int ret, len;
1483	char rev, name[32];
1484
1485	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1486
1487	major = G_TP_VERSION_MAJOR(version);
1488	minor = G_TP_VERSION_MINOR(version);
1489	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1490		return;
1491
1492	rev = t3rev2char(adap);
1493	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1494
1495	tpeeprom = firmware_get(name);
1496	if (tpeeprom == NULL) {
1497		device_printf(adap->dev,
1498			      "could not load TP EEPROM: unable to load %s\n",
1499			      name);
1500		return;
1501	}
1502
1503	len = tpeeprom->datasize - 4;
1504
1505	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1506	if (ret)
1507		goto release_tpeeprom;
1508
1509	if (len != TP_SRAM_LEN) {
1510		device_printf(adap->dev,
1511			      "%s length is wrong len=%d expected=%d\n", name,
1512			      len, TP_SRAM_LEN);
1513		return;
1514	}
1515
1516	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1517	    TP_SRAM_OFFSET);
1518
1519	if (!ret) {
1520		device_printf(adap->dev,
1521			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1522			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1523	} else
1524		device_printf(adap->dev,
1525			      "Protocol SRAM image update in EEPROM failed\n");
1526
1527release_tpeeprom:
1528	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1529
1530	return;
1531}
1532
1533static int
1534update_tpsram(struct adapter *adap)
1535{
1536	const struct firmware *tpsram;
1537	int ret;
1538	char rev, name[32];
1539
1540	rev = t3rev2char(adap);
1541	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1542
1543	update_tpeeprom(adap);
1544
1545	tpsram = firmware_get(name);
1546	if (tpsram == NULL){
1547		device_printf(adap->dev, "could not load TP SRAM\n");
1548		return (EINVAL);
1549	} else
1550		device_printf(adap->dev, "updating TP SRAM\n");
1551
1552	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1553	if (ret)
1554		goto release_tpsram;
1555
1556	ret = t3_set_proto_sram(adap, tpsram->data);
1557	if (ret)
1558		device_printf(adap->dev, "loading protocol SRAM failed\n");
1559
1560release_tpsram:
1561	firmware_put(tpsram, FIRMWARE_UNLOAD);
1562
1563	return ret;
1564}
1565
1566/**
1567 *	cxgb_up - enable the adapter
1568 *	@adap: adapter being enabled
1569 *
1570 *	Called when the first port is enabled, this function performs the
1571 *	actions necessary to make an adapter operational, such as completing
1572 *	the initialization of HW modules, and enabling interrupts.
1573 */
1574static int
1575cxgb_up(struct adapter *sc)
1576{
1577	int err = 0;
1578	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1579
1580	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1581					   __func__, sc->open_device_map));
1582
1583	if ((sc->flags & FULL_INIT_DONE) == 0) {
1584
1585		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1586
1587		if ((sc->flags & FW_UPTODATE) == 0)
1588			if ((err = upgrade_fw(sc)))
1589				goto out;
1590
1591		if ((sc->flags & TPS_UPTODATE) == 0)
1592			if ((err = update_tpsram(sc)))
1593				goto out;
1594
1595		if (is_offload(sc) && nfilters != 0) {
1596			sc->params.mc5.nservers = 0;
1597
1598			if (nfilters < 0)
1599				sc->params.mc5.nfilters = mxf;
1600			else
1601				sc->params.mc5.nfilters = min(nfilters, mxf);
1602		}
1603
1604		err = t3_init_hw(sc, 0);
1605		if (err)
1606			goto out;
1607
1608		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1609		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1610
1611		err = setup_sge_qsets(sc);
1612		if (err)
1613			goto out;
1614
1615		alloc_filters(sc);
1616		setup_rss(sc);
1617
1618		t3_add_configured_sysctls(sc);
1619		sc->flags |= FULL_INIT_DONE;
1620	}
1621
1622	t3_intr_clear(sc);
1623	t3_sge_start(sc);
1624	t3_intr_enable(sc);
1625
1626	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1627	    is_offload(sc) && init_tp_parity(sc) == 0)
1628		sc->flags |= TP_PARITY_INIT;
1629
1630	if (sc->flags & TP_PARITY_INIT) {
1631		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1632		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1633	}
1634
1635	if (!(sc->flags & QUEUES_BOUND)) {
1636		bind_qsets(sc);
1637		setup_hw_filters(sc);
1638		sc->flags |= QUEUES_BOUND;
1639	}
1640
1641	t3_sge_reset_adapter(sc);
1642out:
1643	return (err);
1644}
1645
1646/*
1647 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1648 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1649 * during controller_detach, not here.
1650 */
1651static void
1652cxgb_down(struct adapter *sc)
1653{
1654	t3_sge_stop(sc);
1655	t3_intr_disable(sc);
1656}
1657
1658/*
1659 * if_init for cxgb ports.
1660 */
1661static void
1662cxgb_init(void *arg)
1663{
1664	struct port_info *p = arg;
1665	struct adapter *sc = p->adapter;
1666
1667	ADAPTER_LOCK(sc);
1668	cxgb_init_locked(p); /* releases adapter lock */
1669	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1670}
1671
1672static int
1673cxgb_init_locked(struct port_info *p)
1674{
1675	struct adapter *sc = p->adapter;
1676	struct ifnet *ifp = p->ifp;
1677	struct cmac *mac = &p->mac;
1678	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1679
1680	ADAPTER_LOCK_ASSERT_OWNED(sc);
1681
1682	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1683		gave_up_lock = 1;
1684		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1685			rc = EINTR;
1686			goto done;
1687		}
1688	}
1689	if (IS_DOOMED(p)) {
1690		rc = ENXIO;
1691		goto done;
1692	}
1693	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1694
1695	/*
1696	 * The code that runs during one-time adapter initialization can sleep
1697	 * so it's important not to hold any locks across it.
1698	 */
1699	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1700
1701	if (may_sleep) {
1702		SET_BUSY(sc);
1703		gave_up_lock = 1;
1704		ADAPTER_UNLOCK(sc);
1705	}
1706
1707	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
1708			goto done;
1709
1710	PORT_LOCK(p);
1711	if (isset(&sc->open_device_map, p->port_id) &&
1712	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1713		PORT_UNLOCK(p);
1714		goto done;
1715	}
1716	t3_port_intr_enable(sc, p->port_id);
1717	if (!mac->multiport)
1718		t3_mac_init(mac);
1719	cxgb_update_mac_settings(p);
1720	t3_link_start(&p->phy, mac, &p->link_config);
1721	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1722	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1723	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1724	PORT_UNLOCK(p);
1725
1726	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1727		struct sge_qset *qs = &sc->sge.qs[i];
1728		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1729
1730		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1731				 txq->txq_watchdog.c_cpu);
1732	}
1733
1734	/* all ok */
1735	setbit(&sc->open_device_map, p->port_id);
1736	callout_reset(&p->link_check_ch,
1737	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1738	    link_check_callout, p);
1739
1740done:
1741	if (may_sleep) {
1742		ADAPTER_LOCK(sc);
1743		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1744		CLR_BUSY(sc);
1745	}
1746	if (gave_up_lock)
1747		wakeup_one(&sc->flags);
1748	ADAPTER_UNLOCK(sc);
1749	return (rc);
1750}
1751
1752static int
1753cxgb_uninit_locked(struct port_info *p)
1754{
1755	struct adapter *sc = p->adapter;
1756	int rc;
1757
1758	ADAPTER_LOCK_ASSERT_OWNED(sc);
1759
1760	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1761		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1762			rc = EINTR;
1763			goto done;
1764		}
1765	}
1766	if (IS_DOOMED(p)) {
1767		rc = ENXIO;
1768		goto done;
1769	}
1770	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1771	SET_BUSY(sc);
1772	ADAPTER_UNLOCK(sc);
1773
1774	rc = cxgb_uninit_synchronized(p);
1775
1776	ADAPTER_LOCK(sc);
1777	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1778	CLR_BUSY(sc);
1779	wakeup_one(&sc->flags);
1780done:
1781	ADAPTER_UNLOCK(sc);
1782	return (rc);
1783}
1784
1785/*
1786 * Called on "ifconfig down", and from port_detach
1787 */
1788static int
1789cxgb_uninit_synchronized(struct port_info *pi)
1790{
1791	struct adapter *sc = pi->adapter;
1792	struct ifnet *ifp = pi->ifp;
1793
1794	/*
1795	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1796	 */
1797	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1798
1799	/*
1800	 * Clear this port's bit from the open device map, and then drain all
1801	 * the tasks that can access/manipulate this port's port_info or ifp.
1802	 * We disable this port's interrupts here and so the slow/ext
1803	 * interrupt tasks won't be enqueued.  The tick task will continue to
1804	 * be enqueued every second but the runs after this drain will not see
1805	 * this port in the open device map.
1806	 *
1807	 * A well behaved task must take open_device_map into account and ignore
1808	 * ports that are not open.
1809	 */
1810	clrbit(&sc->open_device_map, pi->port_id);
1811	t3_port_intr_disable(sc, pi->port_id);
1812	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1813	taskqueue_drain(sc->tq, &sc->tick_task);
1814
1815	callout_drain(&pi->link_check_ch);
1816	taskqueue_drain(sc->tq, &pi->link_check_task);
1817
1818	PORT_LOCK(pi);
1819	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1820
1821	/* disable pause frames */
1822	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1823
1824	/* Reset RX FIFO HWM */
1825	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1826			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1827
1828	DELAY(100 * 1000);
1829
1830	/* Wait for TXFIFO empty */
1831	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1832			F_TXFIFO_EMPTY, 1, 20, 5);
1833
1834	DELAY(100 * 1000);
1835	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1836
1837	pi->phy.ops->power_down(&pi->phy, 1);
1838
1839	PORT_UNLOCK(pi);
1840
1841	pi->link_config.link_ok = 0;
1842	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1843
1844	if (sc->open_device_map == 0)
1845		cxgb_down(pi->adapter);
1846
1847	return (0);
1848}
1849
1850/*
1851 * Mark lro enabled or disabled in all qsets for this port
1852 */
1853static int
1854cxgb_set_lro(struct port_info *p, int enabled)
1855{
1856	int i;
1857	struct adapter *adp = p->adapter;
1858	struct sge_qset *q;
1859
1860	for (i = 0; i < p->nqsets; i++) {
1861		q = &adp->sge.qs[p->first_qset + i];
1862		q->lro.enabled = (enabled != 0);
1863	}
1864	return (0);
1865}
1866
1867static int
1868cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1869{
1870	struct port_info *p = ifp->if_softc;
1871	struct adapter *sc = p->adapter;
1872	struct ifreq *ifr = (struct ifreq *)data;
1873	int flags, error = 0, mtu;
1874	uint32_t mask;
1875
1876	switch (command) {
1877	case SIOCSIFMTU:
1878		ADAPTER_LOCK(sc);
1879		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1880		if (error) {
1881fail:
1882			ADAPTER_UNLOCK(sc);
1883			return (error);
1884		}
1885
1886		mtu = ifr->ifr_mtu;
1887		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1888			error = EINVAL;
1889		} else {
1890			ifp->if_mtu = mtu;
1891			PORT_LOCK(p);
1892			cxgb_update_mac_settings(p);
1893			PORT_UNLOCK(p);
1894		}
1895		ADAPTER_UNLOCK(sc);
1896		break;
1897	case SIOCSIFFLAGS:
1898		ADAPTER_LOCK(sc);
1899		if (IS_DOOMED(p)) {
1900			error = ENXIO;
1901			goto fail;
1902		}
1903		if (ifp->if_flags & IFF_UP) {
1904			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1905				flags = p->if_flags;
1906				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1907				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1908					if (IS_BUSY(sc)) {
1909						error = EBUSY;
1910						goto fail;
1911					}
1912					PORT_LOCK(p);
1913					cxgb_update_mac_settings(p);
1914					PORT_UNLOCK(p);
1915				}
1916				ADAPTER_UNLOCK(sc);
1917			} else
1918				error = cxgb_init_locked(p);
1919			p->if_flags = ifp->if_flags;
1920		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1921			error = cxgb_uninit_locked(p);
1922		else
1923			ADAPTER_UNLOCK(sc);
1924
1925		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1926		break;
1927	case SIOCADDMULTI:
1928	case SIOCDELMULTI:
1929		ADAPTER_LOCK(sc);
1930		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1931		if (error)
1932			goto fail;
1933
1934		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1935			PORT_LOCK(p);
1936			cxgb_update_mac_settings(p);
1937			PORT_UNLOCK(p);
1938		}
1939		ADAPTER_UNLOCK(sc);
1940
1941		break;
1942	case SIOCSIFCAP:
1943		ADAPTER_LOCK(sc);
1944		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1945		if (error)
1946			goto fail;
1947
1948		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1949		if (mask & IFCAP_TXCSUM) {
1950			ifp->if_capenable ^= IFCAP_TXCSUM;
1951			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1952
1953			if (IFCAP_TSO4 & ifp->if_capenable &&
1954			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1955				ifp->if_capenable &= ~IFCAP_TSO4;
1956				if_printf(ifp,
1957				    "tso4 disabled due to -txcsum.\n");
1958			}
1959		}
1960		if (mask & IFCAP_TXCSUM_IPV6) {
1961			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1962			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1963
1964			if (IFCAP_TSO6 & ifp->if_capenable &&
1965			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1966				ifp->if_capenable &= ~IFCAP_TSO6;
1967				if_printf(ifp,
1968				    "tso6 disabled due to -txcsum6.\n");
1969			}
1970		}
1971		if (mask & IFCAP_RXCSUM)
1972			ifp->if_capenable ^= IFCAP_RXCSUM;
1973		if (mask & IFCAP_RXCSUM_IPV6)
1974			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1975
1976		/*
1977		 * Note that we leave CSUM_TSO alone (it is always set).  The
1978		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1979		 * sending a TSO request our way, so it's sufficient to toggle
1980		 * IFCAP_TSOx only.
1981		 */
1982		if (mask & IFCAP_TSO4) {
1983			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1984			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1985				if_printf(ifp, "enable txcsum first.\n");
1986				error = EAGAIN;
1987				goto fail;
1988			}
1989			ifp->if_capenable ^= IFCAP_TSO4;
1990		}
1991		if (mask & IFCAP_TSO6) {
1992			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1993			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1994				if_printf(ifp, "enable txcsum6 first.\n");
1995				error = EAGAIN;
1996				goto fail;
1997			}
1998			ifp->if_capenable ^= IFCAP_TSO6;
1999		}
2000		if (mask & IFCAP_LRO) {
2001			ifp->if_capenable ^= IFCAP_LRO;
2002
2003			/* Safe to do this even if cxgb_up not called yet */
2004			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2005		}
2006#ifdef TCP_OFFLOAD
2007		if (mask & IFCAP_TOE4) {
2008			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2009
2010			error = toe_capability(p, enable);
2011			if (error == 0)
2012				ifp->if_capenable ^= mask;
2013		}
2014#endif
2015		if (mask & IFCAP_VLAN_HWTAGGING) {
2016			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2017			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2018				PORT_LOCK(p);
2019				cxgb_update_mac_settings(p);
2020				PORT_UNLOCK(p);
2021			}
2022		}
2023		if (mask & IFCAP_VLAN_MTU) {
2024			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2025			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2026				PORT_LOCK(p);
2027				cxgb_update_mac_settings(p);
2028				PORT_UNLOCK(p);
2029			}
2030		}
2031		if (mask & IFCAP_VLAN_HWTSO)
2032			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2033		if (mask & IFCAP_VLAN_HWCSUM)
2034			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2035
2036#ifdef VLAN_CAPABILITIES
2037		VLAN_CAPABILITIES(ifp);
2038#endif
2039		ADAPTER_UNLOCK(sc);
2040		break;
2041	case SIOCSIFMEDIA:
2042	case SIOCGIFMEDIA:
2043		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2044		break;
2045	default:
2046		error = ether_ioctl(ifp, command, data);
2047	}
2048
2049	return (error);
2050}
2051
2052static int
2053cxgb_media_change(struct ifnet *ifp)
2054{
2055	return (EOPNOTSUPP);
2056}
2057
2058/*
2059 * Translates phy->modtype to the correct Ethernet media subtype.
2060 */
2061static int
2062cxgb_ifm_type(int mod)
2063{
2064	switch (mod) {
2065	case phy_modtype_sr:
2066		return (IFM_10G_SR);
2067	case phy_modtype_lr:
2068		return (IFM_10G_LR);
2069	case phy_modtype_lrm:
2070		return (IFM_10G_LRM);
2071	case phy_modtype_twinax:
2072		return (IFM_10G_TWINAX);
2073	case phy_modtype_twinax_long:
2074		return (IFM_10G_TWINAX_LONG);
2075	case phy_modtype_none:
2076		return (IFM_NONE);
2077	case phy_modtype_unknown:
2078		return (IFM_UNKNOWN);
2079	}
2080
2081	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2082	return (IFM_UNKNOWN);
2083}
2084
2085/*
2086 * Rebuilds the ifmedia list for this port, and sets the current media.
2087 */
2088static void
2089cxgb_build_medialist(struct port_info *p)
2090{
2091	struct cphy *phy = &p->phy;
2092	struct ifmedia *media = &p->media;
2093	int mod = phy->modtype;
2094	int m = IFM_ETHER | IFM_FDX;
2095
2096	PORT_LOCK(p);
2097
2098	ifmedia_removeall(media);
2099	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2100		/* Copper (RJ45) */
2101
2102		if (phy->caps & SUPPORTED_10000baseT_Full)
2103			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2104
2105		if (phy->caps & SUPPORTED_1000baseT_Full)
2106			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2107
2108		if (phy->caps & SUPPORTED_100baseT_Full)
2109			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2110
2111		if (phy->caps & SUPPORTED_10baseT_Full)
2112			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2113
2114		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2115		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2116
2117	} else if (phy->caps & SUPPORTED_TP) {
2118		/* Copper (CX4) */
2119
2120		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2121			("%s: unexpected cap 0x%x", __func__, phy->caps));
2122
2123		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2124		ifmedia_set(media, m | IFM_10G_CX4);
2125
2126	} else if (phy->caps & SUPPORTED_FIBRE &&
2127		   phy->caps & SUPPORTED_10000baseT_Full) {
2128		/* 10G optical (but includes SFP+ twinax) */
2129
2130		m |= cxgb_ifm_type(mod);
2131		if (IFM_SUBTYPE(m) == IFM_NONE)
2132			m &= ~IFM_FDX;
2133
2134		ifmedia_add(media, m, mod, NULL);
2135		ifmedia_set(media, m);
2136
2137	} else if (phy->caps & SUPPORTED_FIBRE &&
2138		   phy->caps & SUPPORTED_1000baseT_Full) {
2139		/* 1G optical */
2140
2141		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2142		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2143		ifmedia_set(media, m | IFM_1000_SX);
2144
2145	} else {
2146		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2147			    phy->caps));
2148	}
2149
2150	PORT_UNLOCK(p);
2151}
2152
2153static void
2154cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2155{
2156	struct port_info *p = ifp->if_softc;
2157	struct ifmedia_entry *cur = p->media.ifm_cur;
2158	int speed = p->link_config.speed;
2159
2160	if (cur->ifm_data != p->phy.modtype) {
2161		cxgb_build_medialist(p);
2162		cur = p->media.ifm_cur;
2163	}
2164
2165	ifmr->ifm_status = IFM_AVALID;
2166	if (!p->link_config.link_ok)
2167		return;
2168
2169	ifmr->ifm_status |= IFM_ACTIVE;
2170
2171	/*
2172	 * active and current will differ iff current media is autoselect.  That
2173	 * can happen only for copper RJ45.
2174	 */
2175	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2176		return;
2177	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2178		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2179
2180	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2181	if (speed == SPEED_10000)
2182		ifmr->ifm_active |= IFM_10G_T;
2183	else if (speed == SPEED_1000)
2184		ifmr->ifm_active |= IFM_1000_T;
2185	else if (speed == SPEED_100)
2186		ifmr->ifm_active |= IFM_100_TX;
2187	else if (speed == SPEED_10)
2188		ifmr->ifm_active |= IFM_10_T;
2189	else
2190		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2191			    speed));
2192}
2193
2194static uint64_t
2195cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2196{
2197	struct port_info *pi = ifp->if_softc;
2198	struct adapter *sc = pi->adapter;
2199	struct cmac *mac = &pi->mac;
2200	struct mac_stats *mstats = &mac->stats;
2201
2202	cxgb_refresh_stats(pi);
2203
2204	switch (c) {
2205	case IFCOUNTER_IPACKETS:
2206		return (mstats->rx_frames);
2207
2208	case IFCOUNTER_IERRORS:
2209		return (mstats->rx_jabber + mstats->rx_data_errs +
2210		    mstats->rx_sequence_errs + mstats->rx_runt +
2211		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2212		    mstats->rx_short + mstats->rx_fcs_errs);
2213
2214	case IFCOUNTER_OPACKETS:
2215		return (mstats->tx_frames);
2216
2217	case IFCOUNTER_OERRORS:
2218		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2219		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2220		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2221
2222	case IFCOUNTER_COLLISIONS:
2223		return (mstats->tx_total_collisions);
2224
2225	case IFCOUNTER_IBYTES:
2226		return (mstats->rx_octets);
2227
2228	case IFCOUNTER_OBYTES:
2229		return (mstats->tx_octets);
2230
2231	case IFCOUNTER_IMCASTS:
2232		return (mstats->rx_mcast_frames);
2233
2234	case IFCOUNTER_OMCASTS:
2235		return (mstats->tx_mcast_frames);
2236
2237	case IFCOUNTER_IQDROPS:
2238		return (mstats->rx_cong_drops);
2239
2240	case IFCOUNTER_OQDROPS: {
2241		int i;
2242		uint64_t drops;
2243
2244		drops = 0;
2245		if (sc->flags & FULL_INIT_DONE) {
2246			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2247				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2248		}
2249
2250		return (drops);
2251
2252	}
2253
2254	default:
2255		return (if_get_counter_default(ifp, c));
2256	}
2257}
2258
2259static void
2260cxgb_async_intr(void *data)
2261{
2262	adapter_t *sc = data;
2263
2264	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2265	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2266	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2267}
2268
2269static void
2270link_check_callout(void *arg)
2271{
2272	struct port_info *pi = arg;
2273	struct adapter *sc = pi->adapter;
2274
2275	if (!isset(&sc->open_device_map, pi->port_id))
2276		return;
2277
2278	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2279}
2280
2281static void
2282check_link_status(void *arg, int pending)
2283{
2284	struct port_info *pi = arg;
2285	struct adapter *sc = pi->adapter;
2286
2287	if (!isset(&sc->open_device_map, pi->port_id))
2288		return;
2289
2290	t3_link_changed(sc, pi->port_id);
2291
2292	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2293	    pi->link_config.link_ok == 0)
2294		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2295}
2296
2297void
2298t3_os_link_intr(struct port_info *pi)
2299{
2300	/*
2301	 * Schedule a link check in the near future.  If the link is flapping
2302	 * rapidly we'll keep resetting the callout and delaying the check until
2303	 * things stabilize a bit.
2304	 */
2305	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2306}
2307
2308static void
2309check_t3b2_mac(struct adapter *sc)
2310{
2311	int i;
2312
2313	if (sc->flags & CXGB_SHUTDOWN)
2314		return;
2315
2316	for_each_port(sc, i) {
2317		struct port_info *p = &sc->port[i];
2318		int status;
2319#ifdef INVARIANTS
2320		struct ifnet *ifp = p->ifp;
2321#endif
2322
2323		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2324		    !p->link_config.link_ok)
2325			continue;
2326
2327		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2328			("%s: state mismatch (drv_flags %x, device_map %x)",
2329			 __func__, ifp->if_drv_flags, sc->open_device_map));
2330
2331		PORT_LOCK(p);
2332		status = t3b2_mac_watchdog_task(&p->mac);
2333		if (status == 1)
2334			p->mac.stats.num_toggled++;
2335		else if (status == 2) {
2336			struct cmac *mac = &p->mac;
2337
2338			cxgb_update_mac_settings(p);
2339			t3_link_start(&p->phy, mac, &p->link_config);
2340			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2341			t3_port_intr_enable(sc, p->port_id);
2342			p->mac.stats.num_resets++;
2343		}
2344		PORT_UNLOCK(p);
2345	}
2346}
2347
2348static void
2349cxgb_tick(void *arg)
2350{
2351	adapter_t *sc = (adapter_t *)arg;
2352
2353	if (sc->flags & CXGB_SHUTDOWN)
2354		return;
2355
2356	taskqueue_enqueue(sc->tq, &sc->tick_task);
2357	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2358}
2359
2360void
2361cxgb_refresh_stats(struct port_info *pi)
2362{
2363	struct timeval tv;
2364	const struct timeval interval = {0, 250000};    /* 250ms */
2365
2366	getmicrotime(&tv);
2367	timevalsub(&tv, &interval);
2368	if (timevalcmp(&tv, &pi->last_refreshed, <))
2369		return;
2370
2371	PORT_LOCK(pi);
2372	t3_mac_update_stats(&pi->mac);
2373	PORT_UNLOCK(pi);
2374	getmicrotime(&pi->last_refreshed);
2375}
2376
2377static void
2378cxgb_tick_handler(void *arg, int count)
2379{
2380	adapter_t *sc = (adapter_t *)arg;
2381	const struct adapter_params *p = &sc->params;
2382	int i;
2383	uint32_t cause, reset;
2384
2385	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2386		return;
2387
2388	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2389		check_t3b2_mac(sc);
2390
2391	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2392	if (cause) {
2393		struct sge_qset *qs = &sc->sge.qs[0];
2394		uint32_t mask, v;
2395
2396		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2397
2398		mask = 1;
2399		for (i = 0; i < SGE_QSETS; i++) {
2400			if (v & mask)
2401				qs[i].rspq.starved++;
2402			mask <<= 1;
2403		}
2404
2405		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2406
2407		for (i = 0; i < SGE_QSETS * 2; i++) {
2408			if (v & mask) {
2409				qs[i / 2].fl[i % 2].empty++;
2410			}
2411			mask <<= 1;
2412		}
2413
2414		/* clear */
2415		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2416		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2417	}
2418
2419	for (i = 0; i < sc->params.nports; i++) {
2420		struct port_info *pi = &sc->port[i];
2421		struct cmac *mac = &pi->mac;
2422
2423		if (!isset(&sc->open_device_map, pi->port_id))
2424			continue;
2425
2426		cxgb_refresh_stats(pi);
2427
2428		if (mac->multiport)
2429			continue;
2430
2431		/* Count rx fifo overflows, once per second */
2432		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2433		reset = 0;
2434		if (cause & F_RXFIFO_OVERFLOW) {
2435			mac->stats.rx_fifo_ovfl++;
2436			reset |= F_RXFIFO_OVERFLOW;
2437		}
2438		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2439	}
2440}
2441
2442static void
2443touch_bars(device_t dev)
2444{
2445	/*
2446	 * Don't enable yet
2447	 */
2448#if !defined(__LP64__) && 0
2449	u32 v;
2450
2451	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2452	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2453	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2454	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2455	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2456	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2457#endif
2458}
2459
2460static int
2461set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2462{
2463	uint8_t *buf;
2464	int err = 0;
2465	u32 aligned_offset, aligned_len, *p;
2466	struct adapter *adapter = pi->adapter;
2467
2468
2469	aligned_offset = offset & ~3;
2470	aligned_len = (len + (offset & 3) + 3) & ~3;
2471
2472	if (aligned_offset != offset || aligned_len != len) {
2473		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2474		if (!buf)
2475			return (ENOMEM);
2476		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2477		if (!err && aligned_len > 4)
2478			err = t3_seeprom_read(adapter,
2479					      aligned_offset + aligned_len - 4,
2480					      (u32 *)&buf[aligned_len - 4]);
2481		if (err)
2482			goto out;
2483		memcpy(buf + (offset & 3), data, len);
2484	} else
2485		buf = (uint8_t *)(uintptr_t)data;
2486
2487	err = t3_seeprom_wp(adapter, 0);
2488	if (err)
2489		goto out;
2490
2491	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2492		err = t3_seeprom_write(adapter, aligned_offset, *p);
2493		aligned_offset += 4;
2494	}
2495
2496	if (!err)
2497		err = t3_seeprom_wp(adapter, 1);
2498out:
2499	if (buf != data)
2500		free(buf, M_DEVBUF);
2501	return err;
2502}
2503
2504
2505static int
2506in_range(int val, int lo, int hi)
2507{
2508	return val < 0 || (val <= hi && val >= lo);
2509}
2510
2511static int
2512cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2513{
2514       return (0);
2515}
2516
2517static int
2518cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2519{
2520       return (0);
2521}
2522
2523static int
2524cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2525    int fflag, struct thread *td)
2526{
2527	int mmd, error = 0;
2528	struct port_info *pi = dev->si_drv1;
2529	adapter_t *sc = pi->adapter;
2530
2531#ifdef PRIV_SUPPORTED
2532	if (priv_check(td, PRIV_DRIVER)) {
2533		if (cxgb_debug)
2534			printf("user does not have access to privileged ioctls\n");
2535		return (EPERM);
2536	}
2537#else
2538	if (suser(td)) {
2539		if (cxgb_debug)
2540			printf("user does not have access to privileged ioctls\n");
2541		return (EPERM);
2542	}
2543#endif
2544
2545	switch (cmd) {
2546	case CHELSIO_GET_MIIREG: {
2547		uint32_t val;
2548		struct cphy *phy = &pi->phy;
2549		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2550
2551		if (!phy->mdio_read)
2552			return (EOPNOTSUPP);
2553		if (is_10G(sc)) {
2554			mmd = mid->phy_id >> 8;
2555			if (!mmd)
2556				mmd = MDIO_DEV_PCS;
2557			else if (mmd > MDIO_DEV_VEND2)
2558				return (EINVAL);
2559
2560			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2561					     mid->reg_num, &val);
2562		} else
2563		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2564					     mid->reg_num & 0x1f, &val);
2565		if (error == 0)
2566			mid->val_out = val;
2567		break;
2568	}
2569	case CHELSIO_SET_MIIREG: {
2570		struct cphy *phy = &pi->phy;
2571		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2572
2573		if (!phy->mdio_write)
2574			return (EOPNOTSUPP);
2575		if (is_10G(sc)) {
2576			mmd = mid->phy_id >> 8;
2577			if (!mmd)
2578				mmd = MDIO_DEV_PCS;
2579			else if (mmd > MDIO_DEV_VEND2)
2580				return (EINVAL);
2581
2582			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2583					      mmd, mid->reg_num, mid->val_in);
2584		} else
2585			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2586					      mid->reg_num & 0x1f,
2587					      mid->val_in);
2588		break;
2589	}
2590	case CHELSIO_SETREG: {
2591		struct ch_reg *edata = (struct ch_reg *)data;
2592		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2593			return (EFAULT);
2594		t3_write_reg(sc, edata->addr, edata->val);
2595		break;
2596	}
2597	case CHELSIO_GETREG: {
2598		struct ch_reg *edata = (struct ch_reg *)data;
2599		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2600			return (EFAULT);
2601		edata->val = t3_read_reg(sc, edata->addr);
2602		break;
2603	}
2604	case CHELSIO_GET_SGE_CONTEXT: {
2605		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2606		mtx_lock_spin(&sc->sge.reg_lock);
2607		switch (ecntxt->cntxt_type) {
2608		case CNTXT_TYPE_EGRESS:
2609			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2610			    ecntxt->data);
2611			break;
2612		case CNTXT_TYPE_FL:
2613			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2614			    ecntxt->data);
2615			break;
2616		case CNTXT_TYPE_RSP:
2617			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2618			    ecntxt->data);
2619			break;
2620		case CNTXT_TYPE_CQ:
2621			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2622			    ecntxt->data);
2623			break;
2624		default:
2625			error = EINVAL;
2626			break;
2627		}
2628		mtx_unlock_spin(&sc->sge.reg_lock);
2629		break;
2630	}
2631	case CHELSIO_GET_SGE_DESC: {
2632		struct ch_desc *edesc = (struct ch_desc *)data;
2633		int ret;
2634		if (edesc->queue_num >= SGE_QSETS * 6)
2635			return (EINVAL);
2636		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2637		    edesc->queue_num % 6, edesc->idx, edesc->data);
2638		if (ret < 0)
2639			return (EINVAL);
2640		edesc->size = ret;
2641		break;
2642	}
2643	case CHELSIO_GET_QSET_PARAMS: {
2644		struct qset_params *q;
2645		struct ch_qset_params *t = (struct ch_qset_params *)data;
2646		int q1 = pi->first_qset;
2647		int nqsets = pi->nqsets;
2648		int i;
2649
2650		if (t->qset_idx >= nqsets)
2651			return EINVAL;
2652
2653		i = q1 + t->qset_idx;
2654		q = &sc->params.sge.qset[i];
2655		t->rspq_size   = q->rspq_size;
2656		t->txq_size[0] = q->txq_size[0];
2657		t->txq_size[1] = q->txq_size[1];
2658		t->txq_size[2] = q->txq_size[2];
2659		t->fl_size[0]  = q->fl_size;
2660		t->fl_size[1]  = q->jumbo_size;
2661		t->polling     = q->polling;
2662		t->lro         = q->lro;
2663		t->intr_lat    = q->coalesce_usecs;
2664		t->cong_thres  = q->cong_thres;
2665		t->qnum        = i;
2666
2667		if ((sc->flags & FULL_INIT_DONE) == 0)
2668			t->vector = 0;
2669		else if (sc->flags & USING_MSIX)
2670			t->vector = rman_get_start(sc->msix_irq_res[i]);
2671		else
2672			t->vector = rman_get_start(sc->irq_res);
2673
2674		break;
2675	}
2676	case CHELSIO_GET_QSET_NUM: {
2677		struct ch_reg *edata = (struct ch_reg *)data;
2678		edata->val = pi->nqsets;
2679		break;
2680	}
2681	case CHELSIO_LOAD_FW: {
2682		uint8_t *fw_data;
2683		uint32_t vers;
2684		struct ch_mem_range *t = (struct ch_mem_range *)data;
2685
2686		/*
2687		 * You're allowed to load a firmware only before FULL_INIT_DONE
2688		 *
2689		 * FW_UPTODATE is also set so the rest of the initialization
2690		 * will not overwrite what was loaded here.  This gives you the
2691		 * flexibility to load any firmware (and maybe shoot yourself in
2692		 * the foot).
2693		 */
2694
2695		ADAPTER_LOCK(sc);
2696		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2697			ADAPTER_UNLOCK(sc);
2698			return (EBUSY);
2699		}
2700
2701		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2702		if (!fw_data)
2703			error = ENOMEM;
2704		else
2705			error = copyin(t->buf, fw_data, t->len);
2706
2707		if (!error)
2708			error = -t3_load_fw(sc, fw_data, t->len);
2709
2710		if (t3_get_fw_version(sc, &vers) == 0) {
2711			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2712			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2713			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2714		}
2715
2716		if (!error)
2717			sc->flags |= FW_UPTODATE;
2718
2719		free(fw_data, M_DEVBUF);
2720		ADAPTER_UNLOCK(sc);
2721		break;
2722	}
2723	case CHELSIO_LOAD_BOOT: {
2724		uint8_t *boot_data;
2725		struct ch_mem_range *t = (struct ch_mem_range *)data;
2726
2727		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2728		if (!boot_data)
2729			return ENOMEM;
2730
2731		error = copyin(t->buf, boot_data, t->len);
2732		if (!error)
2733			error = -t3_load_boot(sc, boot_data, t->len);
2734
2735		free(boot_data, M_DEVBUF);
2736		break;
2737	}
2738	case CHELSIO_GET_PM: {
2739		struct ch_pm *m = (struct ch_pm *)data;
2740		struct tp_params *p = &sc->params.tp;
2741
2742		if (!is_offload(sc))
2743			return (EOPNOTSUPP);
2744
2745		m->tx_pg_sz = p->tx_pg_size;
2746		m->tx_num_pg = p->tx_num_pgs;
2747		m->rx_pg_sz  = p->rx_pg_size;
2748		m->rx_num_pg = p->rx_num_pgs;
2749		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2750
2751		break;
2752	}
2753	case CHELSIO_SET_PM: {
2754		struct ch_pm *m = (struct ch_pm *)data;
2755		struct tp_params *p = &sc->params.tp;
2756
2757		if (!is_offload(sc))
2758			return (EOPNOTSUPP);
2759		if (sc->flags & FULL_INIT_DONE)
2760			return (EBUSY);
2761
2762		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2763		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2764			return (EINVAL);	/* not power of 2 */
2765		if (!(m->rx_pg_sz & 0x14000))
2766			return (EINVAL);	/* not 16KB or 64KB */
2767		if (!(m->tx_pg_sz & 0x1554000))
2768			return (EINVAL);
2769		if (m->tx_num_pg == -1)
2770			m->tx_num_pg = p->tx_num_pgs;
2771		if (m->rx_num_pg == -1)
2772			m->rx_num_pg = p->rx_num_pgs;
2773		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2774			return (EINVAL);
2775		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2776		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2777			return (EINVAL);
2778
2779		p->rx_pg_size = m->rx_pg_sz;
2780		p->tx_pg_size = m->tx_pg_sz;
2781		p->rx_num_pgs = m->rx_num_pg;
2782		p->tx_num_pgs = m->tx_num_pg;
2783		break;
2784	}
2785	case CHELSIO_SETMTUTAB: {
2786		struct ch_mtus *m = (struct ch_mtus *)data;
2787		int i;
2788
2789		if (!is_offload(sc))
2790			return (EOPNOTSUPP);
2791		if (offload_running(sc))
2792			return (EBUSY);
2793		if (m->nmtus != NMTUS)
2794			return (EINVAL);
2795		if (m->mtus[0] < 81)         /* accommodate SACK */
2796			return (EINVAL);
2797
2798		/*
2799		 * MTUs must be in ascending order
2800		 */
2801		for (i = 1; i < NMTUS; ++i)
2802			if (m->mtus[i] < m->mtus[i - 1])
2803				return (EINVAL);
2804
2805		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2806		break;
2807	}
2808	case CHELSIO_GETMTUTAB: {
2809		struct ch_mtus *m = (struct ch_mtus *)data;
2810
2811		if (!is_offload(sc))
2812			return (EOPNOTSUPP);
2813
2814		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2815		m->nmtus = NMTUS;
2816		break;
2817	}
2818	case CHELSIO_GET_MEM: {
2819		struct ch_mem_range *t = (struct ch_mem_range *)data;
2820		struct mc7 *mem;
2821		uint8_t *useraddr;
2822		u64 buf[32];
2823
2824		/*
2825		 * Use these to avoid modifying len/addr in the return
2826		 * struct
2827		 */
2828		uint32_t len = t->len, addr = t->addr;
2829
2830		if (!is_offload(sc))
2831			return (EOPNOTSUPP);
2832		if (!(sc->flags & FULL_INIT_DONE))
2833			return (EIO);         /* need the memory controllers */
2834		if ((addr & 0x7) || (len & 0x7))
2835			return (EINVAL);
2836		if (t->mem_id == MEM_CM)
2837			mem = &sc->cm;
2838		else if (t->mem_id == MEM_PMRX)
2839			mem = &sc->pmrx;
2840		else if (t->mem_id == MEM_PMTX)
2841			mem = &sc->pmtx;
2842		else
2843			return (EINVAL);
2844
2845		/*
2846		 * Version scheme:
2847		 * bits 0..9: chip version
2848		 * bits 10..15: chip revision
2849		 */
2850		t->version = 3 | (sc->params.rev << 10);
2851
2852		/*
2853		 * Read 256 bytes at a time as len can be large and we don't
2854		 * want to use huge intermediate buffers.
2855		 */
2856		useraddr = (uint8_t *)t->buf;
2857		while (len) {
2858			unsigned int chunk = min(len, sizeof(buf));
2859
2860			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2861			if (error)
2862				return (-error);
2863			if (copyout(buf, useraddr, chunk))
2864				return (EFAULT);
2865			useraddr += chunk;
2866			addr += chunk;
2867			len -= chunk;
2868		}
2869		break;
2870	}
2871	case CHELSIO_READ_TCAM_WORD: {
2872		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2873
2874		if (!is_offload(sc))
2875			return (EOPNOTSUPP);
2876		if (!(sc->flags & FULL_INIT_DONE))
2877			return (EIO);         /* need MC5 */
2878		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2879		break;
2880	}
2881	case CHELSIO_SET_TRACE_FILTER: {
2882		struct ch_trace *t = (struct ch_trace *)data;
2883		const struct trace_params *tp;
2884
2885		tp = (const struct trace_params *)&t->sip;
2886		if (t->config_tx)
2887			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2888					       t->trace_tx);
2889		if (t->config_rx)
2890			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2891					       t->trace_rx);
2892		break;
2893	}
2894	case CHELSIO_SET_PKTSCHED: {
2895		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2896		if (sc->open_device_map == 0)
2897			return (EAGAIN);
2898		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2899		    p->binding);
2900		break;
2901	}
2902	case CHELSIO_IFCONF_GETREGS: {
2903		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2904		int reglen = cxgb_get_regs_len();
2905		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2906		if (buf == NULL) {
2907			return (ENOMEM);
2908		}
2909		if (regs->len > reglen)
2910			regs->len = reglen;
2911		else if (regs->len < reglen)
2912			error = ENOBUFS;
2913
2914		if (!error) {
2915			cxgb_get_regs(sc, regs, buf);
2916			error = copyout(buf, regs->data, reglen);
2917		}
2918		free(buf, M_DEVBUF);
2919
2920		break;
2921	}
2922	case CHELSIO_SET_HW_SCHED: {
2923		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2924		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2925
2926		if ((sc->flags & FULL_INIT_DONE) == 0)
2927			return (EAGAIN);       /* need TP to be initialized */
2928		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2929		    !in_range(t->channel, 0, 1) ||
2930		    !in_range(t->kbps, 0, 10000000) ||
2931		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2932		    !in_range(t->flow_ipg, 0,
2933			      dack_ticks_to_usec(sc, 0x7ff)))
2934			return (EINVAL);
2935
2936		if (t->kbps >= 0) {
2937			error = t3_config_sched(sc, t->kbps, t->sched);
2938			if (error < 0)
2939				return (-error);
2940		}
2941		if (t->class_ipg >= 0)
2942			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2943		if (t->flow_ipg >= 0) {
2944			t->flow_ipg *= 1000;     /* us -> ns */
2945			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2946		}
2947		if (t->mode >= 0) {
2948			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2949
2950			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2951					 bit, t->mode ? bit : 0);
2952		}
2953		if (t->channel >= 0)
2954			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2955					 1 << t->sched, t->channel << t->sched);
2956		break;
2957	}
2958	case CHELSIO_GET_EEPROM: {
2959		int i;
2960		struct ch_eeprom *e = (struct ch_eeprom *)data;
2961		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2962
2963		if (buf == NULL) {
2964			return (ENOMEM);
2965		}
2966		e->magic = EEPROM_MAGIC;
2967		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2968			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2969
2970		if (!error)
2971			error = copyout(buf + e->offset, e->data, e->len);
2972
2973		free(buf, M_DEVBUF);
2974		break;
2975	}
2976	case CHELSIO_CLEAR_STATS: {
2977		if (!(sc->flags & FULL_INIT_DONE))
2978			return EAGAIN;
2979
2980		PORT_LOCK(pi);
2981		t3_mac_update_stats(&pi->mac);
2982		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2983		PORT_UNLOCK(pi);
2984		break;
2985	}
2986	case CHELSIO_GET_UP_LA: {
2987		struct ch_up_la *la = (struct ch_up_la *)data;
2988		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
2989		if (buf == NULL) {
2990			return (ENOMEM);
2991		}
2992		if (la->bufsize < LA_BUFSIZE)
2993			error = ENOBUFS;
2994
2995		if (!error)
2996			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
2997					      &la->bufsize, buf);
2998		if (!error)
2999			error = copyout(buf, la->data, la->bufsize);
3000
3001		free(buf, M_DEVBUF);
3002		break;
3003	}
3004	case CHELSIO_GET_UP_IOQS: {
3005		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3006		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3007		uint32_t *v;
3008
3009		if (buf == NULL) {
3010			return (ENOMEM);
3011		}
3012		if (ioqs->bufsize < IOQS_BUFSIZE)
3013			error = ENOBUFS;
3014
3015		if (!error)
3016			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3017
3018		if (!error) {
3019			v = (uint32_t *)buf;
3020
3021			ioqs->ioq_rx_enable = *v++;
3022			ioqs->ioq_tx_enable = *v++;
3023			ioqs->ioq_rx_status = *v++;
3024			ioqs->ioq_tx_status = *v++;
3025
3026			error = copyout(v, ioqs->data, ioqs->bufsize);
3027		}
3028
3029		free(buf, M_DEVBUF);
3030		break;
3031	}
3032	case CHELSIO_SET_FILTER: {
3033		struct ch_filter *f = (struct ch_filter *)data;
3034		struct filter_info *p;
3035		unsigned int nfilters = sc->params.mc5.nfilters;
3036
3037		if (!is_offload(sc))
3038			return (EOPNOTSUPP);	/* No TCAM */
3039		if (!(sc->flags & FULL_INIT_DONE))
3040			return (EAGAIN);	/* mc5 not setup yet */
3041		if (nfilters == 0)
3042			return (EBUSY);		/* TOE will use TCAM */
3043
3044		/* sanity checks */
3045		if (f->filter_id >= nfilters ||
3046		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3047		    (f->val.sport && f->mask.sport != 0xffff) ||
3048		    (f->val.dport && f->mask.dport != 0xffff) ||
3049		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3050		    (f->val.vlan_prio &&
3051			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3052		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3053		    f->qset >= SGE_QSETS ||
3054		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3055			return (EINVAL);
3056
3057		/* Was allocated with M_WAITOK */
3058		KASSERT(sc->filters, ("filter table NULL\n"));
3059
3060		p = &sc->filters[f->filter_id];
3061		if (p->locked)
3062			return (EPERM);
3063
3064		bzero(p, sizeof(*p));
3065		p->sip = f->val.sip;
3066		p->sip_mask = f->mask.sip;
3067		p->dip = f->val.dip;
3068		p->sport = f->val.sport;
3069		p->dport = f->val.dport;
3070		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3071		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3072		    FILTER_NO_VLAN_PRI;
3073		p->mac_hit = f->mac_hit;
3074		p->mac_vld = f->mac_addr_idx != 0xffff;
3075		p->mac_idx = f->mac_addr_idx;
3076		p->pkt_type = f->proto;
3077		p->report_filter_id = f->want_filter_id;
3078		p->pass = f->pass;
3079		p->rss = f->rss;
3080		p->qset = f->qset;
3081
3082		error = set_filter(sc, f->filter_id, p);
3083		if (error == 0)
3084			p->valid = 1;
3085		break;
3086	}
3087	case CHELSIO_DEL_FILTER: {
3088		struct ch_filter *f = (struct ch_filter *)data;
3089		struct filter_info *p;
3090		unsigned int nfilters = sc->params.mc5.nfilters;
3091
3092		if (!is_offload(sc))
3093			return (EOPNOTSUPP);
3094		if (!(sc->flags & FULL_INIT_DONE))
3095			return (EAGAIN);
3096		if (nfilters == 0 || sc->filters == NULL)
3097			return (EINVAL);
3098		if (f->filter_id >= nfilters)
3099		       return (EINVAL);
3100
3101		p = &sc->filters[f->filter_id];
3102		if (p->locked)
3103			return (EPERM);
3104		if (!p->valid)
3105			return (EFAULT); /* Read "Bad address" as "Bad index" */
3106
3107		bzero(p, sizeof(*p));
3108		p->sip = p->sip_mask = 0xffffffff;
3109		p->vlan = 0xfff;
3110		p->vlan_prio = FILTER_NO_VLAN_PRI;
3111		p->pkt_type = 1;
3112		error = set_filter(sc, f->filter_id, p);
3113		break;
3114	}
3115	case CHELSIO_GET_FILTER: {
3116		struct ch_filter *f = (struct ch_filter *)data;
3117		struct filter_info *p;
3118		unsigned int i, nfilters = sc->params.mc5.nfilters;
3119
3120		if (!is_offload(sc))
3121			return (EOPNOTSUPP);
3122		if (!(sc->flags & FULL_INIT_DONE))
3123			return (EAGAIN);
3124		if (nfilters == 0 || sc->filters == NULL)
3125			return (EINVAL);
3126
3127		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3128		for (; i < nfilters; i++) {
3129			p = &sc->filters[i];
3130			if (!p->valid)
3131				continue;
3132
3133			bzero(f, sizeof(*f));
3134
3135			f->filter_id = i;
3136			f->val.sip = p->sip;
3137			f->mask.sip = p->sip_mask;
3138			f->val.dip = p->dip;
3139			f->mask.dip = p->dip ? 0xffffffff : 0;
3140			f->val.sport = p->sport;
3141			f->mask.sport = p->sport ? 0xffff : 0;
3142			f->val.dport = p->dport;
3143			f->mask.dport = p->dport ? 0xffff : 0;
3144			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3145			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3146			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3147			    0 : p->vlan_prio;
3148			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3149			    0 : FILTER_NO_VLAN_PRI;
3150			f->mac_hit = p->mac_hit;
3151			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3152			f->proto = p->pkt_type;
3153			f->want_filter_id = p->report_filter_id;
3154			f->pass = p->pass;
3155			f->rss = p->rss;
3156			f->qset = p->qset;
3157
3158			break;
3159		}
3160
3161		if (i == nfilters)
3162			f->filter_id = 0xffffffff;
3163		break;
3164	}
3165	default:
3166		return (EOPNOTSUPP);
3167		break;
3168	}
3169
3170	return (error);
3171}
3172
3173static __inline void
3174reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3175    unsigned int end)
3176{
3177	uint32_t *p = (uint32_t *)(buf + start);
3178
3179	for ( ; start <= end; start += sizeof(uint32_t))
3180		*p++ = t3_read_reg(ap, start);
3181}
3182
3183#define T3_REGMAP_SIZE (3 * 1024)
3184static int
3185cxgb_get_regs_len(void)
3186{
3187	return T3_REGMAP_SIZE;
3188}
3189
3190static void
3191cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3192{
3193
3194	/*
3195	 * Version scheme:
3196	 * bits 0..9: chip version
3197	 * bits 10..15: chip revision
3198	 * bit 31: set for PCIe cards
3199	 */
3200	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3201
3202	/*
3203	 * We skip the MAC statistics registers because they are clear-on-read.
3204	 * Also reading multi-register stats would need to synchronize with the
3205	 * periodic mac stats accumulation.  Hard to justify the complexity.
3206	 */
3207	memset(buf, 0, cxgb_get_regs_len());
3208	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3209	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3210	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3211	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3212	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3213	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3214		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3215	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3216		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3217}
3218
3219static int
3220alloc_filters(struct adapter *sc)
3221{
3222	struct filter_info *p;
3223	unsigned int nfilters = sc->params.mc5.nfilters;
3224
3225	if (nfilters == 0)
3226		return (0);
3227
3228	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3229	sc->filters = p;
3230
3231	p = &sc->filters[nfilters - 1];
3232	p->vlan = 0xfff;
3233	p->vlan_prio = FILTER_NO_VLAN_PRI;
3234	p->pass = p->rss = p->valid = p->locked = 1;
3235
3236	return (0);
3237}
3238
3239static int
3240setup_hw_filters(struct adapter *sc)
3241{
3242	int i, rc;
3243	unsigned int nfilters = sc->params.mc5.nfilters;
3244
3245	if (!sc->filters)
3246		return (0);
3247
3248	t3_enable_filters(sc);
3249
3250	for (i = rc = 0; i < nfilters && !rc; i++) {
3251		if (sc->filters[i].locked)
3252			rc = set_filter(sc, i, &sc->filters[i]);
3253	}
3254
3255	return (rc);
3256}
3257
3258static int
3259set_filter(struct adapter *sc, int id, const struct filter_info *f)
3260{
3261	int len;
3262	struct mbuf *m;
3263	struct ulp_txpkt *txpkt;
3264	struct work_request_hdr *wr;
3265	struct cpl_pass_open_req *oreq;
3266	struct cpl_set_tcb_field *sreq;
3267
3268	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3269	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3270
3271	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3272	      sc->params.mc5.nfilters;
3273
3274	m = m_gethdr(M_WAITOK, MT_DATA);
3275	m->m_len = m->m_pkthdr.len = len;
3276	bzero(mtod(m, char *), len);
3277
3278	wr = mtod(m, struct work_request_hdr *);
3279	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3280
3281	oreq = (struct cpl_pass_open_req *)(wr + 1);
3282	txpkt = (struct ulp_txpkt *)oreq;
3283	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3284	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3285	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3286	oreq->local_port = htons(f->dport);
3287	oreq->peer_port = htons(f->sport);
3288	oreq->local_ip = htonl(f->dip);
3289	oreq->peer_ip = htonl(f->sip);
3290	oreq->peer_netmask = htonl(f->sip_mask);
3291	oreq->opt0h = 0;
3292	oreq->opt0l = htonl(F_NO_OFFLOAD);
3293	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3294			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3295			 V_VLAN_PRI(f->vlan_prio >> 1) |
3296			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3297			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3298			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3299
3300	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3301	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3302			  (f->report_filter_id << 15) | (1 << 23) |
3303			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3304	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3305	t3_mgmt_tx(sc, m);
3306
3307	if (f->pass && !f->rss) {
3308		len = sizeof(*sreq);
3309		m = m_gethdr(M_WAITOK, MT_DATA);
3310		m->m_len = m->m_pkthdr.len = len;
3311		bzero(mtod(m, char *), len);
3312		sreq = mtod(m, struct cpl_set_tcb_field *);
3313		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3314		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3315				 (u64)sc->rrss_map[f->qset] << 19);
3316		t3_mgmt_tx(sc, m);
3317	}
3318	return 0;
3319}
3320
3321static inline void
3322mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3323    unsigned int word, u64 mask, u64 val)
3324{
3325	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3326	req->reply = V_NO_REPLY(1);
3327	req->cpu_idx = 0;
3328	req->word = htons(word);
3329	req->mask = htobe64(mask);
3330	req->val = htobe64(val);
3331}
3332
3333static inline void
3334set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3335    unsigned int word, u64 mask, u64 val)
3336{
3337	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3338
3339	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3340	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3341	mk_set_tcb_field(req, tid, word, mask, val);
3342}
3343
3344void
3345t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3346{
3347	struct adapter *sc;
3348
3349	mtx_lock(&t3_list_lock);
3350	SLIST_FOREACH(sc, &t3_list, link) {
3351		/*
3352		 * func should not make any assumptions about what state sc is
3353		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3354		 */
3355		func(sc, arg);
3356	}
3357	mtx_unlock(&t3_list_lock);
3358}
3359
3360#ifdef TCP_OFFLOAD
3361static int
3362toe_capability(struct port_info *pi, int enable)
3363{
3364	int rc;
3365	struct adapter *sc = pi->adapter;
3366
3367	ADAPTER_LOCK_ASSERT_OWNED(sc);
3368
3369	if (!is_offload(sc))
3370		return (ENODEV);
3371
3372	if (enable) {
3373		if (!(sc->flags & FULL_INIT_DONE)) {
3374			log(LOG_WARNING,
3375			    "You must enable a cxgb interface first\n");
3376			return (EAGAIN);
3377		}
3378
3379		if (isset(&sc->offload_map, pi->port_id))
3380			return (0);
3381
3382		if (!(sc->flags & TOM_INIT_DONE)) {
3383			rc = t3_activate_uld(sc, ULD_TOM);
3384			if (rc == EAGAIN) {
3385				log(LOG_WARNING,
3386				    "You must kldload t3_tom.ko before trying "
3387				    "to enable TOE on a cxgb interface.\n");
3388			}
3389			if (rc != 0)
3390				return (rc);
3391			KASSERT(sc->tom_softc != NULL,
3392			    ("%s: TOM activated but softc NULL", __func__));
3393			KASSERT(sc->flags & TOM_INIT_DONE,
3394			    ("%s: TOM activated but flag not set", __func__));
3395		}
3396
3397		setbit(&sc->offload_map, pi->port_id);
3398
3399		/*
3400		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3401		 * enabled on any port.  Need to figure out how to enable,
3402		 * disable, load, and unload iWARP cleanly.
3403		 */
3404		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3405		    t3_activate_uld(sc, ULD_IWARP) == 0)
3406			setbit(&sc->offload_map, MAX_NPORTS);
3407	} else {
3408		if (!isset(&sc->offload_map, pi->port_id))
3409			return (0);
3410
3411		KASSERT(sc->flags & TOM_INIT_DONE,
3412		    ("%s: TOM never initialized?", __func__));
3413		clrbit(&sc->offload_map, pi->port_id);
3414	}
3415
3416	return (0);
3417}
3418
3419/*
3420 * Add an upper layer driver to the global list.
3421 */
3422int
3423t3_register_uld(struct uld_info *ui)
3424{
3425	int rc = 0;
3426	struct uld_info *u;
3427
3428	mtx_lock(&t3_uld_list_lock);
3429	SLIST_FOREACH(u, &t3_uld_list, link) {
3430	    if (u->uld_id == ui->uld_id) {
3431		    rc = EEXIST;
3432		    goto done;
3433	    }
3434	}
3435
3436	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3437	ui->refcount = 0;
3438done:
3439	mtx_unlock(&t3_uld_list_lock);
3440	return (rc);
3441}
3442
3443int
3444t3_unregister_uld(struct uld_info *ui)
3445{
3446	int rc = EINVAL;
3447	struct uld_info *u;
3448
3449	mtx_lock(&t3_uld_list_lock);
3450
3451	SLIST_FOREACH(u, &t3_uld_list, link) {
3452	    if (u == ui) {
3453		    if (ui->refcount > 0) {
3454			    rc = EBUSY;
3455			    goto done;
3456		    }
3457
3458		    SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3459		    rc = 0;
3460		    goto done;
3461	    }
3462	}
3463done:
3464	mtx_unlock(&t3_uld_list_lock);
3465	return (rc);
3466}
3467
3468int
3469t3_activate_uld(struct adapter *sc, int id)
3470{
3471	int rc = EAGAIN;
3472	struct uld_info *ui;
3473
3474	mtx_lock(&t3_uld_list_lock);
3475
3476	SLIST_FOREACH(ui, &t3_uld_list, link) {
3477		if (ui->uld_id == id) {
3478			rc = ui->activate(sc);
3479			if (rc == 0)
3480				ui->refcount++;
3481			goto done;
3482		}
3483	}
3484done:
3485	mtx_unlock(&t3_uld_list_lock);
3486
3487	return (rc);
3488}
3489
3490int
3491t3_deactivate_uld(struct adapter *sc, int id)
3492{
3493	int rc = EINVAL;
3494	struct uld_info *ui;
3495
3496	mtx_lock(&t3_uld_list_lock);
3497
3498	SLIST_FOREACH(ui, &t3_uld_list, link) {
3499		if (ui->uld_id == id) {
3500			rc = ui->deactivate(sc);
3501			if (rc == 0)
3502				ui->refcount--;
3503			goto done;
3504		}
3505	}
3506done:
3507	mtx_unlock(&t3_uld_list_lock);
3508
3509	return (rc);
3510}
3511
3512static int
3513cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3514    struct mbuf *m)
3515{
3516	m_freem(m);
3517	return (EDOOFUS);
3518}
3519
3520int
3521t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3522{
3523	uintptr_t *loc, new;
3524
3525	if (opcode >= NUM_CPL_HANDLERS)
3526		return (EINVAL);
3527
3528	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3529	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3530	atomic_store_rel_ptr(loc, new);
3531
3532	return (0);
3533}
3534#endif
3535
3536static int
3537cxgbc_mod_event(module_t mod, int cmd, void *arg)
3538{
3539	int rc = 0;
3540
3541	switch (cmd) {
3542	case MOD_LOAD:
3543		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3544		SLIST_INIT(&t3_list);
3545#ifdef TCP_OFFLOAD
3546		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3547		SLIST_INIT(&t3_uld_list);
3548#endif
3549		break;
3550
3551	case MOD_UNLOAD:
3552#ifdef TCP_OFFLOAD
3553		mtx_lock(&t3_uld_list_lock);
3554		if (!SLIST_EMPTY(&t3_uld_list)) {
3555			rc = EBUSY;
3556			mtx_unlock(&t3_uld_list_lock);
3557			break;
3558		}
3559		mtx_unlock(&t3_uld_list_lock);
3560		mtx_destroy(&t3_uld_list_lock);
3561#endif
3562		mtx_lock(&t3_list_lock);
3563		if (!SLIST_EMPTY(&t3_list)) {
3564			rc = EBUSY;
3565			mtx_unlock(&t3_list_lock);
3566			break;
3567		}
3568		mtx_unlock(&t3_list_lock);
3569		mtx_destroy(&t3_list_lock);
3570		break;
3571	}
3572
3573	return (rc);
3574}
3575