1/**************************************************************************
2SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3
4Copyright (c) 2007-2009, Chelsio Inc.
5All rights reserved.
6
7Redistribution and use in source and binary forms, with or without
8modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11    this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Chelsio Corporation nor the names of its
14    contributors may be used to endorse or promote products derived from
15    this software without specific prior written permission.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27POSSIBILITY OF SUCH DAMAGE.
28
29***************************************************************************/
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include "opt_inet.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/bus.h>
40#include <sys/module.h>
41#include <sys/pciio.h>
42#include <sys/conf.h>
43#include <machine/bus.h>
44#include <machine/resource.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/ethernet.h>
62#include <net/if.h>
63#include <net/if_var.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67#include <net/if_types.h>
68#include <net/if_vlan_var.h>
69
70#include <netinet/in_systm.h>
71#include <netinet/in.h>
72#include <netinet/if_ether.h>
#include <netinet/ip.h>
75#include <netinet/tcp.h>
76#include <netinet/udp.h>
77#include <netinet/netdump/netdump.h>
78
79#include <dev/pci/pcireg.h>
80#include <dev/pci/pcivar.h>
81#include <dev/pci/pci_private.h>
82
83#include <cxgb_include.h>
84
85#ifdef PRIV_SUPPORTED
86#include <sys/priv.h>
87#endif
88
89static int cxgb_setup_interrupts(adapter_t *);
90static void cxgb_teardown_interrupts(adapter_t *);
91static void cxgb_init(void *);
92static int cxgb_init_locked(struct port_info *);
93static int cxgb_uninit_locked(struct port_info *);
94static int cxgb_uninit_synchronized(struct port_info *);
95static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96static int cxgb_media_change(struct ifnet *);
97static int cxgb_ifm_type(int);
98static void cxgb_build_medialist(struct port_info *);
99static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
101static int setup_sge_qsets(adapter_t *);
102static void cxgb_async_intr(void *);
103static void cxgb_tick_handler(void *, int);
104static void cxgb_tick(void *);
105static void link_check_callout(void *);
106static void check_link_status(void *, int);
107static void setup_rss(adapter_t *sc);
108static int alloc_filters(struct adapter *);
109static int setup_hw_filters(struct adapter *);
110static int set_filter(struct adapter *, int, const struct filter_info *);
111static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
112    unsigned int, u64, u64);
113static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
114    unsigned int, u64, u64);
115#ifdef TCP_OFFLOAD
116static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
117#endif
118
119/* Attachment glue for the PCI controller end of the device.  Each port of
120 * the device is attached separately, as defined later.
121 */
122static int cxgb_controller_probe(device_t);
123static int cxgb_controller_attach(device_t);
124static int cxgb_controller_detach(device_t);
125static void cxgb_free(struct adapter *);
126static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
127    unsigned int end);
128static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
129static int cxgb_get_regs_len(void);
130static void touch_bars(device_t dev);
131static void cxgb_update_mac_settings(struct port_info *p);
132#ifdef TCP_OFFLOAD
133static int toe_capability(struct port_info *, int);
134#endif
135
136/* Table for probing the cards.  The desc field isn't actually used */
137struct cxgb_ident {
138	uint16_t	vendor;
139	uint16_t	device;
140	int		index;
141	char		*desc;
142} cxgb_identifiers[] = {
143	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
144	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
145	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
146	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
147	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
148	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
149	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
150	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
151	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
152	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
153	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
154	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
155	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
156	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
157	{0, 0, 0, NULL}
158};
159
160static device_method_t cxgb_controller_methods[] = {
161	DEVMETHOD(device_probe,		cxgb_controller_probe),
162	DEVMETHOD(device_attach,	cxgb_controller_attach),
163	DEVMETHOD(device_detach,	cxgb_controller_detach),
164
165	DEVMETHOD_END
166};
167
168static driver_t cxgb_controller_driver = {
169	"cxgbc",
170	cxgb_controller_methods,
171	sizeof(struct adapter)
172};
173
174static int cxgbc_mod_event(module_t, int, void *);
175static devclass_t	cxgb_controller_devclass;
176DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
177    cxgbc_mod_event, 0);
178MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
179    nitems(cxgb_identifiers) - 1);
180MODULE_VERSION(cxgbc, 1);
181MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
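
/*
 * Note: the controller driver ("cxgbc") and the per-port driver ("cxgb",
 * below) are normally built into a single module.  In stock FreeBSD builds
 * that module is assumed to be if_cxgb.ko, so the driver can be loaded at
 * boot with if_cxgb_load="YES" in /boot/loader.conf or at run time with
 * kldload if_cxgb.
 */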
182
183/*
184 * Attachment glue for the ports.  Attachment is done directly to the
185 * controller device.
186 */
187static int cxgb_port_probe(device_t);
188static int cxgb_port_attach(device_t);
189static int cxgb_port_detach(device_t);
190
191static device_method_t cxgb_port_methods[] = {
192	DEVMETHOD(device_probe,		cxgb_port_probe),
193	DEVMETHOD(device_attach,	cxgb_port_attach),
194	DEVMETHOD(device_detach,	cxgb_port_detach),
195	{ 0, 0 }
196};
197
198static driver_t cxgb_port_driver = {
199	"cxgb",
200	cxgb_port_methods,
201	0
202};
203
204static d_ioctl_t cxgb_extension_ioctl;
205static d_open_t cxgb_extension_open;
206static d_close_t cxgb_extension_close;
207
208static struct cdevsw cxgb_cdevsw = {
209       .d_version =    D_VERSION,
210       .d_flags =      0,
211       .d_open =       cxgb_extension_open,
212       .d_close =      cxgb_extension_close,
213       .d_ioctl =      cxgb_extension_ioctl,
214       .d_name =       "cxgb",
215};
216
217static devclass_t	cxgb_port_devclass;
218DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
219MODULE_VERSION(cxgb, 1);
220
221NETDUMP_DEFINE(cxgb);
222
223static struct mtx t3_list_lock;
224static SLIST_HEAD(, adapter) t3_list;
225#ifdef TCP_OFFLOAD
226static struct mtx t3_uld_list_lock;
227static SLIST_HEAD(, uld_info) t3_uld_list;
228#endif
229
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi_allowed = 2: choose from among all three options
 * msi_allowed = 1: only consider MSI and pin interrupts
 * msi_allowed = 0: force pin interrupts
 */
239static int msi_allowed = 2;
240
241SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
242SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
243    "MSI-X, MSI, INTx selector");
244
245/*
246 * The driver uses an auto-queue algorithm by default.
247 * To disable it and force a single queue-set per port, use multiq = 0
248 */
249static int multiq = 1;
250SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
251    "use min(ncpus/ports, 8) queue-sets per port");
252
/*
 * By default the driver will not update the firmware unless it was
 * compiled against a newer version.
 */
258static int force_fw_update = 0;
259SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
260    "update firmware even if up to date");
261
int cxgb_use_16k_clusters = -1;
SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
265
266static int nfilters = -1;
267SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
268    &nfilters, 0, "max number of entries in the filter table");
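
/*
 * Example (values are illustrative only): the knobs above are declared with
 * CTLFLAG_RDTUN, so they are loader tunables and can be set from
 * /boot/loader.conf before the driver attaches, e.g.:
 *
 *	hw.cxgb.msi_allowed="1"		# consider only MSI and INTx
 *	hw.cxgb.multiq="0"		# one queue-set per port
 *	hw.cxgb.use_16k_clusters="1"	# 16kB clusters for the jumbo queue
 *	hw.cxgb.nfilters="100"		# cap the hardware filter table
 *
 * The same values can be read back at run time with sysctl(8) under the
 * hw.cxgb tree.
 */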
269
270enum {
271	MAX_TXQ_ENTRIES      = 16384,
272	MAX_CTRL_TXQ_ENTRIES = 1024,
273	MAX_RSPQ_ENTRIES     = 16384,
274	MAX_RX_BUFFERS       = 16384,
275	MAX_RX_JUMBO_BUFFERS = 16384,
276	MIN_TXQ_ENTRIES      = 4,
277	MIN_CTRL_TXQ_ENTRIES = 4,
278	MIN_RSPQ_ENTRIES     = 32,
279	MIN_FL_ENTRIES       = 32,
280	MIN_FL_JUMBO_ENTRIES = 32
281};
282
283struct filter_info {
284	u32 sip;
285	u32 sip_mask;
286	u32 dip;
287	u16 sport;
288	u16 dport;
289	u32 vlan:12;
290	u32 vlan_prio:3;
291	u32 mac_hit:1;
292	u32 mac_idx:4;
293	u32 mac_vld:1;
294	u32 pkt_type:2;
295	u32 report_filter_id:1;
296	u32 pass:1;
297	u32 rss:1;
298	u32 qset:3;
299	u32 locked:1;
300	u32 valid:1;
301};
302
303enum { FILTER_NO_VLAN_PRI = 7 };
304
305#define EEPROM_MAGIC 0x38E2F10C
306
307#define PORT_MASK ((1 << MAX_NPORTS) - 1)
308
309
310static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311
312
313static __inline char
314t3rev2char(struct adapter *adapter)
315{
316	char rev = 'z';
317
318	switch(adapter->params.rev) {
319	case T3_REV_A:
320		rev = 'a';
321		break;
322	case T3_REV_B:
323	case T3_REV_B2:
324		rev = 'b';
325		break;
326	case T3_REV_C:
327		rev = 'c';
328		break;
329	}
330	return rev;
331}
332
333static struct cxgb_ident *
334cxgb_get_ident(device_t dev)
335{
336	struct cxgb_ident *id;
337
338	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339		if ((id->vendor == pci_get_vendor(dev)) &&
340		    (id->device == pci_get_device(dev))) {
341			return (id);
342		}
343	}
344	return (NULL);
345}
346
347static const struct adapter_info *
348cxgb_get_adapter_info(device_t dev)
349{
350	struct cxgb_ident *id;
351	const struct adapter_info *ai;
352
353	id = cxgb_get_ident(dev);
354	if (id == NULL)
355		return (NULL);
356
357	ai = t3_get_adapter_info(id->index);
358
359	return (ai);
360}
361
362static int
363cxgb_controller_probe(device_t dev)
364{
365	const struct adapter_info *ai;
366	char *ports, buf[80];
367	int nports;
368
369	ai = cxgb_get_adapter_info(dev);
370	if (ai == NULL)
371		return (ENXIO);
372
373	nports = ai->nports0 + ai->nports1;
374	if (nports == 1)
375		ports = "port";
376	else
377		ports = "ports";
378
379	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380	device_set_desc_copy(dev, buf);
381	return (BUS_PROBE_DEFAULT);
382}
383
384#define FW_FNAME "cxgb_t3fw"
385#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
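
/*
 * These names refer to firmware(9) images resolved via firmware_get() in
 * upgrade_fw() and update_tpsram()/update_tpeeprom() below.  In stock
 * FreeBSD builds they are assumed to be provided by companion modules
 * (e.g. cxgb_t3fw.ko for the "cxgb_t3fw" image).  If an image cannot be
 * found, upgrade_fw() fails with ENOENT and update_tpsram() with EINVAL,
 * leaving the adapter on its current firmware.
 */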
387
388static int
389upgrade_fw(adapter_t *sc)
390{
391	const struct firmware *fw;
392	int status;
393	u32 vers;
394
395	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397		return (ENOENT);
398	} else
399		device_printf(sc->dev, "installing firmware on card\n");
400	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401
402	if (status != 0) {
403		device_printf(sc->dev, "failed to install firmware: %d\n",
404		    status);
405	} else {
406		t3_get_fw_version(sc, &vers);
407		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409		    G_FW_VERSION_MICRO(vers));
410	}
411
412	firmware_put(fw, FIRMWARE_UNLOAD);
413
414	return (status);
415}
416
417/*
418 * The cxgb_controller_attach function is responsible for the initial
419 * bringup of the device.  Its responsibilities include:
420 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call the hardware-specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Set up the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Set up offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
438 *
439 * NOTE: Any modification or deviation from this list MUST be reflected in
440 * the above comment.  Failure to do so will result in problems on various
441 * error conditions including link flapping.
442 */
443static int
444cxgb_controller_attach(device_t dev)
445{
446	device_t child;
447	const struct adapter_info *ai;
448	struct adapter *sc;
449	int i, error = 0;
450	uint32_t vers;
451	int port_qsets = 1;
452	int msi_needed, reg;
453	char buf[80];
454
455	sc = device_get_softc(dev);
456	sc->dev = dev;
457	sc->msi_count = 0;
458	ai = cxgb_get_adapter_info(dev);
459
460	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
461	    device_get_unit(dev));
462	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
463
464	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
465	    device_get_unit(dev));
466	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
467	    device_get_unit(dev));
468	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
469	    device_get_unit(dev));
470
471	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
472	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
473	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
474
475	mtx_lock(&t3_list_lock);
476	SLIST_INSERT_HEAD(&t3_list, sc, link);
477	mtx_unlock(&t3_list_lock);
478
479	/* find the PCIe link width and set max read request to 4KB*/
480	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
481		uint16_t lnk;
482
483		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
484		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
485		if (sc->link_width < 8 &&
486		    (ai->caps & SUPPORTED_10000baseT_Full)) {
487			device_printf(sc->dev,
488			    "PCIe x%d Link, expect reduced performance\n",
489			    sc->link_width);
490		}
491
492		pci_set_max_read_req(dev, 4096);
493	}
494
495	touch_bars(dev);
496	pci_enable_busmaster(dev);
497	/*
498	 * Allocate the registers and make them available to the driver.
499	 * The registers that we care about for NIC mode are in BAR 0
500	 */
501	sc->regs_rid = PCIR_BAR(0);
502	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
503	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
504		device_printf(dev, "Cannot allocate BAR region 0\n");
505		error = ENXIO;
506		goto out;
507	}
508
509	sc->bt = rman_get_bustag(sc->regs_res);
510	sc->bh = rman_get_bushandle(sc->regs_res);
511	sc->mmio_len = rman_get_size(sc->regs_res);
512
513	for (i = 0; i < MAX_NPORTS; i++)
514		sc->port[i].adapter = sc;
515
516	if (t3_prep_adapter(sc, ai, 1) < 0) {
517		printf("prep adapter failed\n");
518		error = ENODEV;
519		goto out;
520	}
521
522	sc->udbs_rid = PCIR_BAR(2);
523	sc->udbs_res = NULL;
524	if (is_offload(sc) &&
525	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
526		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
527		device_printf(dev, "Cannot allocate BAR region 1\n");
528		error = ENXIO;
529		goto out;
530	}
531
	/*
	 * Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
537	sc->msix_regs_rid = 0x20;
538	if ((msi_allowed >= 2) &&
539	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
540	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
541
542		if (multiq)
543			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
544		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
545
546		if (pci_msix_count(dev) == 0 ||
547		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
548		    sc->msi_count != msi_needed) {
549			device_printf(dev, "alloc msix failed - "
550				      "msi_count=%d, msi_needed=%d, err=%d; "
551				      "will try MSI\n", sc->msi_count,
552				      msi_needed, error);
553			sc->msi_count = 0;
554			port_qsets = 1;
555			pci_release_msi(dev);
556			bus_release_resource(dev, SYS_RES_MEMORY,
557			    sc->msix_regs_rid, sc->msix_regs_res);
558			sc->msix_regs_res = NULL;
559		} else {
560			sc->flags |= USING_MSIX;
561			sc->cxgb_intr = cxgb_async_intr;
562			device_printf(dev,
563				      "using MSI-X interrupts (%u vectors)\n",
564				      sc->msi_count);
565		}
566	}
567
568	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
569		sc->msi_count = 1;
570		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
571			device_printf(dev, "alloc msi failed - "
572				      "err=%d; will try INTx\n", error);
573			sc->msi_count = 0;
574			port_qsets = 1;
575			pci_release_msi(dev);
576		} else {
577			sc->flags |= USING_MSI;
578			sc->cxgb_intr = t3_intr_msi;
579			device_printf(dev, "using MSI interrupts\n");
580		}
581	}
582	if (sc->msi_count == 0) {
583		device_printf(dev, "using line interrupts\n");
584		sc->cxgb_intr = t3b_intr;
585	}
586
587	/* Create a private taskqueue thread for handling driver events */
588	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
589	    taskqueue_thread_enqueue, &sc->tq);
590	if (sc->tq == NULL) {
591		device_printf(dev, "failed to allocate controller task queue\n");
592		goto out;
593	}
594
595	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
596	    device_get_nameunit(dev));
597	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
598
599
600	/* Create a periodic callout for checking adapter status */
601	callout_init(&sc->cxgb_tick_ch, 1);
602
603	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
604		/*
605		 * Warn user that a firmware update will be attempted in init.
606		 */
607		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
608		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
609		sc->flags &= ~FW_UPTODATE;
610	} else {
611		sc->flags |= FW_UPTODATE;
612	}
613
614	if (t3_check_tpsram_version(sc) < 0) {
615		/*
616		 * Warn user that a firmware update will be attempted in init.
617		 */
618		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
619		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
620		sc->flags &= ~TPS_UPTODATE;
621	} else {
622		sc->flags |= TPS_UPTODATE;
623	}
624
625	/*
626	 * Create a child device for each MAC.  The ethernet attachment
627	 * will be done in these children.
628	 */
629	for (i = 0; i < (sc)->params.nports; i++) {
630		struct port_info *pi;
631
632		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
633			device_printf(dev, "failed to add child port\n");
634			error = EINVAL;
635			goto out;
636		}
637		pi = &sc->port[i];
638		pi->adapter = sc;
639		pi->nqsets = port_qsets;
640		pi->first_qset = i*port_qsets;
641		pi->port_id = i;
642		pi->tx_chan = i >= ai->nports0;
643		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
644		sc->rxpkt_map[pi->txpkt_intf] = i;
645		sc->port[i].tx_chan = i >= ai->nports0;
646		sc->portdev[i] = child;
647		device_set_softc(child, pi);
648	}
649	if ((error = bus_generic_attach(dev)) != 0)
650		goto out;
651
652	/* initialize sge private state */
653	t3_sge_init_adapter(sc);
654
655	t3_led_ready(sc);
656
657	error = t3_get_fw_version(sc, &vers);
658	if (error)
659		goto out;
660
661	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663	    G_FW_VERSION_MICRO(vers));
664
665	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666		 ai->desc, is_offload(sc) ? "R" : "",
667		 sc->params.vpd.ec, sc->params.vpd.sn);
668	device_set_desc_copy(dev, buf);
669
670	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673
674	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
676	t3_add_attach_sysctls(sc);
677
678#ifdef TCP_OFFLOAD
679	for (i = 0; i < NUM_CPL_HANDLERS; i++)
680		sc->cpl_handler[i] = cpl_not_handled;
681#endif
682
683	t3_intr_clear(sc);
684	error = cxgb_setup_interrupts(sc);
685out:
686	if (error)
687		cxgb_free(sc);
688
689	return (error);
690}
691
/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */
696
697static int
698cxgb_controller_detach(device_t dev)
699{
700	struct adapter *sc;
701
702	sc = device_get_softc(dev);
703
704	cxgb_free(sc);
705
706	return (0);
707}
708
/*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(), and
 * should be the final piece of work done when fully unloading the driver.
 * Its responsibilities include:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
727static void
728cxgb_free(struct adapter *sc)
729{
730	int i, nqsets = 0;
731
732	ADAPTER_LOCK(sc);
733	sc->flags |= CXGB_SHUTDOWN;
734	ADAPTER_UNLOCK(sc);
735
736	/*
737	 * Make sure all child devices are gone.
738	 */
739	bus_generic_detach(sc->dev);
740	for (i = 0; i < (sc)->params.nports; i++) {
741		if (sc->portdev[i] &&
742		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
743			device_printf(sc->dev, "failed to delete child port\n");
744		nqsets += sc->port[i].nqsets;
745	}
746
747	/*
748	 * At this point, it is as if cxgb_port_detach has run on all ports, and
749	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
750	 * all open devices have been closed.
751	 */
752	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
753					   __func__, sc->open_device_map));
754	for (i = 0; i < sc->params.nports; i++) {
755		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
756						  __func__, i));
757	}
758
759	/*
760	 * Finish off the adapter's callouts.
761	 */
762	callout_drain(&sc->cxgb_tick_ch);
763	callout_drain(&sc->sge_timer_ch);
764
765	/*
766	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
767	 * sysctls are cleaned up by the kernel linker.
768	 */
769	if (sc->flags & FULL_INIT_DONE) {
		t3_free_sge_resources(sc, nqsets);
		sc->flags &= ~FULL_INIT_DONE;
	}
773
774	/*
775	 * Release all interrupt resources.
776	 */
777	cxgb_teardown_interrupts(sc);
778	if (sc->flags & (USING_MSI | USING_MSIX)) {
779		device_printf(sc->dev, "releasing msi message(s)\n");
780		pci_release_msi(sc->dev);
781	} else {
782		device_printf(sc->dev, "no msi message to release\n");
783	}
784
785	if (sc->msix_regs_res != NULL) {
786		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
787		    sc->msix_regs_res);
788	}
789
790	/*
791	 * Free the adapter's taskqueue.
792	 */
793	if (sc->tq != NULL) {
794		taskqueue_free(sc->tq);
795		sc->tq = NULL;
796	}
797
798	free(sc->filters, M_DEVBUF);
799	t3_sge_free(sc);
800
801	if (sc->udbs_res != NULL)
802		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
803		    sc->udbs_res);
804
805	if (sc->regs_res != NULL)
806		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
807		    sc->regs_res);
808
809	MTX_DESTROY(&sc->mdio_lock);
810	MTX_DESTROY(&sc->sge.reg_lock);
811	MTX_DESTROY(&sc->elmer_lock);
812	mtx_lock(&t3_list_lock);
813	SLIST_REMOVE(&t3_list, sc, adapter, link);
814	mtx_unlock(&t3_list_lock);
815	ADAPTER_LOCK_DEINIT(sc);
816}
817
818/**
819 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
820 *	@sc: the controller softc
821 *
822 *	Determines how many sets of SGE queues to use and initializes them.
823 *	We support multiple queue sets per port if we have MSI-X, otherwise
824 *	just one queue set per port.
825 */
826static int
827setup_sge_qsets(adapter_t *sc)
828{
829	int i, j, err, irq_idx = 0, qset_idx = 0;
830	u_int ntxq = SGE_TXQ_PER_SET;
831
832	if ((err = t3_sge_alloc(sc)) != 0) {
833		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
834		return (err);
835	}
836
837	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
838		irq_idx = -1;
839
840	for (i = 0; i < (sc)->params.nports; i++) {
841		struct port_info *pi = &sc->port[i];
842
843		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
844			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
845			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
846			    &sc->params.sge.qset[qset_idx], ntxq, pi);
847			if (err) {
848				t3_free_sge_resources(sc, qset_idx);
849				device_printf(sc->dev,
850				    "t3_sge_alloc_qset failed with %d\n", err);
851				return (err);
852			}
853		}
854	}
855
856	return (0);
857}
858
859static void
860cxgb_teardown_interrupts(adapter_t *sc)
861{
862	int i;
863
864	for (i = 0; i < SGE_QSETS; i++) {
865		if (sc->msix_intr_tag[i] == NULL) {
866
867			/* Should have been setup fully or not at all */
868			KASSERT(sc->msix_irq_res[i] == NULL &&
869				sc->msix_irq_rid[i] == 0,
870				("%s: half-done interrupt (%d).", __func__, i));
871
872			continue;
873		}
874
875		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
876				  sc->msix_intr_tag[i]);
877		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
878				     sc->msix_irq_res[i]);
879
880		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
881		sc->msix_irq_rid[i] = 0;
882	}
883
884	if (sc->intr_tag) {
885		KASSERT(sc->irq_res != NULL,
886			("%s: half-done interrupt.", __func__));
887
888		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
889		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
890				     sc->irq_res);
891
892		sc->irq_res = sc->intr_tag = NULL;
893		sc->irq_rid = 0;
894	}
895}
896
897static int
898cxgb_setup_interrupts(adapter_t *sc)
899{
900	struct resource *res;
901	void *tag;
902	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
903
904	sc->irq_rid = intr_flag ? 1 : 0;
905	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
906					     RF_SHAREABLE | RF_ACTIVE);
907	if (sc->irq_res == NULL) {
908		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
909			      intr_flag, sc->irq_rid);
910		err = EINVAL;
911		sc->irq_rid = 0;
912	} else {
913		err = bus_setup_intr(sc->dev, sc->irq_res,
914		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
915		    sc->cxgb_intr, sc, &sc->intr_tag);
916
917		if (err) {
918			device_printf(sc->dev,
919				      "Cannot set up interrupt (%x, %u, %d)\n",
920				      intr_flag, sc->irq_rid, err);
921			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
922					     sc->irq_res);
923			sc->irq_res = sc->intr_tag = NULL;
924			sc->irq_rid = 0;
925		}
926	}
927
928	/* That's all for INTx or MSI */
929	if (!(intr_flag & USING_MSIX) || err)
930		return (err);
931
932	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
933	for (i = 0; i < sc->msi_count - 1; i++) {
934		rid = i + 2;
935		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
936					     RF_SHAREABLE | RF_ACTIVE);
937		if (res == NULL) {
938			device_printf(sc->dev, "Cannot allocate interrupt "
939				      "for message %d\n", rid);
940			err = EINVAL;
941			break;
942		}
943
944		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
945				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
946		if (err) {
947			device_printf(sc->dev, "Cannot set up interrupt "
948				      "for message %d (%d)\n", rid, err);
949			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
950			break;
951		}
952
953		sc->msix_irq_rid[i] = rid;
954		sc->msix_irq_res[i] = res;
955		sc->msix_intr_tag[i] = tag;
956		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
957	}
958
959	if (err)
960		cxgb_teardown_interrupts(sc);
961
962	return (err);
963}
964
965
966static int
967cxgb_port_probe(device_t dev)
968{
969	struct port_info *p;
970	char buf[80];
971	const char *desc;
972
973	p = device_get_softc(dev);
974	desc = p->phy.desc;
975	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
976	device_set_desc_copy(dev, buf);
977	return (0);
978}
979
980
981static int
982cxgb_makedev(struct port_info *pi)
983{
984
985	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
986	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
987
988	if (pi->port_cdev == NULL)
989		return (ENOMEM);
990
991	pi->port_cdev->si_drv1 = (void *)pi;
992
993	return (0);
994}
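
/*
 * The node created above is named after the port's ifnet (e.g. /dev/cxgb0).
 * Management utilities, such as Chelsio's cxgbtool, are assumed to use this
 * node to issue the driver-private ioctls handled by cxgb_extension_ioctl().
 */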
995
996#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
997    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
998    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
999#define CXGB_CAP_ENABLE CXGB_CAP
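
/*
 * These capabilities map onto the usual ifconfig(8) flags; for example,
 * "ifconfig cxgb0 -txcsum" clears IFCAP_TXCSUM through the SIOCSIFCAP case
 * in cxgb_ioctl() below, which also drops IFCAP_TSO4 because TSO requires
 * working transmit checksums.
 */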
1000
1001static int
1002cxgb_port_attach(device_t dev)
1003{
1004	struct port_info *p;
1005	struct ifnet *ifp;
1006	int err;
1007	struct adapter *sc;
1008
1009	p = device_get_softc(dev);
1010	sc = p->adapter;
1011	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1012	    device_get_unit(device_get_parent(dev)), p->port_id);
1013	PORT_LOCK_INIT(p, p->lockbuf);
1014
1015	callout_init(&p->link_check_ch, 1);
1016	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1017
1018	/* Allocate an ifnet object and set it up */
1019	ifp = p->ifp = if_alloc(IFT_ETHER);
1020	if (ifp == NULL) {
1021		device_printf(dev, "Cannot allocate ifnet\n");
1022		return (ENOMEM);
1023	}
1024
1025	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1026	ifp->if_init = cxgb_init;
1027	ifp->if_softc = p;
1028	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1029	ifp->if_ioctl = cxgb_ioctl;
1030	ifp->if_transmit = cxgb_transmit;
1031	ifp->if_qflush = cxgb_qflush;
1032	ifp->if_get_counter = cxgb_get_counter;
1033
1034	ifp->if_capabilities = CXGB_CAP;
1035#ifdef TCP_OFFLOAD
1036	if (is_offload(sc))
1037		ifp->if_capabilities |= IFCAP_TOE4;
1038#endif
1039	ifp->if_capenable = CXGB_CAP_ENABLE;
1040	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1041	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1042
1043	/*
1044	 * Disable TSO on 4-port - it isn't supported by the firmware.
1045	 */
1046	if (sc->params.nports > 2) {
1047		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1048		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1049		ifp->if_hwassist &= ~CSUM_TSO;
1050	}
1051
1052	ether_ifattach(ifp, p->hw_addr);
1053
1054	/* Attach driver netdump methods. */
1055	NETDUMP_SET(ifp, cxgb);
1056
1057#ifdef DEFAULT_JUMBO
1058	if (sc->params.nports <= 2)
1059		ifp->if_mtu = ETHERMTU_JUMBO;
1060#endif
1061	if ((err = cxgb_makedev(p)) != 0) {
1062		printf("makedev failed %d\n", err);
1063		return (err);
1064	}
1065
1066	/* Create a list of media supported by this port */
1067	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1068	    cxgb_media_status);
1069	cxgb_build_medialist(p);
1070
1071	t3_sge_init_port(p);
1072
1073	return (err);
1074}
1075
1076/*
1077 * cxgb_port_detach() is called via the device_detach methods when
1078 * cxgb_free() calls the bus_generic_detach.  It is responsible for
1079 * removing the device from the view of the kernel, i.e. from all
1080 * interfaces lists etc.  This routine is only called when the driver is
1081 * being unloaded, not when the link goes down.
1082 */
1083static int
1084cxgb_port_detach(device_t dev)
1085{
1086	struct port_info *p;
1087	struct adapter *sc;
1088	int i;
1089
1090	p = device_get_softc(dev);
1091	sc = p->adapter;
1092
1093	/* Tell cxgb_ioctl and if_init that the port is going away */
1094	ADAPTER_LOCK(sc);
1095	SET_DOOMED(p);
1096	wakeup(&sc->flags);
1097	while (IS_BUSY(sc))
1098		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1099	SET_BUSY(sc);
1100	ADAPTER_UNLOCK(sc);
1101
1102	if (p->port_cdev != NULL)
1103		destroy_dev(p->port_cdev);
1104
1105	cxgb_uninit_synchronized(p);
1106	ether_ifdetach(p->ifp);
1107
1108	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1109		struct sge_qset *qs = &sc->sge.qs[i];
1110		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1111
1112		callout_drain(&txq->txq_watchdog);
1113		callout_drain(&txq->txq_timer);
1114	}
1115
1116	PORT_LOCK_DEINIT(p);
1117	if_free(p->ifp);
1118	p->ifp = NULL;
1119
1120	ADAPTER_LOCK(sc);
1121	CLR_BUSY(sc);
1122	wakeup_one(&sc->flags);
1123	ADAPTER_UNLOCK(sc);
1124	return (0);
1125}
1126
1127void
1128t3_fatal_err(struct adapter *sc)
1129{
1130	u_int fw_status[4];
1131
1132	if (sc->flags & FULL_INIT_DONE) {
1133		t3_sge_stop(sc);
1134		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1135		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1136		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1137		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1138		t3_intr_disable(sc);
1139	}
	device_printf(sc->dev,
	    "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1144}
1145
1146int
1147t3_os_find_pci_capability(adapter_t *sc, int cap)
1148{
1149	device_t dev;
1150	struct pci_devinfo *dinfo;
1151	pcicfgregs *cfg;
1152	uint32_t status;
1153	uint8_t ptr;
1154
1155	dev = sc->dev;
1156	dinfo = device_get_ivars(dev);
1157	cfg = &dinfo->cfg;
1158
1159	status = pci_read_config(dev, PCIR_STATUS, 2);
1160	if (!(status & PCIM_STATUS_CAPPRESENT))
1161		return (0);
1162
1163	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1164	case 0:
1165	case 1:
1166		ptr = PCIR_CAP_PTR;
1167		break;
1168	case 2:
1169		ptr = PCIR_CAP_PTR_2;
1170		break;
1171	default:
1172		return (0);
1173		break;
1174	}
1175	ptr = pci_read_config(dev, ptr, 1);
1176
1177	while (ptr != 0) {
1178		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1179			return (ptr);
1180		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1181	}
1182
1183	return (0);
1184}
1185
1186int
1187t3_os_pci_save_state(struct adapter *sc)
1188{
1189	device_t dev;
1190	struct pci_devinfo *dinfo;
1191
1192	dev = sc->dev;
1193	dinfo = device_get_ivars(dev);
1194
1195	pci_cfg_save(dev, dinfo, 0);
1196	return (0);
1197}
1198
1199int
1200t3_os_pci_restore_state(struct adapter *sc)
1201{
1202	device_t dev;
1203	struct pci_devinfo *dinfo;
1204
1205	dev = sc->dev;
1206	dinfo = device_get_ivars(dev);
1207
1208	pci_cfg_restore(dev, dinfo);
1209	return (0);
1210}
1211
/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *	@mac_was_reset: non-zero if the MAC was reset and its settings must
 *	    be reapplied
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
1225void
1226t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1227     int duplex, int fc, int mac_was_reset)
1228{
1229	struct port_info *pi = &adapter->port[port_id];
1230	struct ifnet *ifp = pi->ifp;
1231
1232	/* no race with detach, so ifp should always be good */
1233	KASSERT(ifp, ("%s: if detached.", __func__));
1234
1235	/* Reapply mac settings if they were lost due to a reset */
1236	if (mac_was_reset) {
1237		PORT_LOCK(pi);
1238		cxgb_update_mac_settings(pi);
1239		PORT_UNLOCK(pi);
1240	}
1241
1242	if (link_status) {
1243		ifp->if_baudrate = IF_Mbps(speed);
1244		if_link_state_change(ifp, LINK_STATE_UP);
1245	} else
1246		if_link_state_change(ifp, LINK_STATE_DOWN);
1247}
1248
/**
 *	t3_os_phymod_changed - handle PHY module changes
 *	@adap: the adapter whose PHY reported the module change
 *	@port_id: the port index whose PHY module changed
 *
 *	This is the OS-dependent handler for PHY module changes.  It is
 *	invoked when a PHY module is removed or inserted for any OS-specific
 *	processing.
 */
void
t3_os_phymod_changed(struct adapter *adap, int port_id)
{
1260	static const char *mod_str[] = {
1261		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1262	};
1263	struct port_info *pi = &adap->port[port_id];
1264	int mod = pi->phy.modtype;
1265
1266	if (mod != pi->media.ifm_cur->ifm_data)
1267		cxgb_build_medialist(pi);
1268
1269	if (mod == phy_modtype_none)
1270		if_printf(pi->ifp, "PHY module unplugged\n");
1271	else {
1272		KASSERT(mod < ARRAY_SIZE(mod_str),
1273			("invalid PHY module type %d", mod));
1274		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1275	}
1276}
1277
1278void
1279t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1280{
1281
	/*
	 * The ifnet might not be allocated yet when this gets called, as it
	 * happens early on in attach, from t3_prep_adapter, so save the
	 * address off in the port structure.
	 */
1287	if (cxgb_debug)
1288		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1289	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1290}
1291
1292/*
1293 * Programs the XGMAC based on the settings in the ifnet.  These settings
1294 * include MTU, MAC address, mcast addresses, etc.
1295 */
1296static void
1297cxgb_update_mac_settings(struct port_info *p)
1298{
1299	struct ifnet *ifp = p->ifp;
1300	struct t3_rx_mode rm;
1301	struct cmac *mac = &p->mac;
1302	int mtu, hwtagging;
1303
1304	PORT_LOCK_ASSERT_OWNED(p);
1305
1306	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1307
1308	mtu = ifp->if_mtu;
1309	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1310		mtu += ETHER_VLAN_ENCAP_LEN;
1311
1312	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1313
1314	t3_mac_set_mtu(mac, mtu);
1315	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1316	t3_mac_set_address(mac, 0, p->hw_addr);
1317	t3_init_rx_mode(&rm, p);
1318	t3_mac_set_rx_mode(mac, &rm);
1319}
1320
1321
1322static int
1323await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1324			      unsigned long n)
1325{
1326	int attempts = 5;
1327
1328	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1329		if (!--attempts)
1330			return (ETIMEDOUT);
1331		t3_os_sleep(10);
1332	}
	return (0);
1334}
1335
1336static int
1337init_tp_parity(struct adapter *adap)
1338{
1339	int i;
1340	struct mbuf *m;
1341	struct cpl_set_tcb_field *greq;
1342	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1343
1344	t3_tp_set_offload_mode(adap, 1);
1345
1346	for (i = 0; i < 16; i++) {
1347		struct cpl_smt_write_req *req;
1348
1349		m = m_gethdr(M_WAITOK, MT_DATA);
1350		req = mtod(m, struct cpl_smt_write_req *);
1351		m->m_len = m->m_pkthdr.len = sizeof(*req);
1352		memset(req, 0, sizeof(*req));
1353		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1354		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1355		req->iff = i;
1356		t3_mgmt_tx(adap, m);
1357	}
1358
1359	for (i = 0; i < 2048; i++) {
1360		struct cpl_l2t_write_req *req;
1361
1362		m = m_gethdr(M_WAITOK, MT_DATA);
1363		req = mtod(m, struct cpl_l2t_write_req *);
1364		m->m_len = m->m_pkthdr.len = sizeof(*req);
1365		memset(req, 0, sizeof(*req));
1366		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1368		req->params = htonl(V_L2T_W_IDX(i));
1369		t3_mgmt_tx(adap, m);
1370	}
1371
1372	for (i = 0; i < 2048; i++) {
1373		struct cpl_rte_write_req *req;
1374
1375		m = m_gethdr(M_WAITOK, MT_DATA);
1376		req = mtod(m, struct cpl_rte_write_req *);
1377		m->m_len = m->m_pkthdr.len = sizeof(*req);
1378		memset(req, 0, sizeof(*req));
1379		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1380		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1381		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1382		t3_mgmt_tx(adap, m);
1383	}
1384
1385	m = m_gethdr(M_WAITOK, MT_DATA);
1386	greq = mtod(m, struct cpl_set_tcb_field *);
1387	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1388	memset(greq, 0, sizeof(*greq));
1389	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1390	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1391	greq->mask = htobe64(1);
1392	t3_mgmt_tx(adap, m);
1393
1394	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1395	t3_tp_set_offload_mode(adap, 0);
1396	return (i);
1397}
1398
1399/**
1400 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1401 *	@adap: the adapter
1402 *
1403 *	Sets up RSS to distribute packets to multiple receive queues.  We
1404 *	configure the RSS CPU lookup table to distribute to the number of HW
1405 *	receive queues, and the response queue lookup table to narrow that
1406 *	down to the response queues actually configured for each port.
1407 *	We always configure the RSS mapping for two ports since the mapping
1408 *	table has plenty of entries.
1409 */
1410static void
1411setup_rss(adapter_t *adap)
1412{
1413	int i;
1414	u_int nq[2];
1415	uint8_t cpus[SGE_QSETS + 1];
1416	uint16_t rspq_map[RSS_TABLE_SIZE];
1417
1418	for (i = 0; i < SGE_QSETS; ++i)
1419		cpus[i] = i;
1420	cpus[SGE_QSETS] = 0xff;
1421
1422	nq[0] = nq[1] = 0;
1423	for_each_port(adap, i) {
1424		const struct port_info *pi = adap2pinfo(adap, i);
1425
1426		nq[pi->tx_chan] += pi->nqsets;
1427	}
1428	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1429		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1430		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1431	}
1432
1433	/* Calculate the reverse RSS map table */
1434	for (i = 0; i < SGE_QSETS; ++i)
1435		adap->rrss_map[i] = 0xff;
1436	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1437		if (adap->rrss_map[rspq_map[i]] == 0xff)
1438			adap->rrss_map[rspq_map[i]] = i;
1439
1440	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1441		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1442	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1443	              cpus, rspq_map);
1444
1445}
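
/*
 * Worked example (numbers are illustrative): on a two-port adapter with
 * four queue sets per port, nq[0] = nq[1] = 4, so the first half of
 * rspq_map cycles through response queues 0..3 and the second half cycles
 * through 4..7.  On a single-port adapter nq[1] is 0 and the second half
 * of the table simply points at queue 0.
 */
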
1446static void
1447send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1448			      int hi, int port)
1449{
1450	struct mbuf *m;
1451	struct mngt_pktsched_wr *req;
1452
1453	m = m_gethdr(M_NOWAIT, MT_DATA);
1454	if (m) {
1455		req = mtod(m, struct mngt_pktsched_wr *);
1456		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1457		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1458		req->sched = sched;
1459		req->idx = qidx;
1460		req->min = lo;
1461		req->max = hi;
1462		req->binding = port;
1463		m->m_len = m->m_pkthdr.len = sizeof(*req);
1464		t3_mgmt_tx(adap, m);
1465	}
1466}
1467
1468static void
1469bind_qsets(adapter_t *sc)
1470{
1471	int i, j;
1472
1473	for (i = 0; i < (sc)->params.nports; ++i) {
1474		const struct port_info *pi = adap2pinfo(sc, i);
1475
1476		for (j = 0; j < pi->nqsets; ++j) {
1477			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1478					  -1, pi->tx_chan);
1479
1480		}
1481	}
1482}
1483
1484static void
1485update_tpeeprom(struct adapter *adap)
1486{
1487	const struct firmware *tpeeprom;
1488
1489	uint32_t version;
1490	unsigned int major, minor;
1491	int ret, len;
1492	char rev, name[32];
1493
1494	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1495
1496	major = G_TP_VERSION_MAJOR(version);
1497	minor = G_TP_VERSION_MINOR(version);
1498	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1499		return;
1500
1501	rev = t3rev2char(adap);
1502	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1503
1504	tpeeprom = firmware_get(name);
1505	if (tpeeprom == NULL) {
1506		device_printf(adap->dev,
1507			      "could not load TP EEPROM: unable to load %s\n",
1508			      name);
1509		return;
1510	}
1511
1512	len = tpeeprom->datasize - 4;
1513
1514	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1515	if (ret)
1516		goto release_tpeeprom;
1517
	if (len != TP_SRAM_LEN) {
		device_printf(adap->dev,
			      "%s length is wrong len=%d expected=%d\n", name,
			      len, TP_SRAM_LEN);
		/* Don't leak the firmware reference on the error path. */
		goto release_tpeeprom;
	}
1524
1525	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1526	    TP_SRAM_OFFSET);
1527
1528	if (!ret) {
1529		device_printf(adap->dev,
1530			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1531			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1532	} else
1533		device_printf(adap->dev,
1534			      "Protocol SRAM image update in EEPROM failed\n");
1535
1536release_tpeeprom:
1537	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1538
1539	return;
1540}
1541
1542static int
1543update_tpsram(struct adapter *adap)
1544{
1545	const struct firmware *tpsram;
1546	int ret;
1547	char rev, name[32];
1548
1549	rev = t3rev2char(adap);
1550	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1551
1552	update_tpeeprom(adap);
1553
1554	tpsram = firmware_get(name);
	if (tpsram == NULL) {
1556		device_printf(adap->dev, "could not load TP SRAM\n");
1557		return (EINVAL);
1558	} else
1559		device_printf(adap->dev, "updating TP SRAM\n");
1560
1561	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1562	if (ret)
1563		goto release_tpsram;
1564
1565	ret = t3_set_proto_sram(adap, tpsram->data);
1566	if (ret)
1567		device_printf(adap->dev, "loading protocol SRAM failed\n");
1568
1569release_tpsram:
1570	firmware_put(tpsram, FIRMWARE_UNLOAD);
1571
	return (ret);
1573}
1574
1575/**
1576 *	cxgb_up - enable the adapter
1577 *	@adap: adapter being enabled
1578 *
1579 *	Called when the first port is enabled, this function performs the
1580 *	actions necessary to make an adapter operational, such as completing
1581 *	the initialization of HW modules, and enabling interrupts.
1582 */
1583static int
1584cxgb_up(struct adapter *sc)
1585{
1586	int err = 0;
1587	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1588
1589	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1590					   __func__, sc->open_device_map));
1591
1592	if ((sc->flags & FULL_INIT_DONE) == 0) {
1593
1594		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1595
1596		if ((sc->flags & FW_UPTODATE) == 0)
1597			if ((err = upgrade_fw(sc)))
1598				goto out;
1599
1600		if ((sc->flags & TPS_UPTODATE) == 0)
1601			if ((err = update_tpsram(sc)))
1602				goto out;
1603
1604		if (is_offload(sc) && nfilters != 0) {
1605			sc->params.mc5.nservers = 0;
1606
1607			if (nfilters < 0)
1608				sc->params.mc5.nfilters = mxf;
1609			else
1610				sc->params.mc5.nfilters = min(nfilters, mxf);
1611		}
1612
1613		err = t3_init_hw(sc, 0);
1614		if (err)
1615			goto out;
1616
1617		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1618		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1619
1620		err = setup_sge_qsets(sc);
1621		if (err)
1622			goto out;
1623
1624		alloc_filters(sc);
1625		setup_rss(sc);
1626
1627		t3_add_configured_sysctls(sc);
1628		sc->flags |= FULL_INIT_DONE;
1629	}
1630
1631	t3_intr_clear(sc);
1632	t3_sge_start(sc);
1633	t3_intr_enable(sc);
1634
1635	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1636	    is_offload(sc) && init_tp_parity(sc) == 0)
1637		sc->flags |= TP_PARITY_INIT;
1638
1639	if (sc->flags & TP_PARITY_INIT) {
1640		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1641		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1642	}
1643
1644	if (!(sc->flags & QUEUES_BOUND)) {
1645		bind_qsets(sc);
1646		setup_hw_filters(sc);
1647		sc->flags |= QUEUES_BOUND;
1648	}
1649
1650	t3_sge_reset_adapter(sc);
1651out:
1652	return (err);
1653}
1654
1655/*
1656 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1657 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1658 * during controller_detach, not here.
1659 */
1660static void
1661cxgb_down(struct adapter *sc)
1662{
1663	t3_sge_stop(sc);
1664	t3_intr_disable(sc);
1665}
1666
1667/*
1668 * if_init for cxgb ports.
1669 */
1670static void
1671cxgb_init(void *arg)
1672{
1673	struct port_info *p = arg;
1674	struct adapter *sc = p->adapter;
1675
1676	ADAPTER_LOCK(sc);
1677	cxgb_init_locked(p); /* releases adapter lock */
1678	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1679}
1680
1681static int
1682cxgb_init_locked(struct port_info *p)
1683{
1684	struct adapter *sc = p->adapter;
1685	struct ifnet *ifp = p->ifp;
1686	struct cmac *mac = &p->mac;
1687	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1688
1689	ADAPTER_LOCK_ASSERT_OWNED(sc);
1690
1691	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1692		gave_up_lock = 1;
1693		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1694			rc = EINTR;
1695			goto done;
1696		}
1697	}
1698	if (IS_DOOMED(p)) {
1699		rc = ENXIO;
1700		goto done;
1701	}
1702	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1703
1704	/*
1705	 * The code that runs during one-time adapter initialization can sleep
1706	 * so it's important not to hold any locks across it.
1707	 */
1708	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1709
1710	if (may_sleep) {
1711		SET_BUSY(sc);
1712		gave_up_lock = 1;
1713		ADAPTER_UNLOCK(sc);
1714	}
1715
1716	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
		goto done;
1718
1719	PORT_LOCK(p);
1720	if (isset(&sc->open_device_map, p->port_id) &&
1721	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1722		PORT_UNLOCK(p);
1723		goto done;
1724	}
1725	t3_port_intr_enable(sc, p->port_id);
1726	if (!mac->multiport)
1727		t3_mac_init(mac);
1728	cxgb_update_mac_settings(p);
1729	t3_link_start(&p->phy, mac, &p->link_config);
1730	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1731	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1732	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1733	PORT_UNLOCK(p);
1734
1735	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1736		struct sge_qset *qs = &sc->sge.qs[i];
1737		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1738
1739		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1740				 txq->txq_watchdog.c_cpu);
1741	}
1742
1743	/* all ok */
1744	setbit(&sc->open_device_map, p->port_id);
1745	callout_reset(&p->link_check_ch,
1746	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1747	    link_check_callout, p);
1748
1749done:
1750	if (may_sleep) {
1751		ADAPTER_LOCK(sc);
1752		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1753		CLR_BUSY(sc);
1754	}
1755	if (gave_up_lock)
1756		wakeup_one(&sc->flags);
1757	ADAPTER_UNLOCK(sc);
1758	return (rc);
1759}
1760
1761static int
1762cxgb_uninit_locked(struct port_info *p)
1763{
1764	struct adapter *sc = p->adapter;
1765	int rc;
1766
1767	ADAPTER_LOCK_ASSERT_OWNED(sc);
1768
1769	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1770		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1771			rc = EINTR;
1772			goto done;
1773		}
1774	}
1775	if (IS_DOOMED(p)) {
1776		rc = ENXIO;
1777		goto done;
1778	}
1779	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1780	SET_BUSY(sc);
1781	ADAPTER_UNLOCK(sc);
1782
1783	rc = cxgb_uninit_synchronized(p);
1784
1785	ADAPTER_LOCK(sc);
1786	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1787	CLR_BUSY(sc);
1788	wakeup_one(&sc->flags);
1789done:
1790	ADAPTER_UNLOCK(sc);
1791	return (rc);
1792}
1793
1794/*
1795 * Called on "ifconfig down", and from port_detach
1796 */
1797static int
1798cxgb_uninit_synchronized(struct port_info *pi)
1799{
1800	struct adapter *sc = pi->adapter;
1801	struct ifnet *ifp = pi->ifp;
1802
1803	/*
1804	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1805	 */
1806	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1807
1808	/*
1809	 * Clear this port's bit from the open device map, and then drain all
1810	 * the tasks that can access/manipulate this port's port_info or ifp.
1811	 * We disable this port's interrupts here and so the slow/ext
1812	 * interrupt tasks won't be enqueued.  The tick task will continue to
1813	 * be enqueued every second but the runs after this drain will not see
1814	 * this port in the open device map.
1815	 *
1816	 * A well behaved task must take open_device_map into account and ignore
1817	 * ports that are not open.
1818	 */
1819	clrbit(&sc->open_device_map, pi->port_id);
1820	t3_port_intr_disable(sc, pi->port_id);
1821	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1822	taskqueue_drain(sc->tq, &sc->tick_task);
1823
1824	callout_drain(&pi->link_check_ch);
1825	taskqueue_drain(sc->tq, &pi->link_check_task);
1826
1827	PORT_LOCK(pi);
1828	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1829
1830	/* disable pause frames */
1831	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1832
1833	/* Reset RX FIFO HWM */
1834	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1835			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1836
1837	DELAY(100 * 1000);
1838
1839	/* Wait for TXFIFO empty */
1840	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1841			F_TXFIFO_EMPTY, 1, 20, 5);
1842
1843	DELAY(100 * 1000);
1844	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1845
1846	pi->phy.ops->power_down(&pi->phy, 1);
1847
1848	PORT_UNLOCK(pi);
1849
1850	pi->link_config.link_ok = 0;
1851	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1852
1853	if (sc->open_device_map == 0)
1854		cxgb_down(pi->adapter);
1855
1856	return (0);
1857}
1858
1859/*
1860 * Mark lro enabled or disabled in all qsets for this port
1861 */
1862static int
1863cxgb_set_lro(struct port_info *p, int enabled)
1864{
1865	int i;
1866	struct adapter *adp = p->adapter;
1867	struct sge_qset *q;
1868
1869	for (i = 0; i < p->nqsets; i++) {
1870		q = &adp->sge.qs[p->first_qset + i];
1871		q->lro.enabled = (enabled != 0);
1872	}
1873	return (0);
1874}
1875
1876static int
1877cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1878{
1879	struct port_info *p = ifp->if_softc;
1880	struct adapter *sc = p->adapter;
1881	struct ifreq *ifr = (struct ifreq *)data;
1882	int flags, error = 0, mtu;
1883	uint32_t mask;
1884
1885	switch (command) {
1886	case SIOCSIFMTU:
1887		ADAPTER_LOCK(sc);
1888		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1889		if (error) {
1890fail:
1891			ADAPTER_UNLOCK(sc);
1892			return (error);
1893		}
1894
1895		mtu = ifr->ifr_mtu;
1896		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1897			error = EINVAL;
1898		} else {
1899			ifp->if_mtu = mtu;
1900			PORT_LOCK(p);
1901			cxgb_update_mac_settings(p);
1902			PORT_UNLOCK(p);
1903		}
1904		ADAPTER_UNLOCK(sc);
1905		break;
1906	case SIOCSIFFLAGS:
1907		ADAPTER_LOCK(sc);
1908		if (IS_DOOMED(p)) {
1909			error = ENXIO;
1910			goto fail;
1911		}
1912		if (ifp->if_flags & IFF_UP) {
1913			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1914				flags = p->if_flags;
1915				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1916				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1917					if (IS_BUSY(sc)) {
1918						error = EBUSY;
1919						goto fail;
1920					}
1921					PORT_LOCK(p);
1922					cxgb_update_mac_settings(p);
1923					PORT_UNLOCK(p);
1924				}
1925				ADAPTER_UNLOCK(sc);
1926			} else
1927				error = cxgb_init_locked(p);
1928			p->if_flags = ifp->if_flags;
1929		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1930			error = cxgb_uninit_locked(p);
1931		else
1932			ADAPTER_UNLOCK(sc);
1933
1934		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1935		break;
1936	case SIOCADDMULTI:
1937	case SIOCDELMULTI:
1938		ADAPTER_LOCK(sc);
1939		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1940		if (error)
1941			goto fail;
1942
1943		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1944			PORT_LOCK(p);
1945			cxgb_update_mac_settings(p);
1946			PORT_UNLOCK(p);
1947		}
1948		ADAPTER_UNLOCK(sc);
1949
1950		break;
1951	case SIOCSIFCAP:
1952		ADAPTER_LOCK(sc);
1953		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1954		if (error)
1955			goto fail;
1956
1957		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1958		if (mask & IFCAP_TXCSUM) {
1959			ifp->if_capenable ^= IFCAP_TXCSUM;
1960			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1961
1962			if (IFCAP_TSO4 & ifp->if_capenable &&
1963			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1964				mask &= ~IFCAP_TSO4;
1965				ifp->if_capenable &= ~IFCAP_TSO4;
1966				if_printf(ifp,
1967				    "tso4 disabled due to -txcsum.\n");
1968			}
1969		}
1970		if (mask & IFCAP_TXCSUM_IPV6) {
1971			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1972			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1973
1974			if (IFCAP_TSO6 & ifp->if_capenable &&
1975			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1976				mask &= ~IFCAP_TSO6;
1977				ifp->if_capenable &= ~IFCAP_TSO6;
1978				if_printf(ifp,
1979				    "tso6 disabled due to -txcsum6.\n");
1980			}
1981		}
1982		if (mask & IFCAP_RXCSUM)
1983			ifp->if_capenable ^= IFCAP_RXCSUM;
1984		if (mask & IFCAP_RXCSUM_IPV6)
1985			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1986
1987		/*
1988		 * Note that we leave CSUM_TSO alone (it is always set).  The
1989		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1990		 * sending a TSO request our way, so it's sufficient to toggle
1991		 * IFCAP_TSOx only.
1992		 */
1993		if (mask & IFCAP_TSO4) {
1994			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1995			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1996				if_printf(ifp, "enable txcsum first.\n");
1997				error = EAGAIN;
1998				goto fail;
1999			}
2000			ifp->if_capenable ^= IFCAP_TSO4;
2001		}
2002		if (mask & IFCAP_TSO6) {
2003			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2004			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2005				if_printf(ifp, "enable txcsum6 first.\n");
2006				error = EAGAIN;
2007				goto fail;
2008			}
2009			ifp->if_capenable ^= IFCAP_TSO6;
2010		}
2011		if (mask & IFCAP_LRO) {
2012			ifp->if_capenable ^= IFCAP_LRO;
2013
2014			/* Safe to do this even if cxgb_up not called yet */
2015			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2016		}
2017#ifdef TCP_OFFLOAD
2018		if (mask & IFCAP_TOE4) {
2019			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2020
2021			error = toe_capability(p, enable);
2022			if (error == 0)
2023				ifp->if_capenable ^= mask;
2024		}
2025#endif
2026		if (mask & IFCAP_VLAN_HWTAGGING) {
2027			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2028			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2029				PORT_LOCK(p);
2030				cxgb_update_mac_settings(p);
2031				PORT_UNLOCK(p);
2032			}
2033		}
2034		if (mask & IFCAP_VLAN_MTU) {
2035			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2036			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2037				PORT_LOCK(p);
2038				cxgb_update_mac_settings(p);
2039				PORT_UNLOCK(p);
2040			}
2041		}
2042		if (mask & IFCAP_VLAN_HWTSO)
2043			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2044		if (mask & IFCAP_VLAN_HWCSUM)
2045			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2046
2047#ifdef VLAN_CAPABILITIES
2048		VLAN_CAPABILITIES(ifp);
2049#endif
2050		ADAPTER_UNLOCK(sc);
2051		break;
2052	case SIOCSIFMEDIA:
2053	case SIOCGIFMEDIA:
2054		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2055		break;
2056	default:
2057		error = ether_ioctl(ifp, command, data);
2058	}
2059
2060	return (error);
2061}
2062
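/* ifmedia change callback: forcing a particular media is not supported. */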
2063static int
2064cxgb_media_change(struct ifnet *ifp)
2065{
2066	return (EOPNOTSUPP);
2067}
2068
2069/*
2070 * Translates phy->modtype to the correct Ethernet media subtype.
2071 */
2072static int
2073cxgb_ifm_type(int mod)
2074{
2075	switch (mod) {
2076	case phy_modtype_sr:
2077		return (IFM_10G_SR);
2078	case phy_modtype_lr:
2079		return (IFM_10G_LR);
2080	case phy_modtype_lrm:
2081		return (IFM_10G_LRM);
2082	case phy_modtype_twinax:
2083		return (IFM_10G_TWINAX);
2084	case phy_modtype_twinax_long:
2085		return (IFM_10G_TWINAX_LONG);
2086	case phy_modtype_none:
2087		return (IFM_NONE);
2088	case phy_modtype_unknown:
2089		return (IFM_UNKNOWN);
2090	}
2091
2092	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2093	return (IFM_UNKNOWN);
2094}
2095
2096/*
2097 * Rebuilds the ifmedia list for this port, and sets the current media.
2098 */
2099static void
2100cxgb_build_medialist(struct port_info *p)
2101{
2102	struct cphy *phy = &p->phy;
2103	struct ifmedia *media = &p->media;
2104	int mod = phy->modtype;
2105	int m = IFM_ETHER | IFM_FDX;
2106
2107	PORT_LOCK(p);
2108
2109	ifmedia_removeall(media);
2110	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2111		/* Copper (RJ45) */
2112
2113		if (phy->caps & SUPPORTED_10000baseT_Full)
2114			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2115
2116		if (phy->caps & SUPPORTED_1000baseT_Full)
2117			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2118
2119		if (phy->caps & SUPPORTED_100baseT_Full)
2120			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2121
2122		if (phy->caps & SUPPORTED_10baseT_Full)
2123			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2124
2125		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2126		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2127
2128	} else if (phy->caps & SUPPORTED_TP) {
2129		/* Copper (CX4) */
2130
2131		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2132			("%s: unexpected cap 0x%x", __func__, phy->caps));
2133
2134		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2135		ifmedia_set(media, m | IFM_10G_CX4);
2136
2137	} else if (phy->caps & SUPPORTED_FIBRE &&
2138		   phy->caps & SUPPORTED_10000baseT_Full) {
2139		/* 10G optical (but includes SFP+ twinax) */
2140
2141		m |= cxgb_ifm_type(mod);
2142		if (IFM_SUBTYPE(m) == IFM_NONE)
2143			m &= ~IFM_FDX;
2144
2145		ifmedia_add(media, m, mod, NULL);
2146		ifmedia_set(media, m);
2147
2148	} else if (phy->caps & SUPPORTED_FIBRE &&
2149		   phy->caps & SUPPORTED_1000baseT_Full) {
2150		/* 1G optical */
2151
2152		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2153		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2154		ifmedia_set(media, m | IFM_1000_SX);
2155
2156	} else {
2157		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2158			    phy->caps));
2159	}
2160
2161	PORT_UNLOCK(p);
2162}
2163
2164static void
2165cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2166{
2167	struct port_info *p = ifp->if_softc;
2168	struct ifmedia_entry *cur = p->media.ifm_cur;
2169	int speed = p->link_config.speed;
2170
2171	if (cur->ifm_data != p->phy.modtype) {
2172		cxgb_build_medialist(p);
2173		cur = p->media.ifm_cur;
2174	}
2175
2176	ifmr->ifm_status = IFM_AVALID;
2177	if (!p->link_config.link_ok)
2178		return;
2179
2180	ifmr->ifm_status |= IFM_ACTIVE;
2181
2182	/*
2183	 * The active and current media differ only when the current media is
2184	 * autoselect, which can happen only for copper (RJ45).
2185	 */
2186	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2187		return;
2188	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2189		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2190
2191	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2192	if (speed == SPEED_10000)
2193		ifmr->ifm_active |= IFM_10G_T;
2194	else if (speed == SPEED_1000)
2195		ifmr->ifm_active |= IFM_1000_T;
2196	else if (speed == SPEED_100)
2197		ifmr->ifm_active |= IFM_100_TX;
2198	else if (speed == SPEED_10)
2199		ifmr->ifm_active |= IFM_10_T;
2200	else
2201		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2202			    speed));
2203}
2204
2205static uint64_t
2206cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2207{
2208	struct port_info *pi = ifp->if_softc;
2209	struct adapter *sc = pi->adapter;
2210	struct cmac *mac = &pi->mac;
2211	struct mac_stats *mstats = &mac->stats;
2212
2213	cxgb_refresh_stats(pi);
2214
2215	switch (c) {
2216	case IFCOUNTER_IPACKETS:
2217		return (mstats->rx_frames);
2218
2219	case IFCOUNTER_IERRORS:
2220		return (mstats->rx_jabber + mstats->rx_data_errs +
2221		    mstats->rx_sequence_errs + mstats->rx_runt +
2222		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2223		    mstats->rx_short + mstats->rx_fcs_errs);
2224
2225	case IFCOUNTER_OPACKETS:
2226		return (mstats->tx_frames);
2227
2228	case IFCOUNTER_OERRORS:
2229		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2230		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2231		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2232
2233	case IFCOUNTER_COLLISIONS:
2234		return (mstats->tx_total_collisions);
2235
2236	case IFCOUNTER_IBYTES:
2237		return (mstats->rx_octets);
2238
2239	case IFCOUNTER_OBYTES:
2240		return (mstats->tx_octets);
2241
2242	case IFCOUNTER_IMCASTS:
2243		return (mstats->rx_mcast_frames);
2244
2245	case IFCOUNTER_OMCASTS:
2246		return (mstats->tx_mcast_frames);
2247
2248	case IFCOUNTER_IQDROPS:
2249		return (mstats->rx_cong_drops);
2250
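	/*
	 * Tx drops are tracked per buf_ring; sum them over this port's
	 * queue sets.  The rings exist only after FULL_INIT_DONE.
	 */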
2251	case IFCOUNTER_OQDROPS: {
2252		int i;
2253		uint64_t drops;
2254
2255		drops = 0;
2256		if (sc->flags & FULL_INIT_DONE) {
2257			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2258				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2259		}
2260
2261		return (drops);
2262
2263	}
2264
2265	default:
2266		return (if_get_counter_default(ifp, c));
2267	}
2268}
2269
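/*
 * Interrupt handler for slow-path events.  Mask the top-level interrupt
 * cause register, flush the write with a read-back, and defer processing
 * to the slow_intr_task on the adapter's taskqueue.
 */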
2270static void
2271cxgb_async_intr(void *data)
2272{
2273	adapter_t *sc = data;
2274
2275	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2276	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2277	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2278}
2279
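/*
 * Callout handler that schedules a link check for an open port.  The
 * actual PHY access happens in task context (check_link_status).
 */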
2280static void
2281link_check_callout(void *arg)
2282{
2283	struct port_info *pi = arg;
2284	struct adapter *sc = pi->adapter;
2285
2286	if (!isset(&sc->open_device_map, pi->port_id))
2287		return;
2288
2289	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2290}
2291
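/*
 * Task that performs the link check.  If the PHY cannot report link
 * changes by interrupt, or the link is down or faulted, the callout is
 * rearmed so the link keeps being polled about once a second.
 */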
2292static void
2293check_link_status(void *arg, int pending)
2294{
2295	struct port_info *pi = arg;
2296	struct adapter *sc = pi->adapter;
2297
2298	if (!isset(&sc->open_device_map, pi->port_id))
2299		return;
2300
2301	t3_link_changed(sc, pi->port_id);
2302
2303	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2304	    pi->link_config.link_ok == 0)
2305		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2306}
2307
2308void
2309t3_os_link_intr(struct port_info *pi)
2310{
2311	/*
2312	 * Schedule a link check in the near future.  If the link is flapping
2313	 * rapidly we'll keep resetting the callout and delaying the check until
2314	 * things stabilize a bit.
2315	 */
2316	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2317}
2318
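/*
 * Periodic MAC sanity check for T3B2 adapters.  Runs the MAC watchdog on
 * every open port with a healthy link: a status of 1 is counted as a MAC
 * toggle, a status of 2 triggers a full recovery (reapply MAC settings,
 * restart the link, re-enable the MAC and port interrupts).
 */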
2319static void
2320check_t3b2_mac(struct adapter *sc)
2321{
2322	int i;
2323
2324	if (sc->flags & CXGB_SHUTDOWN)
2325		return;
2326
2327	for_each_port(sc, i) {
2328		struct port_info *p = &sc->port[i];
2329		int status;
2330#ifdef INVARIANTS
2331		struct ifnet *ifp = p->ifp;
2332#endif
2333
2334		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2335		    !p->link_config.link_ok)
2336			continue;
2337
2338		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2339			("%s: state mismatch (drv_flags %x, device_map %x)",
2340			 __func__, ifp->if_drv_flags, sc->open_device_map));
2341
2342		PORT_LOCK(p);
2343		status = t3b2_mac_watchdog_task(&p->mac);
2344		if (status == 1)
2345			p->mac.stats.num_toggled++;
2346		else if (status == 2) {
2347			struct cmac *mac = &p->mac;
2348
2349			cxgb_update_mac_settings(p);
2350			t3_link_start(&p->phy, mac, &p->link_config);
2351			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2352			t3_port_intr_enable(sc, p->port_id);
2353			p->mac.stats.num_resets++;
2354		}
2355		PORT_UNLOCK(p);
2356	}
2357}
2358
2359static void
2360cxgb_tick(void *arg)
2361{
2362	adapter_t *sc = (adapter_t *)arg;
2363
2364	if (sc->flags & CXGB_SHUTDOWN)
2365		return;
2366
2367	taskqueue_enqueue(sc->tq, &sc->tick_task);
2368	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2369}
2370
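/*
 * Pull fresh MAC statistics from the hardware, at most once every 250ms
 * per port.
 */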
2371void
2372cxgb_refresh_stats(struct port_info *pi)
2373{
2374	struct timeval tv;
2375	const struct timeval interval = {0, 250000};    /* 250ms */
2376
2377	getmicrotime(&tv);
2378	timevalsub(&tv, &interval);
2379	if (timevalcmp(&tv, &pi->last_refreshed, <))
2380		return;
2381
2382	PORT_LOCK(pi);
2383	t3_mac_update_stats(&pi->mac);
2384	PORT_UNLOCK(pi);
2385	getmicrotime(&pi->last_refreshed);
2386}
2387
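/*
 * Once-a-second tick work: run the T3B2 MAC check, account for starved
 * response queues and empty free lists, refresh per-port MAC statistics,
 * and count RX FIFO overflows.
 */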
2388static void
2389cxgb_tick_handler(void *arg, int count)
2390{
2391	adapter_t *sc = (adapter_t *)arg;
2392	const struct adapter_params *p = &sc->params;
2393	int i;
2394	uint32_t cause, reset;
2395
2396	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2397		return;
2398
2399	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2400		check_t3b2_mac(sc);
2401
2402	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2403	if (cause) {
2404		struct sge_qset *qs = &sc->sge.qs[0];
2405		uint32_t mask, v;
2406
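		/*
		 * The low SGE_QSETS bits of SG_RSPQ_FL_STATUS flag starved
		 * response queues; after skipping the RSPQXDISABLED bits, the
		 * next 2 * SGE_QSETS bits flag empty free lists.
		 */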
2407		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2408
2409		mask = 1;
2410		for (i = 0; i < SGE_QSETS; i++) {
2411			if (v & mask)
2412				qs[i].rspq.starved++;
2413			mask <<= 1;
2414		}
2415
2416		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2417
2418		for (i = 0; i < SGE_QSETS * 2; i++) {
2419			if (v & mask) {
2420				qs[i / 2].fl[i % 2].empty++;
2421			}
2422			mask <<= 1;
2423		}
2424
2425		/* clear */
2426		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2427		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2428	}
2429
2430	for (i = 0; i < sc->params.nports; i++) {
2431		struct port_info *pi = &sc->port[i];
2432		struct cmac *mac = &pi->mac;
2433
2434		if (!isset(&sc->open_device_map, pi->port_id))
2435			continue;
2436
2437		cxgb_refresh_stats(pi);
2438
2439		if (mac->multiport)
2440			continue;
2441
2442		/* Count rx fifo overflows, once per second */
2443		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2444		reset = 0;
2445		if (cause & F_RXFIFO_OVERFLOW) {
2446			mac->stats.rx_fifo_ovfl++;
2447			reset |= F_RXFIFO_OVERFLOW;
2448		}
2449		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2450	}
2451}
2452
2453static void
2454touch_bars(device_t dev)
2455{
2456	/*
2457	 * Don't enable yet
2458	 */
2459#if !defined(__LP64__) && 0
2460	u32 v;
2461
2462	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2463	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2464	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2465	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2466	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2467	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2468#endif
2469}
2470
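/*
 * Write 'len' bytes at 'offset' into the serial EEPROM.  The EEPROM is
 * accessed in 4-byte words, so a write that is not word aligned is handled
 * by reading the partial words at either end, merging in the new data, and
 * writing the whole aligned region back with write protection lifted.
 */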
2471static int
2472set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2473{
2474	uint8_t *buf;
2475	int err = 0;
2476	u32 aligned_offset, aligned_len, *p;
2477	struct adapter *adapter = pi->adapter;
2478
2479
2480	aligned_offset = offset & ~3;
2481	aligned_len = (len + (offset & 3) + 3) & ~3;
2482
2483	if (aligned_offset != offset || aligned_len != len) {
2484		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2485		if (!buf)
2486			return (ENOMEM);
2487		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2488		if (!err && aligned_len > 4)
2489			err = t3_seeprom_read(adapter,
2490					      aligned_offset + aligned_len - 4,
2491					      (u32 *)&buf[aligned_len - 4]);
2492		if (err)
2493			goto out;
2494		memcpy(buf + (offset & 3), data, len);
2495	} else
2496		buf = (uint8_t *)(uintptr_t)data;
2497
2498	err = t3_seeprom_wp(adapter, 0);
2499	if (err)
2500		goto out;
2501
2502	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2503		err = t3_seeprom_write(adapter, aligned_offset, *p);
2504		aligned_offset += 4;
2505	}
2506
2507	if (!err)
2508		err = t3_seeprom_wp(adapter, 1);
2509out:
2510	if (buf != data)
2511		free(buf, M_DEVBUF);
2512	return (err);
2513}
2514
2515
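/* A negative value means "not specified" and is always considered in range. */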
2516static int
2517in_range(int val, int lo, int hi)
2518{
2519	return (val < 0 || (val <= hi && val >= lo));
2520}
2521
2522static int
2523cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2524{
2525	return (0);
2526}
2527
2528static int
2529cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2530{
2531	return (0);
2532}
2533
2534static int
2535cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2536    int fflag, struct thread *td)
2537{
2538	int mmd, error = 0;
2539	struct port_info *pi = dev->si_drv1;
2540	adapter_t *sc = pi->adapter;
2541
2542#ifdef PRIV_SUPPORTED
2543	if (priv_check(td, PRIV_DRIVER)) {
2544		if (cxgb_debug)
2545			printf("user does not have access to privileged ioctls\n");
2546		return (EPERM);
2547	}
2548#else
2549	if (suser(td)) {
2550		if (cxgb_debug)
2551			printf("user does not have access to privileged ioctls\n");
2552		return (EPERM);
2553	}
2554#endif
2555
2556	switch (cmd) {
2557	case CHELSIO_GET_MIIREG: {
2558		uint32_t val;
2559		struct cphy *phy = &pi->phy;
2560		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2561
2562		if (!phy->mdio_read)
2563			return (EOPNOTSUPP);
2564		if (is_10G(sc)) {
2565			mmd = mid->phy_id >> 8;
2566			if (!mmd)
2567				mmd = MDIO_DEV_PCS;
2568			else if (mmd > MDIO_DEV_VEND2)
2569				return (EINVAL);
2570
2571			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2572					     mid->reg_num, &val);
2573		} else
2574			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2575					     mid->reg_num & 0x1f, &val);
2576		if (error == 0)
2577			mid->val_out = val;
2578		break;
2579	}
2580	case CHELSIO_SET_MIIREG: {
2581		struct cphy *phy = &pi->phy;
2582		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2583
2584		if (!phy->mdio_write)
2585			return (EOPNOTSUPP);
2586		if (is_10G(sc)) {
2587			mmd = mid->phy_id >> 8;
2588			if (!mmd)
2589				mmd = MDIO_DEV_PCS;
2590			else if (mmd > MDIO_DEV_VEND2)
2591				return (EINVAL);
2592
2593			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2594					      mmd, mid->reg_num, mid->val_in);
2595		} else
2596			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2597					      mid->reg_num & 0x1f,
2598					      mid->val_in);
2599		break;
2600	}
2601	case CHELSIO_SETREG: {
2602		struct ch_reg *edata = (struct ch_reg *)data;
2603		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2604			return (EFAULT);
2605		t3_write_reg(sc, edata->addr, edata->val);
2606		break;
2607	}
2608	case CHELSIO_GETREG: {
2609		struct ch_reg *edata = (struct ch_reg *)data;
2610		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2611			return (EFAULT);
2612		edata->val = t3_read_reg(sc, edata->addr);
2613		break;
2614	}
2615	case CHELSIO_GET_SGE_CONTEXT: {
2616		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2617		mtx_lock_spin(&sc->sge.reg_lock);
2618		switch (ecntxt->cntxt_type) {
2619		case CNTXT_TYPE_EGRESS:
2620			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2621			    ecntxt->data);
2622			break;
2623		case CNTXT_TYPE_FL:
2624			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2625			    ecntxt->data);
2626			break;
2627		case CNTXT_TYPE_RSP:
2628			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2629			    ecntxt->data);
2630			break;
2631		case CNTXT_TYPE_CQ:
2632			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2633			    ecntxt->data);
2634			break;
2635		default:
2636			error = EINVAL;
2637			break;
2638		}
2639		mtx_unlock_spin(&sc->sge.reg_lock);
2640		break;
2641	}
2642	case CHELSIO_GET_SGE_DESC: {
2643		struct ch_desc *edesc = (struct ch_desc *)data;
2644		int ret;
2645		if (edesc->queue_num >= SGE_QSETS * 6)
2646			return (EINVAL);
2647		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2648		    edesc->queue_num % 6, edesc->idx, edesc->data);
2649		if (ret < 0)
2650			return (EINVAL);
2651		edesc->size = ret;
2652		break;
2653	}
2654	case CHELSIO_GET_QSET_PARAMS: {
2655		struct qset_params *q;
2656		struct ch_qset_params *t = (struct ch_qset_params *)data;
2657		int q1 = pi->first_qset;
2658		int nqsets = pi->nqsets;
2659		int i;
2660
2661		if (t->qset_idx >= nqsets)
2662			return (EINVAL);
2663
2664		i = q1 + t->qset_idx;
2665		q = &sc->params.sge.qset[i];
2666		t->rspq_size   = q->rspq_size;
2667		t->txq_size[0] = q->txq_size[0];
2668		t->txq_size[1] = q->txq_size[1];
2669		t->txq_size[2] = q->txq_size[2];
2670		t->fl_size[0]  = q->fl_size;
2671		t->fl_size[1]  = q->jumbo_size;
2672		t->polling     = q->polling;
2673		t->lro         = q->lro;
2674		t->intr_lat    = q->coalesce_usecs;
2675		t->cong_thres  = q->cong_thres;
2676		t->qnum        = i;
2677
2678		if ((sc->flags & FULL_INIT_DONE) == 0)
2679			t->vector = 0;
2680		else if (sc->flags & USING_MSIX)
2681			t->vector = rman_get_start(sc->msix_irq_res[i]);
2682		else
2683			t->vector = rman_get_start(sc->irq_res);
2684
2685		break;
2686	}
2687	case CHELSIO_GET_QSET_NUM: {
2688		struct ch_reg *edata = (struct ch_reg *)data;
2689		edata->val = pi->nqsets;
2690		break;
2691	}
2692	case CHELSIO_LOAD_FW: {
2693		uint8_t *fw_data;
2694		uint32_t vers;
2695		struct ch_mem_range *t = (struct ch_mem_range *)data;
2696
2697		/*
2698		 * Firmware may be loaded only before FULL_INIT_DONE is set.
2699		 *
2700		 * FW_UPTODATE is also set so the rest of the initialization
2701		 * will not overwrite what was loaded here.  This gives you the
2702		 * flexibility to load any firmware (and maybe shoot yourself in
2703		 * the foot).
2704		 */
2705
2706		ADAPTER_LOCK(sc);
2707		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2708			ADAPTER_UNLOCK(sc);
2709			return (EBUSY);
2710		}
2711
2712		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2713		if (!fw_data)
2714			error = ENOMEM;
2715		else
2716			error = copyin(t->buf, fw_data, t->len);
2717
2718		if (!error)
2719			error = -t3_load_fw(sc, fw_data, t->len);
2720
2721		if (t3_get_fw_version(sc, &vers) == 0) {
2722			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2723			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2724			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2725		}
2726
2727		if (!error)
2728			sc->flags |= FW_UPTODATE;
2729
2730		free(fw_data, M_DEVBUF);
2731		ADAPTER_UNLOCK(sc);
2732		break;
2733	}
2734	case CHELSIO_LOAD_BOOT: {
2735		uint8_t *boot_data;
2736		struct ch_mem_range *t = (struct ch_mem_range *)data;
2737
2738		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2739		if (!boot_data)
2740			return (ENOMEM);
2741
2742		error = copyin(t->buf, boot_data, t->len);
2743		if (!error)
2744			error = -t3_load_boot(sc, boot_data, t->len);
2745
2746		free(boot_data, M_DEVBUF);
2747		break;
2748	}
2749	case CHELSIO_GET_PM: {
2750		struct ch_pm *m = (struct ch_pm *)data;
2751		struct tp_params *p = &sc->params.tp;
2752
2753		if (!is_offload(sc))
2754			return (EOPNOTSUPP);
2755
2756		m->tx_pg_sz = p->tx_pg_size;
2757		m->tx_num_pg = p->tx_num_pgs;
2758		m->rx_pg_sz  = p->rx_pg_size;
2759		m->rx_num_pg = p->rx_num_pgs;
2760		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2761
2762		break;
2763	}
2764	case CHELSIO_SET_PM: {
2765		struct ch_pm *m = (struct ch_pm *)data;
2766		struct tp_params *p = &sc->params.tp;
2767
2768		if (!is_offload(sc))
2769			return (EOPNOTSUPP);
2770		if (sc->flags & FULL_INIT_DONE)
2771			return (EBUSY);
2772
2773		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2774		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2775			return (EINVAL);	/* not power of 2 */
2776		if (!(m->rx_pg_sz & 0x14000))
2777			return (EINVAL);	/* not 16KB or 64KB */
2778		if (!(m->tx_pg_sz & 0x1554000))
2779			return (EINVAL);
2780		if (m->tx_num_pg == -1)
2781			m->tx_num_pg = p->tx_num_pgs;
2782		if (m->rx_num_pg == -1)
2783			m->rx_num_pg = p->rx_num_pgs;
2784		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2785			return (EINVAL);
2786		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2787		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2788			return (EINVAL);
2789
2790		p->rx_pg_size = m->rx_pg_sz;
2791		p->tx_pg_size = m->tx_pg_sz;
2792		p->rx_num_pgs = m->rx_num_pg;
2793		p->tx_num_pgs = m->tx_num_pg;
2794		break;
2795	}
2796	case CHELSIO_SETMTUTAB: {
2797		struct ch_mtus *m = (struct ch_mtus *)data;
2798		int i;
2799
2800		if (!is_offload(sc))
2801			return (EOPNOTSUPP);
2802		if (offload_running(sc))
2803			return (EBUSY);
2804		if (m->nmtus != NMTUS)
2805			return (EINVAL);
2806		if (m->mtus[0] < 81)         /* accommodate SACK */
2807			return (EINVAL);
2808
2809		/*
2810		 * MTUs must be in ascending order
2811		 */
2812		for (i = 1; i < NMTUS; ++i)
2813			if (m->mtus[i] < m->mtus[i - 1])
2814				return (EINVAL);
2815
2816		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2817		break;
2818	}
2819	case CHELSIO_GETMTUTAB: {
2820		struct ch_mtus *m = (struct ch_mtus *)data;
2821
2822		if (!is_offload(sc))
2823			return (EOPNOTSUPP);
2824
2825		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2826		m->nmtus = NMTUS;
2827		break;
2828	}
2829	case CHELSIO_GET_MEM: {
2830		struct ch_mem_range *t = (struct ch_mem_range *)data;
2831		struct mc7 *mem;
2832		uint8_t *useraddr;
2833		u64 buf[32];
2834
2835		/*
2836		 * Use these to avoid modifying len/addr in the return
2837		 * struct
2838		 */
2839		uint32_t len = t->len, addr = t->addr;
2840
2841		if (!is_offload(sc))
2842			return (EOPNOTSUPP);
2843		if (!(sc->flags & FULL_INIT_DONE))
2844			return (EIO);         /* need the memory controllers */
2845		if ((addr & 0x7) || (len & 0x7))
2846			return (EINVAL);
2847		if (t->mem_id == MEM_CM)
2848			mem = &sc->cm;
2849		else if (t->mem_id == MEM_PMRX)
2850			mem = &sc->pmrx;
2851		else if (t->mem_id == MEM_PMTX)
2852			mem = &sc->pmtx;
2853		else
2854			return (EINVAL);
2855
2856		/*
2857		 * Version scheme:
2858		 * bits 0..9: chip version
2859		 * bits 10..15: chip revision
2860		 */
2861		t->version = 3 | (sc->params.rev << 10);
2862
2863		/*
2864		 * Read 256 bytes at a time as len can be large and we don't
2865		 * want to use huge intermediate buffers.
2866		 */
2867		useraddr = (uint8_t *)t->buf;
2868		while (len) {
2869			unsigned int chunk = min(len, sizeof(buf));
2870
2871			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2872			if (error)
2873				return (-error);
2874			if (copyout(buf, useraddr, chunk))
2875				return (EFAULT);
2876			useraddr += chunk;
2877			addr += chunk;
2878			len -= chunk;
2879		}
2880		break;
2881	}
2882	case CHELSIO_READ_TCAM_WORD: {
2883		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2884
2885		if (!is_offload(sc))
2886			return (EOPNOTSUPP);
2887		if (!(sc->flags & FULL_INIT_DONE))
2888			return (EIO);         /* need MC5 */
2889		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2890		break;
2891	}
2892	case CHELSIO_SET_TRACE_FILTER: {
2893		struct ch_trace *t = (struct ch_trace *)data;
2894		const struct trace_params *tp;
2895
2896		tp = (const struct trace_params *)&t->sip;
2897		if (t->config_tx)
2898			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2899					       t->trace_tx);
2900		if (t->config_rx)
2901			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2902					       t->trace_rx);
2903		break;
2904	}
2905	case CHELSIO_SET_PKTSCHED: {
2906		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2907		if (sc->open_device_map == 0)
2908			return (EAGAIN);
2909		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2910		    p->binding);
2911		break;
2912	}
2913	case CHELSIO_IFCONF_GETREGS: {
2914		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2915		int reglen = cxgb_get_regs_len();
2916		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2917		if (buf == NULL) {
2918			return (ENOMEM);
2919		}
2920		if (regs->len > reglen)
2921			regs->len = reglen;
2922		else if (regs->len < reglen)
2923			error = ENOBUFS;
2924
2925		if (!error) {
2926			cxgb_get_regs(sc, regs, buf);
2927			error = copyout(buf, regs->data, reglen);
2928		}
2929		free(buf, M_DEVBUF);
2930
2931		break;
2932	}
2933	case CHELSIO_SET_HW_SCHED: {
2934		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2935		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2936
2937		if ((sc->flags & FULL_INIT_DONE) == 0)
2938			return (EAGAIN);       /* need TP to be initialized */
2939		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2940		    !in_range(t->channel, 0, 1) ||
2941		    !in_range(t->kbps, 0, 10000000) ||
2942		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2943		    !in_range(t->flow_ipg, 0,
2944			      dack_ticks_to_usec(sc, 0x7ff)))
2945			return (EINVAL);
2946
2947		if (t->kbps >= 0) {
2948			error = t3_config_sched(sc, t->kbps, t->sched);
2949			if (error < 0)
2950				return (-error);
2951		}
2952		if (t->class_ipg >= 0)
2953			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2954		if (t->flow_ipg >= 0) {
2955			t->flow_ipg *= 1000;     /* us -> ns */
2956			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2957		}
2958		if (t->mode >= 0) {
2959			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2960
2961			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2962					 bit, t->mode ? bit : 0);
2963		}
2964		if (t->channel >= 0)
2965			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2966					 1 << t->sched, t->channel << t->sched);
2967		break;
2968	}
2969	case CHELSIO_GET_EEPROM: {
2970		int i;
2971		struct ch_eeprom *e = (struct ch_eeprom *)data;
2972		uint8_t *buf;
2973
2974		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2975		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2976			return (EINVAL);
2977		}
2978
2979		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2980		if (buf == NULL) {
2981			return (ENOMEM);
2982		}
2983		e->magic = EEPROM_MAGIC;
2984		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2985			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2986
2987		if (!error)
2988			error = copyout(buf + e->offset, e->data, e->len);
2989
2990		free(buf, M_DEVBUF);
2991		break;
2992	}
2993	case CHELSIO_CLEAR_STATS: {
2994		if (!(sc->flags & FULL_INIT_DONE))
2995			return (EAGAIN);
2996
2997		PORT_LOCK(pi);
2998		t3_mac_update_stats(&pi->mac);
2999		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3000		PORT_UNLOCK(pi);
3001		break;
3002	}
3003	case CHELSIO_GET_UP_LA: {
3004		struct ch_up_la *la = (struct ch_up_la *)data;
3005		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3006		if (buf == NULL) {
3007			return (ENOMEM);
3008		}
3009		if (la->bufsize < LA_BUFSIZE)
3010			error = ENOBUFS;
3011
3012		if (!error)
3013			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3014					      &la->bufsize, buf);
3015		if (!error)
3016			error = copyout(buf, la->data, la->bufsize);
3017
3018		free(buf, M_DEVBUF);
3019		break;
3020	}
3021	case CHELSIO_GET_UP_IOQS: {
3022		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3023		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3024		uint32_t *v;
3025
3026		if (buf == NULL) {
3027			return (ENOMEM);
3028		}
3029		if (ioqs->bufsize < IOQS_BUFSIZE)
3030			error = ENOBUFS;
3031
3032		if (!error)
3033			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3034
3035		if (!error) {
3036			v = (uint32_t *)buf;
3037
3038			ioqs->ioq_rx_enable = *v++;
3039			ioqs->ioq_tx_enable = *v++;
3040			ioqs->ioq_rx_status = *v++;
3041			ioqs->ioq_tx_status = *v++;
3042
3043			error = copyout(v, ioqs->data, ioqs->bufsize);
3044		}
3045
3046		free(buf, M_DEVBUF);
3047		break;
3048	}
3049	case CHELSIO_SET_FILTER: {
3050		struct ch_filter *f = (struct ch_filter *)data;
3051		struct filter_info *p;
3052		unsigned int nfilters = sc->params.mc5.nfilters;
3053
3054		if (!is_offload(sc))
3055			return (EOPNOTSUPP);	/* No TCAM */
3056		if (!(sc->flags & FULL_INIT_DONE))
3057			return (EAGAIN);	/* mc5 not setup yet */
3058		if (nfilters == 0)
3059			return (EBUSY);		/* TOE will use TCAM */
3060
3061		/* sanity checks */
3062		if (f->filter_id >= nfilters ||
3063		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3064		    (f->val.sport && f->mask.sport != 0xffff) ||
3065		    (f->val.dport && f->mask.dport != 0xffff) ||
3066		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3067		    (f->val.vlan_prio &&
3068			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3069		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3070		    f->qset >= SGE_QSETS ||
3071		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3072			return (EINVAL);
3073
3074		/* Was allocated with M_WAITOK */
3075		KASSERT(sc->filters, ("filter table NULL\n"));
3076
3077		p = &sc->filters[f->filter_id];
3078		if (p->locked)
3079			return (EPERM);
3080
3081		bzero(p, sizeof(*p));
3082		p->sip = f->val.sip;
3083		p->sip_mask = f->mask.sip;
3084		p->dip = f->val.dip;
3085		p->sport = f->val.sport;
3086		p->dport = f->val.dport;
3087		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3088		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3089		    FILTER_NO_VLAN_PRI;
3090		p->mac_hit = f->mac_hit;
3091		p->mac_vld = f->mac_addr_idx != 0xffff;
3092		p->mac_idx = f->mac_addr_idx;
3093		p->pkt_type = f->proto;
3094		p->report_filter_id = f->want_filter_id;
3095		p->pass = f->pass;
3096		p->rss = f->rss;
3097		p->qset = f->qset;
3098
3099		error = set_filter(sc, f->filter_id, p);
3100		if (error == 0)
3101			p->valid = 1;
3102		break;
3103	}
3104	case CHELSIO_DEL_FILTER: {
3105		struct ch_filter *f = (struct ch_filter *)data;
3106		struct filter_info *p;
3107		unsigned int nfilters = sc->params.mc5.nfilters;
3108
3109		if (!is_offload(sc))
3110			return (EOPNOTSUPP);
3111		if (!(sc->flags & FULL_INIT_DONE))
3112			return (EAGAIN);
3113		if (nfilters == 0 || sc->filters == NULL)
3114			return (EINVAL);
3115		if (f->filter_id >= nfilters)
3116			return (EINVAL);
3117
3118		p = &sc->filters[f->filter_id];
3119		if (p->locked)
3120			return (EPERM);
3121		if (!p->valid)
3122			return (EFAULT); /* Read "Bad address" as "Bad index" */
3123
3124		bzero(p, sizeof(*p));
3125		p->sip = p->sip_mask = 0xffffffff;
3126		p->vlan = 0xfff;
3127		p->vlan_prio = FILTER_NO_VLAN_PRI;
3128		p->pkt_type = 1;
3129		error = set_filter(sc, f->filter_id, p);
3130		break;
3131	}
3132	case CHELSIO_GET_FILTER: {
3133		struct ch_filter *f = (struct ch_filter *)data;
3134		struct filter_info *p;
3135		unsigned int i, nfilters = sc->params.mc5.nfilters;
3136
3137		if (!is_offload(sc))
3138			return (EOPNOTSUPP);
3139		if (!(sc->flags & FULL_INIT_DONE))
3140			return (EAGAIN);
3141		if (nfilters == 0 || sc->filters == NULL)
3142			return (EINVAL);
3143
3144		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3145		for (; i < nfilters; i++) {
3146			p = &sc->filters[i];
3147			if (!p->valid)
3148				continue;
3149
3150			bzero(f, sizeof(*f));
3151
3152			f->filter_id = i;
3153			f->val.sip = p->sip;
3154			f->mask.sip = p->sip_mask;
3155			f->val.dip = p->dip;
3156			f->mask.dip = p->dip ? 0xffffffff : 0;
3157			f->val.sport = p->sport;
3158			f->mask.sport = p->sport ? 0xffff : 0;
3159			f->val.dport = p->dport;
3160			f->mask.dport = p->dport ? 0xffff : 0;
3161			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3162			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3163			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3164			    0 : p->vlan_prio;
3165			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3166			    0 : FILTER_NO_VLAN_PRI;
3167			f->mac_hit = p->mac_hit;
3168			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3169			f->proto = p->pkt_type;
3170			f->want_filter_id = p->report_filter_id;
3171			f->pass = p->pass;
3172			f->rss = p->rss;
3173			f->qset = p->qset;
3174
3175			break;
3176		}
3177
3178		if (i == nfilters)
3179			f->filter_id = 0xffffffff;
3180		break;
3181	}
3182	default:
3183		return (EOPNOTSUPP);
3184		break;
3185	}
3186
3187	return (error);
3188}
3189
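/*
 * Copy the registers in [start, end] into the register dump buffer, at
 * the offset that corresponds to 'start'.
 */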
3190static __inline void
3191reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3192    unsigned int end)
3193{
3194	uint32_t *p = (uint32_t *)(buf + start);
3195
3196	for ( ; start <= end; start += sizeof(uint32_t))
3197		*p++ = t3_read_reg(ap, start);
3198}
3199
3200#define T3_REGMAP_SIZE (3 * 1024)
3201static int
3202cxgb_get_regs_len(void)
3203{
3204	return T3_REGMAP_SIZE;
3205}
3206
3207static void
3208cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3209{
3210
3211	/*
3212	 * Version scheme:
3213	 * bits 0..9: chip version
3214	 * bits 10..15: chip revision
3215	 * bit 31: set for PCIe cards
3216	 */
3217	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3218
3219	/*
3220	 * We skip the MAC statistics registers because they are clear-on-read.
3221	 * Also reading multi-register stats would need to synchronize with the
3222	 * periodic mac stats accumulation.  Hard to justify the complexity.
3223	 */
3224	memset(buf, 0, cxgb_get_regs_len());
3225	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3226	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3227	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3228	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3229	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3230	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3231		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3232	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3233		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3234}
3235
3236static int
3237alloc_filters(struct adapter *sc)
3238{
3239	struct filter_info *p;
3240	unsigned int nfilters = sc->params.mc5.nfilters;
3241
3242	if (nfilters == 0)
3243		return (0);
3244
3245	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3246	sc->filters = p;
3247
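	/*
	 * The last entry is a locked catch-all: it matches everything,
	 * passes packets to RSS, and cannot be changed via the filter ioctls.
	 */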
3248	p = &sc->filters[nfilters - 1];
3249	p->vlan = 0xfff;
3250	p->vlan_prio = FILTER_NO_VLAN_PRI;
3251	p->pass = p->rss = p->valid = p->locked = 1;
3252
3253	return (0);
3254}
3255
3256static int
3257setup_hw_filters(struct adapter *sc)
3258{
3259	int i, rc;
3260	unsigned int nfilters = sc->params.mc5.nfilters;
3261
3262	if (!sc->filters)
3263		return (0);
3264
3265	t3_enable_filters(sc);
3266
3267	for (i = rc = 0; i < nfilters && !rc; i++) {
3268		if (sc->filters[i].locked)
3269			rc = set_filter(sc, i, &sc->filters[i]);
3270	}
3271
3272	return (rc);
3273}
3274
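/*
 * Program one hardware filter.  The request goes out as a firmware-bypass
 * work request carrying a CPL_PASS_OPEN_REQ plus two CPL_SET_TCB_FIELD
 * commands; the filter index is first converted to an absolute index in
 * the MC5 TCAM (the filter region sits just below the routing entries).
 * A pass filter that bypasses RSS gets one more SET_TCB_FIELD to steer
 * matches to the requested queue set.
 */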
3275static int
3276set_filter(struct adapter *sc, int id, const struct filter_info *f)
3277{
3278	int len;
3279	struct mbuf *m;
3280	struct ulp_txpkt *txpkt;
3281	struct work_request_hdr *wr;
3282	struct cpl_pass_open_req *oreq;
3283	struct cpl_set_tcb_field *sreq;
3284
3285	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3286	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3287
3288	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3289	      sc->params.mc5.nfilters;
3290
3291	m = m_gethdr(M_WAITOK, MT_DATA);
3292	m->m_len = m->m_pkthdr.len = len;
3293	bzero(mtod(m, char *), len);
3294
3295	wr = mtod(m, struct work_request_hdr *);
3296	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3297
3298	oreq = (struct cpl_pass_open_req *)(wr + 1);
3299	txpkt = (struct ulp_txpkt *)oreq;
3300	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3301	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3302	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3303	oreq->local_port = htons(f->dport);
3304	oreq->peer_port = htons(f->sport);
3305	oreq->local_ip = htonl(f->dip);
3306	oreq->peer_ip = htonl(f->sip);
3307	oreq->peer_netmask = htonl(f->sip_mask);
3308	oreq->opt0h = 0;
3309	oreq->opt0l = htonl(F_NO_OFFLOAD);
3310	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3311			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3312			 V_VLAN_PRI(f->vlan_prio >> 1) |
3313			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3314			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3315			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3316
3317	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3318	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3319			  (f->report_filter_id << 15) | (1 << 23) |
3320			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3321	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3322	t3_mgmt_tx(sc, m);
3323
3324	if (f->pass && !f->rss) {
3325		len = sizeof(*sreq);
3326		m = m_gethdr(M_WAITOK, MT_DATA);
3327		m->m_len = m->m_pkthdr.len = len;
3328		bzero(mtod(m, char *), len);
3329		sreq = mtod(m, struct cpl_set_tcb_field *);
3330		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3331		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3332				 (u64)sc->rrss_map[f->qset] << 19);
3333		t3_mgmt_tx(sc, m);
3334	}
3335	return (0);
3336}
3337
3338static inline void
3339mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3340    unsigned int word, u64 mask, u64 val)
3341{
3342	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3343	req->reply = V_NO_REPLY(1);
3344	req->cpu_idx = 0;
3345	req->word = htons(word);
3346	req->mask = htobe64(mask);
3347	req->val = htobe64(val);
3348}
3349
3350static inline void
3351set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3352    unsigned int word, u64 mask, u64 val)
3353{
3354	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3355
3356	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3357	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3358	mk_set_tcb_field(req, tid, word, mask, val);
3359}
3360
3361void
3362t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3363{
3364	struct adapter *sc;
3365
3366	mtx_lock(&t3_list_lock);
3367	SLIST_FOREACH(sc, &t3_list, link) {
3368		/*
3369		 * func should not make any assumptions about what state sc is
3370		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3371		 */
3372		func(sc, arg);
3373	}
3374	mtx_unlock(&t3_list_lock);
3375}
3376
3377#ifdef TCP_OFFLOAD
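/*
 * Enable or disable TOE on a port.  Enabling requires a fully initialized
 * adapter; the TOM ULD is activated on first use and iWARP is activated
 * opportunistically at the same time (see the XXX below).
 */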
3378static int
3379toe_capability(struct port_info *pi, int enable)
3380{
3381	int rc;
3382	struct adapter *sc = pi->adapter;
3383
3384	ADAPTER_LOCK_ASSERT_OWNED(sc);
3385
3386	if (!is_offload(sc))
3387		return (ENODEV);
3388
3389	if (enable) {
3390		if (!(sc->flags & FULL_INIT_DONE)) {
3391			log(LOG_WARNING,
3392			    "You must enable a cxgb interface first\n");
3393			return (EAGAIN);
3394		}
3395
3396		if (isset(&sc->offload_map, pi->port_id))
3397			return (0);
3398
3399		if (!(sc->flags & TOM_INIT_DONE)) {
3400			rc = t3_activate_uld(sc, ULD_TOM);
3401			if (rc == EAGAIN) {
3402				log(LOG_WARNING,
3403				    "You must kldload t3_tom.ko before trying "
3404				    "to enable TOE on a cxgb interface.\n");
3405			}
3406			if (rc != 0)
3407				return (rc);
3408			KASSERT(sc->tom_softc != NULL,
3409			    ("%s: TOM activated but softc NULL", __func__));
3410			KASSERT(sc->flags & TOM_INIT_DONE,
3411			    ("%s: TOM activated but flag not set", __func__));
3412		}
3413
3414		setbit(&sc->offload_map, pi->port_id);
3415
3416		/*
3417		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3418		 * enabled on any port.  Need to figure out how to enable,
3419		 * disable, load, and unload iWARP cleanly.
3420		 */
3421		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3422		    t3_activate_uld(sc, ULD_IWARP) == 0)
3423			setbit(&sc->offload_map, MAX_NPORTS);
3424	} else {
3425		if (!isset(&sc->offload_map, pi->port_id))
3426			return (0);
3427
3428		KASSERT(sc->flags & TOM_INIT_DONE,
3429		    ("%s: TOM never initialized?", __func__));
3430		clrbit(&sc->offload_map, pi->port_id);
3431	}
3432
3433	return (0);
3434}
3435
3436/*
3437 * Add an upper layer driver to the global list.
3438 */
3439int
3440t3_register_uld(struct uld_info *ui)
3441{
3442	int rc = 0;
3443	struct uld_info *u;
3444
3445	mtx_lock(&t3_uld_list_lock);
3446	SLIST_FOREACH(u, &t3_uld_list, link) {
3447		if (u->uld_id == ui->uld_id) {
3448			rc = EEXIST;
3449			goto done;
3450		}
3451	}
3452
3453	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3454	ui->refcount = 0;
3455done:
3456	mtx_unlock(&t3_uld_list_lock);
3457	return (rc);
3458}
3459
3460int
3461t3_unregister_uld(struct uld_info *ui)
3462{
3463	int rc = EINVAL;
3464	struct uld_info *u;
3465
3466	mtx_lock(&t3_uld_list_lock);
3467
3468	SLIST_FOREACH(u, &t3_uld_list, link) {
3469		if (u == ui) {
3470			if (ui->refcount > 0) {
3471				rc = EBUSY;
3472				goto done;
3473			}
3474
3475			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3476			rc = 0;
3477			goto done;
3478		}
3479	}
3480done:
3481	mtx_unlock(&t3_uld_list_lock);
3482	return (rc);
3483}
3484
3485int
3486t3_activate_uld(struct adapter *sc, int id)
3487{
3488	int rc = EAGAIN;
3489	struct uld_info *ui;
3490
3491	mtx_lock(&t3_uld_list_lock);
3492
3493	SLIST_FOREACH(ui, &t3_uld_list, link) {
3494		if (ui->uld_id == id) {
3495			rc = ui->activate(sc);
3496			if (rc == 0)
3497				ui->refcount++;
3498			goto done;
3499		}
3500	}
3501done:
3502	mtx_unlock(&t3_uld_list_lock);
3503
3504	return (rc);
3505}
3506
3507int
3508t3_deactivate_uld(struct adapter *sc, int id)
3509{
3510	int rc = EINVAL;
3511	struct uld_info *ui;
3512
3513	mtx_lock(&t3_uld_list_lock);
3514
3515	SLIST_FOREACH(ui, &t3_uld_list, link) {
3516		if (ui->uld_id == id) {
3517			rc = ui->deactivate(sc);
3518			if (rc == 0)
3519				ui->refcount--;
3520			goto done;
3521		}
3522	}
3523done:
3524	mtx_unlock(&t3_uld_list_lock);
3525
3526	return (rc);
3527}
3528
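/*
 * Default CPL handler: drop the message.  Real handlers are installed with
 * t3_register_cpl_handler(); passing a NULL handler restores this default.
 */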
3529static int
3530cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3531    struct mbuf *m)
3532{
3533	m_freem(m);
3534	return (EDOOFUS);
3535}
3536
3537int
3538t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3539{
3540	uintptr_t *loc, new;
3541
3542	if (opcode >= NUM_CPL_HANDLERS)
3543		return (EINVAL);
3544
3545	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3546	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3547	atomic_store_rel_ptr(loc, new);
3548
3549	return (0);
3550}
3551#endif
3552
3553static int
3554cxgbc_mod_event(module_t mod, int cmd, void *arg)
3555{
3556	int rc = 0;
3557
3558	switch (cmd) {
3559	case MOD_LOAD:
3560		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3561		SLIST_INIT(&t3_list);
3562#ifdef TCP_OFFLOAD
3563		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3564		SLIST_INIT(&t3_uld_list);
3565#endif
3566		break;
3567
3568	case MOD_UNLOAD:
3569#ifdef TCP_OFFLOAD
3570		mtx_lock(&t3_uld_list_lock);
3571		if (!SLIST_EMPTY(&t3_uld_list)) {
3572			rc = EBUSY;
3573			mtx_unlock(&t3_uld_list_lock);
3574			break;
3575		}
3576		mtx_unlock(&t3_uld_list_lock);
3577		mtx_destroy(&t3_uld_list_lock);
3578#endif
3579		mtx_lock(&t3_list_lock);
3580		if (!SLIST_EMPTY(&t3_list)) {
3581			rc = EBUSY;
3582			mtx_unlock(&t3_list_lock);
3583			break;
3584		}
3585		mtx_unlock(&t3_list_lock);
3586		mtx_destroy(&t3_list_lock);
3587		break;
3588	}
3589
3590	return (rc);
3591}
3592
3593#ifdef NETDUMP
3594static void
3595cxgb_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
3596{
3597	struct port_info *pi;
3598	adapter_t *adap;
3599
3600	pi = if_getsoftc(ifp);
3601	adap = pi->adapter;
3602	ADAPTER_LOCK(adap);
3603	*nrxr = SGE_QSETS;
3604	*ncl = adap->sge.qs[0].fl[1].size;
3605	*clsize = adap->sge.qs[0].fl[1].buf_size;
3606	ADAPTER_UNLOCK(adap);
3607}
3608
3609static void
3610cxgb_netdump_event(struct ifnet *ifp, enum netdump_ev event)
3611{
3612	struct port_info *pi;
3613	struct sge_qset *qs;
3614	int i;
3615
3616	pi = if_getsoftc(ifp);
3617	if (event == NETDUMP_START)
3618		for (i = 0; i < SGE_QSETS; i++) {
3619			qs = &pi->adapter->sge.qs[i];
3620
3621			/* Need to reinit after netdump_mbuf_dump(). */
3622			qs->fl[0].zone = zone_pack;
3623			qs->fl[1].zone = zone_clust;
3624			qs->lro.enabled = 0;
3625		}
3626}
3627
3628static int
3629cxgb_netdump_transmit(struct ifnet *ifp, struct mbuf *m)
3630{
3631	struct port_info *pi;
3632	struct sge_qset *qs;
3633
3634	pi = if_getsoftc(ifp);
3635	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3636	    IFF_DRV_RUNNING)
3637		return (ENOENT);
3638
3639	qs = &pi->adapter->sge.qs[pi->first_qset];
3640	return (cxgb_netdump_encap(qs, &m));
3641}
3642
3643static int
3644cxgb_netdump_poll(struct ifnet *ifp, int count)
3645{
3646	struct port_info *pi;
3647	adapter_t *adap;
3648	int i;
3649
3650	pi = if_getsoftc(ifp);
3651	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3652		return (ENOENT);
3653
3654	adap = pi->adapter;
3655	for (i = 0; i < SGE_QSETS; i++)
3656		(void)cxgb_netdump_poll_rx(adap, &adap->sge.qs[i]);
3657	(void)cxgb_netdump_poll_tx(&adap->sge.qs[pi->first_qset]);
3658	return (0);
3659}
3660#endif /* NETDUMP */
3661