cxgb_main.c revision 209115
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 209115 2010-06-12 22:24:39Z np $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/ktr.h>
44#include <sys/rman.h>
45#include <sys/ioccom.h>
46#include <sys/mbuf.h>
47#include <sys/linker.h>
48#include <sys/firmware.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/taskqueue.h>
56#include <sys/proc.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64#include <net/if_types.h>
65#include <net/if_vlan_var.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet/ip.h>
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74
75#include <dev/pci/pcireg.h>
76#include <dev/pci/pcivar.h>
77#include <dev/pci/pci_private.h>
78
79#include <cxgb_include.h>
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_interrupts(adapter_t *);
86static void cxgb_teardown_interrupts(adapter_t *);
87static void cxgb_init(void *);
88static int cxgb_init_locked(struct port_info *);
89static int cxgb_uninit_locked(struct port_info *);
90static int cxgb_uninit_synchronized(struct port_info *);
91static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92static int cxgb_media_change(struct ifnet *);
93static int cxgb_ifm_type(int);
94static void cxgb_build_medialist(struct port_info *);
95static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96static int setup_sge_qsets(adapter_t *);
97static void cxgb_async_intr(void *);
98static void cxgb_ext_intr_handler(void *, int);
99static void cxgb_tick_handler(void *, int);
100static void cxgb_tick(void *);
101static void setup_rss(adapter_t *sc);
102static int alloc_filters(struct adapter *);
103static int setup_hw_filters(struct adapter *);
104static int set_filter(struct adapter *, int, const struct filter_info *);
105static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
106    unsigned int, u64, u64);
107static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
108    unsigned int, u64, u64);
109
110/* Attachment glue for the PCI controller end of the device.  Each port of
111 * the device is attached separately, as defined later.
112 */
113static int cxgb_controller_probe(device_t);
114static int cxgb_controller_attach(device_t);
115static int cxgb_controller_detach(device_t);
116static void cxgb_free(struct adapter *);
117static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
118    unsigned int end);
119static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
120static int cxgb_get_regs_len(void);
121static int offload_open(struct port_info *pi);
122static void touch_bars(device_t dev);
123static int offload_close(struct t3cdev *tdev);
124static void cxgb_update_mac_settings(struct port_info *p);
125
126static device_method_t cxgb_controller_methods[] = {
127	DEVMETHOD(device_probe,		cxgb_controller_probe),
128	DEVMETHOD(device_attach,	cxgb_controller_attach),
129	DEVMETHOD(device_detach,	cxgb_controller_detach),
130
131	/* bus interface */
132	DEVMETHOD(bus_print_child,	bus_generic_print_child),
133	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
134
135	{ 0, 0 }
136};
137
138static driver_t cxgb_controller_driver = {
139	"cxgbc",
140	cxgb_controller_methods,
141	sizeof(struct adapter)
142};
143
144static devclass_t	cxgb_controller_devclass;
145DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
146
147/*
148 * Attachment glue for the ports.  Attachment is done directly to the
149 * controller device.
150 */
151static int cxgb_port_probe(device_t);
152static int cxgb_port_attach(device_t);
153static int cxgb_port_detach(device_t);
154
155static device_method_t cxgb_port_methods[] = {
156	DEVMETHOD(device_probe,		cxgb_port_probe),
157	DEVMETHOD(device_attach,	cxgb_port_attach),
158	DEVMETHOD(device_detach,	cxgb_port_detach),
159	{ 0, 0 }
160};
161
162static driver_t cxgb_port_driver = {
163	"cxgb",
164	cxgb_port_methods,
165	0
166};
167
168static d_ioctl_t cxgb_extension_ioctl;
169static d_open_t cxgb_extension_open;
170static d_close_t cxgb_extension_close;
171
172static struct cdevsw cxgb_cdevsw = {
173       .d_version =    D_VERSION,
174       .d_flags =      0,
175       .d_open =       cxgb_extension_open,
176       .d_close =      cxgb_extension_close,
177       .d_ioctl =      cxgb_extension_ioctl,
178       .d_name =       "cxgb",
179};
180
181static devclass_t	cxgb_port_devclass;
182DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
183
184/*
185 * The driver uses the best interrupt scheme available on a platform in the
186 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
187 * of these schemes the driver may consider as follows:
188 *
189 * msi = 2: choose from among all three options
190 * msi = 1: only consider MSI and pin interrupts
191 * msi = 0: force pin interrupts
192 */
193static int msi_allowed = 2;
194
195TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
196SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
197SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
198    "MSI-X, MSI, INTx selector");
199
200/*
201 * The driver enables offload as a default.
202 * To disable it, use ofld_disable = 1.
203 */
204static int ofld_disable = 0;
205TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
206SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
207    "disable ULP offload");
208
209/*
210 * The driver uses an auto-queue algorithm by default.
211 * To disable it and force a single queue-set per port, use multiq = 0
212 */
213static int multiq = 1;
214TUNABLE_INT("hw.cxgb.multiq", &multiq);
215SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
216    "use min(ncpus/ports, 8) queue-sets per port");
217
218/*
219 * By default the driver will not update the firmware unless
220 * it was compiled against a newer version
221 *
222 */
223static int force_fw_update = 0;
224TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
225SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
226    "update firmware even if up to date");
227
228int cxgb_use_16k_clusters = -1;
229TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
230SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
231    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
232
233/*
234 * Tune the size of the output queue.
235 */
236int cxgb_snd_queue_len = IFQ_MAXLEN;
237TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
238SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
239    &cxgb_snd_queue_len, 0, "send queue size ");
240
241static int nfilters = -1;
242TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
243SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
244    &nfilters, 0, "max number of entries in the filter table");
245
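/*
 * All of the tunables above are loader(8) tunables (CTLFLAG_RDTUN) and can be
 * set from loader.conf(5).  Example settings, with values chosen purely for
 * illustration:
 *
 *	hw.cxgb.msi_allowed=1		# consider MSI and INTx but not MSI-X
 *	hw.cxgb.multiq=0		# force one queue set per port
 *	hw.cxgb.force_fw_update=1	# reinstall bundled firmware even if current
 */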
246enum {
247	MAX_TXQ_ENTRIES      = 16384,
248	MAX_CTRL_TXQ_ENTRIES = 1024,
249	MAX_RSPQ_ENTRIES     = 16384,
250	MAX_RX_BUFFERS       = 16384,
251	MAX_RX_JUMBO_BUFFERS = 16384,
252	MIN_TXQ_ENTRIES      = 4,
253	MIN_CTRL_TXQ_ENTRIES = 4,
254	MIN_RSPQ_ENTRIES     = 32,
255	MIN_FL_ENTRIES       = 32,
256	MIN_FL_JUMBO_ENTRIES = 32
257};
258
259struct filter_info {
260	u32 sip;
261	u32 sip_mask;
262	u32 dip;
263	u16 sport;
264	u16 dport;
265	u32 vlan:12;
266	u32 vlan_prio:3;
267	u32 mac_hit:1;
268	u32 mac_idx:4;
269	u32 mac_vld:1;
270	u32 pkt_type:2;
271	u32 report_filter_id:1;
272	u32 pass:1;
273	u32 rss:1;
274	u32 qset:3;
275	u32 locked:1;
276	u32 valid:1;
277};
278
279enum { FILTER_NO_VLAN_PRI = 7 };
280
281#define EEPROM_MAGIC 0x38E2F10C
282
283#define PORT_MASK ((1 << MAX_NPORTS) - 1)
284
285/* Table for probing the cards.  The desc field isn't actually used */
286struct cxgb_ident {
287	uint16_t	vendor;
288	uint16_t	device;
289	int		index;
290	char		*desc;
291} cxgb_identifiers[] = {
292	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
300	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
301	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
302	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
303	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
304	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
305	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
306	{0, 0, 0, NULL}
307};
308
309static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
310
311
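/*
 * Map the adapter's hardware revision to the character used in the names of
 * the firmware(9) images for that revision (see TPEEPROM_NAME/TPSRAM_NAME).
 */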
312static __inline char
313t3rev2char(struct adapter *adapter)
314{
315	char rev = 'z';
316
317	switch(adapter->params.rev) {
318	case T3_REV_A:
319		rev = 'a';
320		break;
321	case T3_REV_B:
322	case T3_REV_B2:
323		rev = 'b';
324		break;
325	case T3_REV_C:
326		rev = 'c';
327		break;
328	}
329	return rev;
330}
331
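/*
 * Look up the device's PCI vendor/device ID in cxgb_identifiers[].  Returns
 * NULL if this is not a recognized T3 adapter.
 */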
332static struct cxgb_ident *
333cxgb_get_ident(device_t dev)
334{
335	struct cxgb_ident *id;
336
337	for (id = cxgb_identifiers; id->desc != NULL; id++) {
338		if ((id->vendor == pci_get_vendor(dev)) &&
339		    (id->device == pci_get_device(dev))) {
340			return (id);
341		}
342	}
343	return (NULL);
344}
345
346static const struct adapter_info *
347cxgb_get_adapter_info(device_t dev)
348{
349	struct cxgb_ident *id;
350	const struct adapter_info *ai;
351
352	id = cxgb_get_ident(dev);
353	if (id == NULL)
354		return (NULL);
355
356	ai = t3_get_adapter_info(id->index);
357
358	return (ai);
359}
360
361static int
362cxgb_controller_probe(device_t dev)
363{
364	const struct adapter_info *ai;
365	char *ports, buf[80];
366	int nports;
367
368	ai = cxgb_get_adapter_info(dev);
369	if (ai == NULL)
370		return (ENXIO);
371
372	nports = ai->nports0 + ai->nports1;
373	if (nports == 1)
374		ports = "port";
375	else
376		ports = "ports";
377
378	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
379	device_set_desc_copy(dev, buf);
380	return (BUS_PROBE_DEFAULT);
381}
382
383#define FW_FNAME "cxgb_t3fw"
384#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
385#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
386
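/*
 * Load the cxgb_t3fw firmware(9) image, install it on the adapter with
 * t3_load_fw(), and refresh the cached firmware version string.
 */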
387static int
388upgrade_fw(adapter_t *sc)
389{
390	const struct firmware *fw;
391	int status;
392	u32 vers;
393
394	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
395		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
396		return (ENOENT);
397	} else
398		device_printf(sc->dev, "installing firmware on card\n");
399	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
400
401	if (status != 0) {
402		device_printf(sc->dev, "failed to install firmware: %d\n",
403		    status);
404	} else {
405		t3_get_fw_version(sc, &vers);
406		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
407		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
408		    G_FW_VERSION_MICRO(vers));
409	}
410
411	firmware_put(fw, FIRMWARE_UNLOAD);
412
413	return (status);
414}
415
416/*
417 * The cxgb_controller_attach function is responsible for the initial
418 * bringup of the device.  Its responsibilities include:
419 *
420 *  1. Determine if the device supports MSI or MSI-X.
421 *  2. Allocate bus resources so that we can access the Base Address Register
422 *  3. Create and initialize mutexes for the controller and its control
423 *     logic such as SGE and MDIO.
424 *  4. Call hardware specific setup routine for the adapter as a whole.
425 *  5. Allocate the BAR for doing MSI-X.
426 *  6. Setup the line interrupt iff MSI-X is not supported.
427 *  7. Create the driver's taskq.
428 *  8. Start one task queue service thread.
429 *  9. Check if the firmware and SRAM are up-to-date.  They will be
430 *     auto-updated later (before FULL_INIT_DONE), if required.
431 * 10. Create a child device for each MAC (port)
432 * 11. Initialize T3 private state.
433 * 12. Trigger the LED
434 * 13. Setup offload iff supported.
435 * 14. Reset/restart the tick callout.
436 * 15. Attach sysctls
437 *
438 * NOTE: Any modification or deviation from this list MUST be reflected in
439 * the above comment.  Failure to do so will result in problems on various
440 * error conditions including link flapping.
441 */
442static int
443cxgb_controller_attach(device_t dev)
444{
445	device_t child;
446	const struct adapter_info *ai;
447	struct adapter *sc;
448	int i, error = 0;
449	uint32_t vers;
450	int port_qsets = 1;
451	int msi_needed, reg;
452	char buf[80];
453
454	sc = device_get_softc(dev);
455	sc->dev = dev;
456	sc->msi_count = 0;
457	ai = cxgb_get_adapter_info(dev);
458
459	/* find the PCIe link width and set max read request to 4KB */
460	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
461		uint16_t lnk, pectl;
462		lnk = pci_read_config(dev, reg + 0x12, 2);
463		sc->link_width = (lnk >> 4) & 0x3f;
464
465		pectl = pci_read_config(dev, reg + 0x8, 2);
466		pectl = (pectl & ~0x7000) | (5 << 12);
467		pci_write_config(dev, reg + 0x8, pectl, 2);
468	}
469
470	if (sc->link_width != 0 && sc->link_width <= 4 &&
471	    (ai->nports0 + ai->nports1) <= 2) {
472		device_printf(sc->dev,
473		    "PCIe x%d Link, expect reduced performance\n",
474		    sc->link_width);
475	}
476
477	touch_bars(dev);
478	pci_enable_busmaster(dev);
479	/*
480	 * Allocate the registers and make them available to the driver.
481	 * The registers that we care about for NIC mode are in BAR 0
482	 */
483	sc->regs_rid = PCIR_BAR(0);
484	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
485	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
486		device_printf(dev, "Cannot allocate BAR region 0\n");
487		return (ENXIO);
488	}
489	sc->udbs_rid = PCIR_BAR(2);
490	sc->udbs_res = NULL;
491	if (is_offload(sc) &&
492	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
493		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
494		device_printf(dev, "Cannot allocate BAR region 2\n");
495		error = ENXIO;
496		goto out;
497	}
498
499	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
500	    device_get_unit(dev));
501	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
502
503	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
504	    device_get_unit(dev));
505	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
506	    device_get_unit(dev));
507	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
508	    device_get_unit(dev));
509
510	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
511	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
512	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
513
514	sc->bt = rman_get_bustag(sc->regs_res);
515	sc->bh = rman_get_bushandle(sc->regs_res);
516	sc->mmio_len = rman_get_size(sc->regs_res);
517
518	for (i = 0; i < MAX_NPORTS; i++)
519		sc->port[i].adapter = sc;
520
521	if (t3_prep_adapter(sc, ai, 1) < 0) {
522		printf("prep adapter failed\n");
523		error = ENODEV;
524		goto out;
525	}
526        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
527	 * enough messages for the queue sets.  If that fails, try falling
528	 * back to MSI.  If that fails, then try falling back to the legacy
529	 * interrupt pin model.
530	 */
531	sc->msix_regs_rid = 0x20;
532	if ((msi_allowed >= 2) &&
533	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
534	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
535
536		if (multiq)
537			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
538		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
539
540		if (pci_msix_count(dev) == 0 ||
541		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
542		    sc->msi_count != msi_needed) {
543			device_printf(dev, "alloc msix failed - "
544				      "msi_count=%d, msi_needed=%d, err=%d; "
545				      "will try MSI\n", sc->msi_count,
546				      msi_needed, error);
547			sc->msi_count = 0;
548			port_qsets = 1;
549			pci_release_msi(dev);
550			bus_release_resource(dev, SYS_RES_MEMORY,
551			    sc->msix_regs_rid, sc->msix_regs_res);
552			sc->msix_regs_res = NULL;
553		} else {
554			sc->flags |= USING_MSIX;
555			sc->cxgb_intr = cxgb_async_intr;
556			device_printf(dev,
557				      "using MSI-X interrupts (%u vectors)\n",
558				      sc->msi_count);
559		}
560	}
561
562	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
563		sc->msi_count = 1;
564		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
565			device_printf(dev, "alloc msi failed - "
566				      "err=%d; will try INTx\n", error);
567			sc->msi_count = 0;
568			port_qsets = 1;
569			pci_release_msi(dev);
570		} else {
571			sc->flags |= USING_MSI;
572			sc->cxgb_intr = t3_intr_msi;
573			device_printf(dev, "using MSI interrupts\n");
574		}
575	}
576	if (sc->msi_count == 0) {
577		device_printf(dev, "using line interrupts\n");
578		sc->cxgb_intr = t3b_intr;
579	}
580
581	/* Create a private taskqueue thread for handling driver events */
582	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
583	    taskqueue_thread_enqueue, &sc->tq);
584	if (sc->tq == NULL) {
585		device_printf(dev, "failed to allocate controller task queue\n");
586		goto out;
587	}
588
589	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
590	    device_get_nameunit(dev));
591	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
592	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
593
594
595	/* Create a periodic callout for checking adapter status */
596	callout_init(&sc->cxgb_tick_ch, TRUE);
597
598	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
599		/*
600		 * Warn user that a firmware update will be attempted in init.
601		 */
602		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
603		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
604		sc->flags &= ~FW_UPTODATE;
605	} else {
606		sc->flags |= FW_UPTODATE;
607	}
608
609	if (t3_check_tpsram_version(sc) < 0) {
610		/*
611		 * Warn user that a firmware update will be attempted in init.
612		 */
613		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
614		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
615		sc->flags &= ~TPS_UPTODATE;
616	} else {
617		sc->flags |= TPS_UPTODATE;
618	}
619
620	/*
621	 * Create a child device for each MAC.  The ethernet attachment
622	 * will be done in these children.
623	 */
624	for (i = 0; i < (sc)->params.nports; i++) {
625		struct port_info *pi;
626
627		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
628			device_printf(dev, "failed to add child port\n");
629			error = EINVAL;
630			goto out;
631		}
632		pi = &sc->port[i];
633		pi->adapter = sc;
634		pi->nqsets = port_qsets;
635		pi->first_qset = i*port_qsets;
636		pi->port_id = i;
637		pi->tx_chan = i >= ai->nports0;
638		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
639		sc->rxpkt_map[pi->txpkt_intf] = i;
640		sc->port[i].tx_chan = i >= ai->nports0;
641		sc->portdev[i] = child;
642		device_set_softc(child, pi);
643	}
644	if ((error = bus_generic_attach(dev)) != 0)
645		goto out;
646
647	/* initialize sge private state */
648	t3_sge_init_adapter(sc);
649
650	t3_led_ready(sc);
651
652	cxgb_offload_init();
653	if (is_offload(sc)) {
654		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
655		cxgb_adapter_ofld(sc);
656        }
657	error = t3_get_fw_version(sc, &vers);
658	if (error)
659		goto out;
660
661	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663	    G_FW_VERSION_MICRO(vers));
664
665	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666		 ai->desc, is_offload(sc) ? "R" : "",
667		 sc->params.vpd.ec, sc->params.vpd.sn);
668	device_set_desc_copy(dev, buf);
669
670	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673
674	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
676	t3_add_attach_sysctls(sc);
677out:
678	if (error)
679		cxgb_free(sc);
680
681	return (error);
682}
683
684/*
685 * The cxgb_controller_detach routine is called when the device is
686 * unloaded from the system.
687 */
688
689static int
690cxgb_controller_detach(device_t dev)
691{
692	struct adapter *sc;
693
694	sc = device_get_softc(dev);
695
696	cxgb_free(sc);
697
698	return (0);
699}
700
701/*
702 * cxgb_free() is called by the cxgb_controller_detach() routine
703 * to tear down the structures that were built up in
704 * cxgb_controller_attach(), and should be the final piece of work
705 * done when fully unloading the driver.  Its responsibilities include:
706 *
708 *  1. Shutting down the threads started by the cxgb_controller_attach()
709 *     routine.
710 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
711 *  3. Detaching all of the port devices created during the
712 *     cxgb_controller_attach() routine.
713 *  4. Removing the device children created via cxgb_controller_attach().
714 *  5. Releasing PCI resources associated with the device.
715 *  6. Turning off the offload support, iff it was turned on.
716 *  7. Destroying the mutexes created in cxgb_controller_attach().
717 *
718 */
719static void
720cxgb_free(struct adapter *sc)
721{
722	int i;
723
724	ADAPTER_LOCK(sc);
725	sc->flags |= CXGB_SHUTDOWN;
726	ADAPTER_UNLOCK(sc);
727
728	/*
729	 * Make sure all child devices are gone.
730	 */
731	bus_generic_detach(sc->dev);
732	for (i = 0; i < (sc)->params.nports; i++) {
733		if (sc->portdev[i] &&
734		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
735			device_printf(sc->dev, "failed to delete child port\n");
736	}
737
738	/*
739	 * At this point, it is as if cxgb_port_detach has run on all ports, and
740	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
741	 * all open devices have been closed.
742	 */
743	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
744					   __func__, sc->open_device_map));
745	for (i = 0; i < sc->params.nports; i++) {
746		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
747						  __func__, i));
748	}
749
750	/*
751	 * Finish off the adapter's callouts.
752	 */
753	callout_drain(&sc->cxgb_tick_ch);
754	callout_drain(&sc->sge_timer_ch);
755
756	/*
757	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
758	 * sysctls are cleaned up by the kernel linker.
759	 */
760	if (sc->flags & FULL_INIT_DONE) {
761 		t3_free_sge_resources(sc);
762 		sc->flags &= ~FULL_INIT_DONE;
763 	}
764
765	/*
766	 * Release all interrupt resources.
767	 */
768	cxgb_teardown_interrupts(sc);
769	if (sc->flags & (USING_MSI | USING_MSIX)) {
770		device_printf(sc->dev, "releasing msi message(s)\n");
771		pci_release_msi(sc->dev);
772	} else {
773		device_printf(sc->dev, "no msi message to release\n");
774	}
775
776	if (sc->msix_regs_res != NULL) {
777		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
778		    sc->msix_regs_res);
779	}
780
781	/*
782	 * Free the adapter's taskqueue.
783	 */
784	if (sc->tq != NULL) {
785		taskqueue_free(sc->tq);
786		sc->tq = NULL;
787	}
788
789	if (is_offload(sc)) {
790		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
791		cxgb_adapter_unofld(sc);
792	}
793
794#ifdef notyet
795	if (sc->flags & CXGB_OFLD_INIT)
796		cxgb_offload_deactivate(sc);
797#endif
798	free(sc->filters, M_DEVBUF);
799	t3_sge_free(sc);
800
801	cxgb_offload_exit();
802
803	if (sc->udbs_res != NULL)
804		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
805		    sc->udbs_res);
806
807	if (sc->regs_res != NULL)
808		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
809		    sc->regs_res);
810
811	MTX_DESTROY(&sc->mdio_lock);
812	MTX_DESTROY(&sc->sge.reg_lock);
813	MTX_DESTROY(&sc->elmer_lock);
814	ADAPTER_LOCK_DEINIT(sc);
815}
816
817/**
818 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
819 *	@sc: the controller softc
820 *
821 *	Determines how many sets of SGE queues to use and initializes them.
822 *	We support multiple queue sets per port if we have MSI-X, otherwise
823 *	just one queue set per port.
824 */
825static int
826setup_sge_qsets(adapter_t *sc)
827{
828	int i, j, err, irq_idx = 0, qset_idx = 0;
829	u_int ntxq = SGE_TXQ_PER_SET;
830
831	if ((err = t3_sge_alloc(sc)) != 0) {
832		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
833		return (err);
834	}
835
836	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
837		irq_idx = -1;
838
839	for (i = 0; i < (sc)->params.nports; i++) {
840		struct port_info *pi = &sc->port[i];
841
842		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
843			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
844			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
845			    &sc->params.sge.qset[qset_idx], ntxq, pi);
846			if (err) {
847				t3_free_sge_resources(sc);
848				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
849				    err);
850				return (err);
851			}
852		}
853	}
854
855	return (0);
856}
857
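/*
 * Tear down and release whatever interrupts were set up: every MSI-X vector
 * hooked up to a queue set, and then the main INTx/MSI/MSI-X interrupt.
 * Vectors that were never set up are skipped.
 */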
858static void
859cxgb_teardown_interrupts(adapter_t *sc)
860{
861	int i;
862
863	for (i = 0; i < SGE_QSETS; i++) {
864		if (sc->msix_intr_tag[i] == NULL) {
865
866			/* Should have been setup fully or not at all */
867			KASSERT(sc->msix_irq_res[i] == NULL &&
868				sc->msix_irq_rid[i] == 0,
869				("%s: half-done interrupt (%d).", __func__, i));
870
871			continue;
872		}
873
874		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
875				  sc->msix_intr_tag[i]);
876		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
877				     sc->msix_irq_res[i]);
878
879		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
880		sc->msix_irq_rid[i] = 0;
881	}
882
883	if (sc->intr_tag) {
884		KASSERT(sc->irq_res != NULL,
885			("%s: half-done interrupt.", __func__));
886
887		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
888		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
889				     sc->irq_res);
890
891		sc->irq_res = sc->intr_tag = NULL;
892		sc->irq_rid = 0;
893	}
894}
895
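/*
 * Allocate and hook up the adapter's interrupts: the main vector (INTx, MSI,
 * or the first MSI-X vector), plus one additional MSI-X vector per queue set
 * when MSI-X is in use.  Everything is torn down again if any step fails.
 */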
896static int
897cxgb_setup_interrupts(adapter_t *sc)
898{
899	struct resource *res;
900	void *tag;
901	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
902
903	sc->irq_rid = intr_flag ? 1 : 0;
904	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
905					     RF_SHAREABLE | RF_ACTIVE);
906	if (sc->irq_res == NULL) {
907		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
908			      intr_flag, sc->irq_rid);
909		err = EINVAL;
910		sc->irq_rid = 0;
911	} else {
912		err = bus_setup_intr(sc->dev, sc->irq_res,
913		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
914		    sc->cxgb_intr, sc, &sc->intr_tag);
915
916		if (err) {
917			device_printf(sc->dev,
918				      "Cannot set up interrupt (%x, %u, %d)\n",
919				      intr_flag, sc->irq_rid, err);
920			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
921					     sc->irq_res);
922			sc->irq_res = sc->intr_tag = NULL;
923			sc->irq_rid = 0;
924		}
925	}
926
927	/* That's all for INTx or MSI */
928	if (!(intr_flag & USING_MSIX) || err)
929		return (err);
930
931	for (i = 0; i < sc->msi_count - 1; i++) {
932		rid = i + 2;
933		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
934					     RF_SHAREABLE | RF_ACTIVE);
935		if (res == NULL) {
936			device_printf(sc->dev, "Cannot allocate interrupt "
937				      "for message %d\n", rid);
938			err = EINVAL;
939			break;
940		}
941
942		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
943				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
944		if (err) {
945			device_printf(sc->dev, "Cannot set up interrupt "
946				      "for message %d (%d)\n", rid, err);
947			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
948			break;
949		}
950
951		sc->msix_irq_rid[i] = rid;
952		sc->msix_irq_res[i] = res;
953		sc->msix_intr_tag[i] = tag;
954	}
955
956	if (err)
957		cxgb_teardown_interrupts(sc);
958
959	return (err);
960}
961
962
963static int
964cxgb_port_probe(device_t dev)
965{
966	struct port_info *p;
967	char buf[80];
968	const char *desc;
969
970	p = device_get_softc(dev);
971	desc = p->phy.desc;
972	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
973	device_set_desc_copy(dev, buf);
974	return (0);
975}
976
977
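/*
 * Create the per-port character device (named after the ifnet, e.g.
 * /dev/cxgb0) that provides the cxgb_cdevsw extension ioctls.
 */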
978static int
979cxgb_makedev(struct port_info *pi)
980{
981
982	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
983	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
984
985	if (pi->port_cdev == NULL)
986		return (ENOMEM);
987
988	pi->port_cdev->si_drv1 = (void *)pi;
989
990	return (0);
991}
992
993#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
994    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
995    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
996#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
997
998static int
999cxgb_port_attach(device_t dev)
1000{
1001	struct port_info *p;
1002	struct ifnet *ifp;
1003	int err;
1004	struct adapter *sc;
1005
1006	p = device_get_softc(dev);
1007	sc = p->adapter;
1008	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1009	    device_get_unit(device_get_parent(dev)), p->port_id);
1010	PORT_LOCK_INIT(p, p->lockbuf);
1011
1012	/* Allocate an ifnet object and set it up */
1013	ifp = p->ifp = if_alloc(IFT_ETHER);
1014	if (ifp == NULL) {
1015		device_printf(dev, "Cannot allocate ifnet\n");
1016		return (ENOMEM);
1017	}
1018
1019	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1020	ifp->if_init = cxgb_init;
1021	ifp->if_softc = p;
1022	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1023	ifp->if_ioctl = cxgb_ioctl;
1024	ifp->if_start = cxgb_start;
1025
1026	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1027	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1028	IFQ_SET_READY(&ifp->if_snd);
1029
1030	ifp->if_capabilities = CXGB_CAP;
1031	ifp->if_capenable = CXGB_CAP_ENABLE;
1032	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1033
1034	/*
1035	 * Disable TSO on 4-port - it isn't supported by the firmware.
1036	 */
1037	if (sc->params.nports > 2) {
1038		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1039		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040		ifp->if_hwassist &= ~CSUM_TSO;
1041	}
1042
1043	ether_ifattach(ifp, p->hw_addr);
1044	ifp->if_transmit = cxgb_transmit;
1045	ifp->if_qflush = cxgb_qflush;
1046
1047#ifdef DEFAULT_JUMBO
1048	if (sc->params.nports <= 2)
1049		ifp->if_mtu = ETHERMTU_JUMBO;
1050#endif
1051	if ((err = cxgb_makedev(p)) != 0) {
1052		printf("makedev failed %d\n", err);
1053		return (err);
1054	}
1055
1056	/* Create a list of media supported by this port */
1057	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1058	    cxgb_media_status);
1059	cxgb_build_medialist(p);
1060
1061	t3_sge_init_port(p);
1062
1063	return (err);
1064}
1065
1066/*
1067 * cxgb_port_detach() is called via the device_detach methods when
1068 * cxgb_free() calls the bus_generic_detach.  It is responsible for
1069 * removing the device from the view of the kernel, i.e. from all
1070 * interfaces lists etc.  This routine is only called when the driver is
1071 * being unloaded, not when the link goes down.
1072 */
1073static int
1074cxgb_port_detach(device_t dev)
1075{
1076	struct port_info *p;
1077	struct adapter *sc;
1078	int i;
1079
1080	p = device_get_softc(dev);
1081	sc = p->adapter;
1082
1083	/* Tell cxgb_ioctl and if_init that the port is going away */
1084	ADAPTER_LOCK(sc);
1085	SET_DOOMED(p);
1086	wakeup(&sc->flags);
1087	while (IS_BUSY(sc))
1088		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1089	SET_BUSY(sc);
1090	ADAPTER_UNLOCK(sc);
1091
1092	if (p->port_cdev != NULL)
1093		destroy_dev(p->port_cdev);
1094
1095	cxgb_uninit_synchronized(p);
1096	ether_ifdetach(p->ifp);
1097
1098	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1099		struct sge_qset *qs = &sc->sge.qs[i];
1100		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1101
1102		callout_drain(&txq->txq_watchdog);
1103		callout_drain(&txq->txq_timer);
1104	}
1105
1106	PORT_LOCK_DEINIT(p);
1107	if_free(p->ifp);
1108	p->ifp = NULL;
1109
1110	ADAPTER_LOCK(sc);
1111	CLR_BUSY(sc);
1112	wakeup_one(&sc->flags);
1113	ADAPTER_UNLOCK(sc);
1114	return (0);
1115}
1116
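/*
 * Handle a fatal hardware error: quiesce the SGE, MACs, and interrupts (if
 * the adapter was fully initialized) and log the firmware status registers.
 */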
1117void
1118t3_fatal_err(struct adapter *sc)
1119{
1120	u_int fw_status[4];
1121
1122	if (sc->flags & FULL_INIT_DONE) {
1123		t3_sge_stop(sc);
1124		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1125		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1126		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1127		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1128		t3_intr_disable(sc);
1129	}
1130	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1131	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1132		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1133		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1134}
1135
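/*
 * Walk the device's PCI capability list and return the offset of the
 * requested capability, or 0 if the device does not advertise it.
 */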
1136int
1137t3_os_find_pci_capability(adapter_t *sc, int cap)
1138{
1139	device_t dev;
1140	struct pci_devinfo *dinfo;
1141	pcicfgregs *cfg;
1142	uint32_t status;
1143	uint8_t ptr;
1144
1145	dev = sc->dev;
1146	dinfo = device_get_ivars(dev);
1147	cfg = &dinfo->cfg;
1148
1149	status = pci_read_config(dev, PCIR_STATUS, 2);
1150	if (!(status & PCIM_STATUS_CAPPRESENT))
1151		return (0);
1152
1153	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1154	case 0:
1155	case 1:
1156		ptr = PCIR_CAP_PTR;
1157		break;
1158	case 2:
1159		ptr = PCIR_CAP_PTR_2;
1160		break;
1161	default:
1162		return (0);
1163		break;
1164	}
1165	ptr = pci_read_config(dev, ptr, 1);
1166
1167	while (ptr != 0) {
1168		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1169			return (ptr);
1170		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1171	}
1172
1173	return (0);
1174}
1175
1176int
1177t3_os_pci_save_state(struct adapter *sc)
1178{
1179	device_t dev;
1180	struct pci_devinfo *dinfo;
1181
1182	dev = sc->dev;
1183	dinfo = device_get_ivars(dev);
1184
1185	pci_cfg_save(dev, dinfo, 0);
1186	return (0);
1187}
1188
1189int
1190t3_os_pci_restore_state(struct adapter *sc)
1191{
1192	device_t dev;
1193	struct pci_devinfo *dinfo;
1194
1195	dev = sc->dev;
1196	dinfo = device_get_ivars(dev);
1197
1198	pci_cfg_restore(dev, dinfo);
1199	return (0);
1200}
1201
1202/**
1203 *	t3_os_link_changed - handle link status changes
1204 *	@adapter: the adapter associated with the link change
1205 *	@port_id: the port index whose link status has changed
1206 *	@link_status: the new status of the link
1207 *	@speed: the new speed setting
1208 *	@duplex: the new duplex setting
1209 *	@fc: the new flow-control setting
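 *	@mac_was_reset: non-zero if the MAC was reset as part of this change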
1210 *
1211 *	This is the OS-dependent handler for link status changes.  The OS
1212 *	neutral handler takes care of most of the processing for these events,
1213 *	then calls this handler for any OS-specific processing.
1214 */
1215void
1216t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1217     int duplex, int fc, int mac_was_reset)
1218{
1219	struct port_info *pi = &adapter->port[port_id];
1220	struct ifnet *ifp = pi->ifp;
1221
1222	/* no race with detach, so ifp should always be good */
1223	KASSERT(ifp, ("%s: if detached.", __func__));
1224
1225	/* Reapply mac settings if they were lost due to a reset */
1226	if (mac_was_reset) {
1227		PORT_LOCK(pi);
1228		cxgb_update_mac_settings(pi);
1229		PORT_UNLOCK(pi);
1230	}
1231
1232	if (link_status) {
1233		ifp->if_baudrate = IF_Mbps(speed);
1234		if_link_state_change(ifp, LINK_STATE_UP);
1235	} else
1236		if_link_state_change(ifp, LINK_STATE_DOWN);
1237}
1238
1239/**
1240 *	t3_os_phymod_changed - handle PHY module changes
1241 *	@adap: the adapter
1242 *	@port_id: index of the port whose PHY module changed
1243 *
1244 *	This is the OS-dependent handler for PHY module changes.  It is
1245 *	invoked when a PHY module is removed or inserted for any OS-specific
1246 *	processing.
1247 */
1248void t3_os_phymod_changed(struct adapter *adap, int port_id)
1249{
1250	static const char *mod_str[] = {
1251		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1252	};
1253	struct port_info *pi = &adap->port[port_id];
1254	int mod = pi->phy.modtype;
1255
1256	if (mod != pi->media.ifm_cur->ifm_data)
1257		cxgb_build_medialist(pi);
1258
1259	if (mod == phy_modtype_none)
1260		if_printf(pi->ifp, "PHY module unplugged\n");
1261	else {
1262		KASSERT(mod < ARRAY_SIZE(mod_str),
1263			("invalid PHY module type %d", mod));
1264		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1265	}
1266}
1267
1268/*
1269 * Interrupt-context handler for external (PHY) interrupts.
1270 */
1271void
1272t3_os_ext_intr_handler(adapter_t *sc)
1273{
1274	if (cxgb_debug)
1275		printf("t3_os_ext_intr_handler\n");
1276	/*
1277	 * Schedule a task to handle external interrupts as they may be slow
1278	 * and we use a mutex to protect MDIO registers.  We disable PHY
1279	 * interrupts in the meantime and let the task reenable them when
1280	 * it's done.
1281	 */
1282	if (sc->slow_intr_mask) {
1283		ADAPTER_LOCK(sc);
1284		sc->slow_intr_mask &= ~F_T3DBG;
1285		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1286		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1287		ADAPTER_UNLOCK(sc);
1288	}
1289}
1290
1291void
1292t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1293{
1294
1295	/*
1296	 * The ifnet might not be allocated before this gets called,
1297	 * as this is called early on in attach by t3_prep_adapter, so
1298	 * just save the address off in the port structure.
1299	 */
1300	if (cxgb_debug)
1301		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1302	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1303}
1304
1305/*
1306 * Programs the XGMAC based on the settings in the ifnet.  These settings
1307 * include MTU, MAC address, mcast addresses, etc.
1308 */
1309static void
1310cxgb_update_mac_settings(struct port_info *p)
1311{
1312	struct ifnet *ifp = p->ifp;
1313	struct t3_rx_mode rm;
1314	struct cmac *mac = &p->mac;
1315	int mtu, hwtagging;
1316
1317	PORT_LOCK_ASSERT_OWNED(p);
1318
1319	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1320
1321	mtu = ifp->if_mtu;
1322	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1323		mtu += ETHER_VLAN_ENCAP_LEN;
1324
1325	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1326
1327	t3_mac_set_mtu(mac, mtu);
1328	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1329	t3_mac_set_address(mac, 0, p->hw_addr);
1330	t3_init_rx_mode(&rm, p);
1331	t3_mac_set_rx_mode(mac, &rm);
1332}
1333
1334
1335static int
1336await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1337			      unsigned long n)
1338{
1339	int attempts = 5;
1340
1341	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1342		if (!--attempts)
1343			return (ETIMEDOUT);
1344		t3_os_sleep(10);
1345	}
1346	return 0;
1347}
1348
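/*
 * Initialize TP parity state by writing every SMT, L2T, and routing table
 * entry (plus one TCB field) through management packets, then waiting for
 * the replies on response queue 0.
 */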
1349static int
1350init_tp_parity(struct adapter *adap)
1351{
1352	int i;
1353	struct mbuf *m;
1354	struct cpl_set_tcb_field *greq;
1355	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1356
1357	t3_tp_set_offload_mode(adap, 1);
1358
1359	for (i = 0; i < 16; i++) {
1360		struct cpl_smt_write_req *req;
1361
1362		m = m_gethdr(M_WAITOK, MT_DATA);
1363		req = mtod(m, struct cpl_smt_write_req *);
1364		m->m_len = m->m_pkthdr.len = sizeof(*req);
1365		memset(req, 0, sizeof(*req));
1366		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1368		req->iff = i;
1369		t3_mgmt_tx(adap, m);
1370	}
1371
1372	for (i = 0; i < 2048; i++) {
1373		struct cpl_l2t_write_req *req;
1374
1375		m = m_gethdr(M_WAITOK, MT_DATA);
1376		req = mtod(m, struct cpl_l2t_write_req *);
1377		m->m_len = m->m_pkthdr.len = sizeof(*req);
1378		memset(req, 0, sizeof(*req));
1379		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1380		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1381		req->params = htonl(V_L2T_W_IDX(i));
1382		t3_mgmt_tx(adap, m);
1383	}
1384
1385	for (i = 0; i < 2048; i++) {
1386		struct cpl_rte_write_req *req;
1387
1388		m = m_gethdr(M_WAITOK, MT_DATA);
1389		req = mtod(m, struct cpl_rte_write_req *);
1390		m->m_len = m->m_pkthdr.len = sizeof(*req);
1391		memset(req, 0, sizeof(*req));
1392		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1393		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1394		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1395		t3_mgmt_tx(adap, m);
1396	}
1397
1398	m = m_gethdr(M_WAITOK, MT_DATA);
1399	greq = mtod(m, struct cpl_set_tcb_field *);
1400	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1401	memset(greq, 0, sizeof(*greq));
1402	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1403	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1404	greq->mask = htobe64(1);
1405	t3_mgmt_tx(adap, m);
1406
1407	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1408	t3_tp_set_offload_mode(adap, 0);
1409	return (i);
1410}
1411
1412/**
1413 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1414 *	@adap: the adapter
1415 *
1416 *	Sets up RSS to distribute packets to multiple receive queues.  We
1417 *	configure the RSS CPU lookup table to distribute to the number of HW
1418 *	receive queues, and the response queue lookup table to narrow that
1419 *	down to the response queues actually configured for each port.
1420 *	We always configure the RSS mapping for two ports since the mapping
1421 *	table has plenty of entries.
1422 */
1423static void
1424setup_rss(adapter_t *adap)
1425{
1426	int i;
1427	u_int nq[2];
1428	uint8_t cpus[SGE_QSETS + 1];
1429	uint16_t rspq_map[RSS_TABLE_SIZE];
1430
1431	for (i = 0; i < SGE_QSETS; ++i)
1432		cpus[i] = i;
1433	cpus[SGE_QSETS] = 0xff;
1434
1435	nq[0] = nq[1] = 0;
1436	for_each_port(adap, i) {
1437		const struct port_info *pi = adap2pinfo(adap, i);
1438
1439		nq[pi->tx_chan] += pi->nqsets;
1440	}
1441	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1442		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1443		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1444	}
1445
1446	/* Calculate the reverse RSS map table */
1447	for (i = 0; i < SGE_QSETS; ++i)
1448		adap->rrss_map[i] = 0xff;
1449	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1450		if (adap->rrss_map[rspq_map[i]] == 0xff)
1451			adap->rrss_map[rspq_map[i]] = i;
1452
1453	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1454		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1455	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1456	              cpus, rspq_map);
1457
1458}
1459
1460/*
1461 * Sends an mbuf to an offload queue driver
1462 * after dealing with any active network taps.
1463 */
1464static inline int
1465offload_tx(struct t3cdev *tdev, struct mbuf *m)
1466{
1467	int ret;
1468
1469	ret = t3_offload_tx(tdev, m);
1470	return (ret);
1471}
1472
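/*
 * Program a port's source MAC table (SMT) entry by sending a
 * CPL_SMT_WRITE_REQ with the port's MAC address down the offload queue.
 */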
1473static int
1474write_smt_entry(struct adapter *adapter, int idx)
1475{
1476	struct port_info *pi = &adapter->port[idx];
1477	struct cpl_smt_write_req *req;
1478	struct mbuf *m;
1479
1480	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1481		return (ENOMEM);
1482
1483	req = mtod(m, struct cpl_smt_write_req *);
1484	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1485
1486	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1487	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1488	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1489	req->iff = idx;
1490	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1491	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1492
1493	m_set_priority(m, 1);
1494
1495	offload_tx(&adapter->tdev, m);
1496
1497	return (0);
1498}
1499
1500static int
1501init_smt(struct adapter *adapter)
1502{
1503	int i;
1504
1505	for_each_port(adapter, i)
1506		write_smt_entry(adapter, i);
1507	return 0;
1508}
1509
1510static void
1511init_port_mtus(adapter_t *adapter)
1512{
1513	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1514
1515	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1516}
1517
1518static void
1519send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1520			      int hi, int port)
1521{
1522	struct mbuf *m;
1523	struct mngt_pktsched_wr *req;
1524
1525	m = m_gethdr(M_DONTWAIT, MT_DATA);
1526	if (m) {
1527		req = mtod(m, struct mngt_pktsched_wr *);
1528		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1529		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1530		req->sched = sched;
1531		req->idx = qidx;
1532		req->min = lo;
1533		req->max = hi;
1534		req->binding = port;
1535		m->m_len = m->m_pkthdr.len = sizeof(*req);
1536		t3_mgmt_tx(adap, m);
1537	}
1538}
1539
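/*
 * Bind each port's queue sets to that port's TX channel using packet
 * scheduler management commands.
 */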
1540static void
1541bind_qsets(adapter_t *sc)
1542{
1543	int i, j;
1544
1545	for (i = 0; i < (sc)->params.nports; ++i) {
1546		const struct port_info *pi = adap2pinfo(sc, i);
1547
1548		for (j = 0; j < pi->nqsets; ++j) {
1549			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1550					  -1, pi->tx_chan);
1551
1552		}
1553	}
1554}
1555
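/*
 * Compare the protocol SRAM version stored in the EEPROM with the version
 * this driver was built against and, if they differ, rewrite the EEPROM copy
 * from the cxgb_t3<rev>_tp_eeprom firmware(9) image.
 */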
1556static void
1557update_tpeeprom(struct adapter *adap)
1558{
1559	const struct firmware *tpeeprom;
1560
1561	uint32_t version;
1562	unsigned int major, minor;
1563	int ret, len;
1564	char rev, name[32];
1565
1566	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1567
1568	major = G_TP_VERSION_MAJOR(version);
1569	minor = G_TP_VERSION_MINOR(version);
1570	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1571		return;
1572
1573	rev = t3rev2char(adap);
1574	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1575
1576	tpeeprom = firmware_get(name);
1577	if (tpeeprom == NULL) {
1578		device_printf(adap->dev,
1579			      "could not load TP EEPROM: unable to load %s\n",
1580			      name);
1581		return;
1582	}
1583
1584	len = tpeeprom->datasize - 4;
1585
1586	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1587	if (ret)
1588		goto release_tpeeprom;
1589
1590	if (len != TP_SRAM_LEN) {
1591		device_printf(adap->dev,
1592			      "%s length is wrong len=%d expected=%d\n", name,
1593			      len, TP_SRAM_LEN);
1594		goto release_tpeeprom;
1595	}
1596
1597	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1598	    TP_SRAM_OFFSET);
1599
1600	if (!ret) {
1601		device_printf(adap->dev,
1602			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1603			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1604	} else
1605		device_printf(adap->dev,
1606			      "Protocol SRAM image update in EEPROM failed\n");
1607
1608release_tpeeprom:
1609	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1610
1611	return;
1612}
1613
1614static int
1615update_tpsram(struct adapter *adap)
1616{
1617	const struct firmware *tpsram;
1618	int ret;
1619	char rev, name[32];
1620
1621	rev = t3rev2char(adap);
1622	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1623
1624	update_tpeeprom(adap);
1625
1626	tpsram = firmware_get(name);
1627	if (tpsram == NULL){
1628		device_printf(adap->dev, "could not load TP SRAM\n");
1629		return (EINVAL);
1630	} else
1631		device_printf(adap->dev, "updating TP SRAM\n");
1632
1633	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1634	if (ret)
1635		goto release_tpsram;
1636
1637	ret = t3_set_proto_sram(adap, tpsram->data);
1638	if (ret)
1639		device_printf(adap->dev, "loading protocol SRAM failed\n");
1640
1641release_tpsram:
1642	firmware_put(tpsram, FIRMWARE_UNLOAD);
1643
1644	return ret;
1645}
1646
1647/**
1648 *	cxgb_up - enable the adapter
1649 *	@adap: adapter being enabled
1650 *
1651 *	Called when the first port is enabled, this function performs the
1652 *	actions necessary to make an adapter operational, such as completing
1653 *	the initialization of HW modules, and enabling interrupts.
1654 */
1655static int
1656cxgb_up(struct adapter *sc)
1657{
1658	int err = 0;
1659	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1660
1661	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1662					   __func__, sc->open_device_map));
1663
1664	if ((sc->flags & FULL_INIT_DONE) == 0) {
1665
1666		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1667
1668		if ((sc->flags & FW_UPTODATE) == 0)
1669			if ((err = upgrade_fw(sc)))
1670				goto out;
1671
1672		if ((sc->flags & TPS_UPTODATE) == 0)
1673			if ((err = update_tpsram(sc)))
1674				goto out;
1675
1676		if (is_offload(sc) && nfilters != 0) {
1677			sc->params.mc5.nservers = 0;
1678
1679			if (nfilters < 0)
1680				sc->params.mc5.nfilters = mxf;
1681			else
1682				sc->params.mc5.nfilters = min(nfilters, mxf);
1683		}
1684
1685		err = t3_init_hw(sc, 0);
1686		if (err)
1687			goto out;
1688
1689		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1690		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1691
1692		err = setup_sge_qsets(sc);
1693		if (err)
1694			goto out;
1695
1696		alloc_filters(sc);
1697		setup_rss(sc);
1698
1699		t3_intr_clear(sc);
1700		err = cxgb_setup_interrupts(sc);
1701		if (err)
1702			goto out;
1703
1704		t3_add_configured_sysctls(sc);
1705		sc->flags |= FULL_INIT_DONE;
1706	}
1707
1708	t3_intr_clear(sc);
1709	t3_sge_start(sc);
1710	t3_intr_enable(sc);
1711
1712	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1713	    is_offload(sc) && init_tp_parity(sc) == 0)
1714		sc->flags |= TP_PARITY_INIT;
1715
1716	if (sc->flags & TP_PARITY_INIT) {
1717		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1718		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1719	}
1720
1721	if (!(sc->flags & QUEUES_BOUND)) {
1722		bind_qsets(sc);
1723		setup_hw_filters(sc);
1724		sc->flags |= QUEUES_BOUND;
1725	}
1726
1727	t3_sge_reset_adapter(sc);
1728out:
1729	return (err);
1730}
1731
1732/*
1733 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1734 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1735 * during controller_detach, not here.
1736 */
1737static void
1738cxgb_down(struct adapter *sc)
1739{
1740	t3_sge_stop(sc);
1741	t3_intr_disable(sc);
1742}
1743
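/*
 * Mark the offload device open and prepare the adapter for offload use:
 * enable TP offload mode, program the per-port MTU table and SMT entries,
 * and notify any registered offload clients.
 */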
1744static int
1745offload_open(struct port_info *pi)
1746{
1747	struct adapter *sc = pi->adapter;
1748	struct t3cdev *tdev = &sc->tdev;
1749
1750	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1751
1752	t3_tp_set_offload_mode(sc, 1);
1753	tdev->lldev = pi->ifp;
1754	init_port_mtus(sc);
1755	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1756		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1757	init_smt(sc);
1758	cxgb_add_clients(tdev);
1759
1760	return (0);
1761}
1762
1763static int
1764offload_close(struct t3cdev *tdev)
1765{
1766	struct adapter *adapter = tdev2adap(tdev);
1767
1768	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1769		return (0);
1770
1771	/* Call back all registered clients */
1772	cxgb_remove_clients(tdev);
1773
1774	tdev->lldev = NULL;
1775	cxgb_set_dummy_ops(tdev);
1776	t3_tp_set_offload_mode(adapter, 0);
1777
1778	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1779
1780	return (0);
1781}
1782
1783/*
1784 * if_init for cxgb ports.
1785 */
1786static void
1787cxgb_init(void *arg)
1788{
1789	struct port_info *p = arg;
1790	struct adapter *sc = p->adapter;
1791
1792	ADAPTER_LOCK(sc);
1793	cxgb_init_locked(p); /* releases adapter lock */
1794	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1795}
1796
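/*
 * Bring a port up.  Called with the adapter lock held; if one-time adapter
 * initialization is still pending the lock is dropped (with the adapter
 * marked busy) around the work that may sleep.  The first port to come up
 * also brings the adapter up via cxgb_up() and, when offload is enabled,
 * opens the offload device.  The adapter lock is released before returning.
 */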
1797static int
1798cxgb_init_locked(struct port_info *p)
1799{
1800	struct adapter *sc = p->adapter;
1801	struct ifnet *ifp = p->ifp;
1802	struct cmac *mac = &p->mac;
1803	int i, rc = 0, may_sleep = 0;
1804
1805	ADAPTER_LOCK_ASSERT_OWNED(sc);
1806
1807	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1808		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1809			rc = EINTR;
1810			goto done;
1811		}
1812	}
1813	if (IS_DOOMED(p)) {
1814		rc = ENXIO;
1815		goto done;
1816	}
1817	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1818
1819	/*
1820	 * The code that runs during one-time adapter initialization can sleep
1821	 * so it's important not to hold any locks across it.
1822	 */
1823	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1824
1825	if (may_sleep) {
1826		SET_BUSY(sc);
1827		ADAPTER_UNLOCK(sc);
1828	}
1829
1830	if (sc->open_device_map == 0) {
1831		if ((rc = cxgb_up(sc)) != 0)
1832			goto done;
1833
1834		if (is_offload(sc) && !ofld_disable && offload_open(p))
1835			log(LOG_WARNING,
1836			    "Could not initialize offload capabilities\n");
1837	}
1838
1839	PORT_LOCK(p);
1840	if (isset(&sc->open_device_map, p->port_id) &&
1841	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1842		PORT_UNLOCK(p);
1843		goto done;
1844	}
1845	t3_port_intr_enable(sc, p->port_id);
1846	if (!mac->multiport)
1847		t3_mac_init(mac);
1848	cxgb_update_mac_settings(p);
1849	t3_link_start(&p->phy, mac, &p->link_config);
1850	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1851	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1852	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1853	PORT_UNLOCK(p);
1854
1855	t3_link_changed(sc, p->port_id);
1856
1857	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1858		struct sge_qset *qs = &sc->sge.qs[i];
1859		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1860
1861		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1862				 txq->txq_watchdog.c_cpu);
1863	}
1864
1865	/* all ok */
1866	setbit(&sc->open_device_map, p->port_id);
1867
1868done:
1869	if (may_sleep) {
1870		ADAPTER_LOCK(sc);
1871		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1872		CLR_BUSY(sc);
1873		wakeup_one(&sc->flags);
1874	}
1875	ADAPTER_UNLOCK(sc);
1876	return (rc);
1877}
1878
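/*
 * Bring a port down.  Called with the adapter lock held; waits until the
 * adapter is not busy, marks it busy, and drops the lock around the
 * synchronized teardown.  The lock is released before returning.
 */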
1879static int
1880cxgb_uninit_locked(struct port_info *p)
1881{
1882	struct adapter *sc = p->adapter;
1883	int rc;
1884
1885	ADAPTER_LOCK_ASSERT_OWNED(sc);
1886
1887	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1888		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1889			rc = EINTR;
1890			goto done;
1891		}
1892	}
1893	if (IS_DOOMED(p)) {
1894		rc = ENXIO;
1895		goto done;
1896	}
1897	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1898	SET_BUSY(sc);
1899	ADAPTER_UNLOCK(sc);
1900
1901	rc = cxgb_uninit_synchronized(p);
1902
1903	ADAPTER_LOCK(sc);
1904	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1905	CLR_BUSY(sc);
1906	wakeup_one(&sc->flags);
1907done:
1908	ADAPTER_UNLOCK(sc);
1909	return (rc);
1910}
1911
1912/*
1913 * Called on "ifconfig down", and from port_detach
1914 */
1915static int
1916cxgb_uninit_synchronized(struct port_info *pi)
1917{
1918	struct adapter *sc = pi->adapter;
1919	struct ifnet *ifp = pi->ifp;
1920
1921	/*
1922	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1923	 */
1924	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1925
1926	/*
1927	 * Clear this port's bit from the open device map, and then drain all
1928	 * the tasks that can access/manipulate this port's port_info or ifp.
1929	 * We disable this port's interrupts here and so the slow/ext
1930	 * interrupt tasks won't be enqueued.  The tick task will continue to
1931	 * be enqueued every second but the runs after this drain will not see
1932	 * this port in the open device map.
1933	 *
1934	 * A well-behaved task must take open_device_map into account and ignore
1935	 * ports that are not open.
1936	 */
1937	clrbit(&sc->open_device_map, pi->port_id);
1938	t3_port_intr_disable(sc, pi->port_id);
1939	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1940	taskqueue_drain(sc->tq, &sc->ext_intr_task);
1941	taskqueue_drain(sc->tq, &sc->tick_task);
1942
1943	PORT_LOCK(pi);
1944	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1945
1946	/* disable pause frames */
1947	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1948
1949	/* Reset RX FIFO HWM */
1950	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1951			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1952
1953	DELAY(100 * 1000);
1954
1955	/* Wait for TXFIFO empty */
1956	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1957			F_TXFIFO_EMPTY, 1, 20, 5);
1958
1959	DELAY(100 * 1000);
1960	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1961
1962
1963	pi->phy.ops->power_down(&pi->phy, 1);
1964
1965	PORT_UNLOCK(pi);
1966
1967	pi->link_config.link_ok = 0;
1968	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1969
1970	if ((sc->open_device_map & PORT_MASK) == 0)
1971		offload_close(&sc->tdev);
1972
1973	if (sc->open_device_map == 0)
1974		cxgb_down(pi->adapter);
1975
1976	return (0);
1977}
1978
1979/*
1980 * Mark lro enabled or disabled in all qsets for this port
1981 */
1982static int
1983cxgb_set_lro(struct port_info *p, int enabled)
1984{
1985	int i;
1986	struct adapter *adp = p->adapter;
1987	struct sge_qset *q;
1988
1989	for (i = 0; i < p->nqsets; i++) {
1990		q = &adp->sge.qs[p->first_qset + i];
1991		q->lro.enabled = (enabled != 0);
1992	}
1993	return (0);
1994}
1995
1996static int
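/*
 * ifnet ioctl handler.  Requests not handled here are passed to ether_ioctl.
 */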
1997cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1998{
1999	struct port_info *p = ifp->if_softc;
2000	struct adapter *sc = p->adapter;
2001	struct ifreq *ifr = (struct ifreq *)data;
2002	int flags, error = 0, mtu;
2003	uint32_t mask;
2004
2005	switch (command) {
2006	case SIOCSIFMTU:
2007		ADAPTER_LOCK(sc);
2008		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2009		if (error) {
2010fail:
2011			ADAPTER_UNLOCK(sc);
2012			return (error);
2013		}
2014
2015		mtu = ifr->ifr_mtu;
2016		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2017			error = EINVAL;
2018		} else {
2019			ifp->if_mtu = mtu;
2020			PORT_LOCK(p);
2021			cxgb_update_mac_settings(p);
2022			PORT_UNLOCK(p);
2023		}
2024		ADAPTER_UNLOCK(sc);
2025		break;
2026	case SIOCSIFFLAGS:
2027		ADAPTER_LOCK(sc);
2028		if (IS_DOOMED(p)) {
2029			error = ENXIO;
2030			goto fail;
2031		}
2032		if (ifp->if_flags & IFF_UP) {
2033			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2034				flags = p->if_flags;
2035				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2036				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2037					if (IS_BUSY(sc)) {
2038						error = EBUSY;
2039						goto fail;
2040					}
2041					PORT_LOCK(p);
2042					cxgb_update_mac_settings(p);
2043					PORT_UNLOCK(p);
2044				}
2045				ADAPTER_UNLOCK(sc);
2046			} else
2047				error = cxgb_init_locked(p);
2048			p->if_flags = ifp->if_flags;
2049		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2050			error = cxgb_uninit_locked(p);
2051		else
2052			ADAPTER_UNLOCK(sc);
2053
2054		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2055		break;
2056	case SIOCADDMULTI:
2057	case SIOCDELMULTI:
2058		ADAPTER_LOCK(sc);
2059		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2060		if (error)
2061			goto fail;
2062
2063		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2064			PORT_LOCK(p);
2065			cxgb_update_mac_settings(p);
2066			PORT_UNLOCK(p);
2067		}
2068		ADAPTER_UNLOCK(sc);
2069
2070		break;
2071	case SIOCSIFCAP:
2072		ADAPTER_LOCK(sc);
2073		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2074		if (error)
2075			goto fail;
2076
2077		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2078		if (mask & IFCAP_TXCSUM) {
2079			ifp->if_capenable ^= IFCAP_TXCSUM;
2080			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2081
2082			if (IFCAP_TSO & ifp->if_capenable &&
2083			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2084				ifp->if_capenable &= ~IFCAP_TSO;
2085				ifp->if_hwassist &= ~CSUM_TSO;
2086				if_printf(ifp,
2087				    "tso disabled due to -txcsum.\n");
2088			}
2089		}
2090		if (mask & IFCAP_RXCSUM)
2091			ifp->if_capenable ^= IFCAP_RXCSUM;
2092		if (mask & IFCAP_TSO4) {
2093			ifp->if_capenable ^= IFCAP_TSO4;
2094
2095			if (IFCAP_TSO & ifp->if_capenable) {
2096				if (IFCAP_TXCSUM & ifp->if_capenable)
2097					ifp->if_hwassist |= CSUM_TSO;
2098				else {
2099					ifp->if_capenable &= ~IFCAP_TSO;
2100					ifp->if_hwassist &= ~CSUM_TSO;
2101					if_printf(ifp,
2102					    "enable txcsum first.\n");
2103					error = EAGAIN;
2104				}
2105			} else
2106				ifp->if_hwassist &= ~CSUM_TSO;
2107		}
2108		if (mask & IFCAP_LRO) {
2109			ifp->if_capenable ^= IFCAP_LRO;
2110
2111			/* Safe to do this even if cxgb_up not called yet */
2112			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2113		}
2114		if (mask & IFCAP_VLAN_HWTAGGING) {
2115			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2116			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2117				PORT_LOCK(p);
2118				cxgb_update_mac_settings(p);
2119				PORT_UNLOCK(p);
2120			}
2121		}
2122		if (mask & IFCAP_VLAN_MTU) {
2123			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2124			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2125				PORT_LOCK(p);
2126				cxgb_update_mac_settings(p);
2127				PORT_UNLOCK(p);
2128			}
2129		}
2130		if (mask & IFCAP_VLAN_HWTSO)
2131			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2132		if (mask & IFCAP_VLAN_HWCSUM)
2133			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2134
2135#ifdef VLAN_CAPABILITIES
2136		VLAN_CAPABILITIES(ifp);
2137#endif
2138		ADAPTER_UNLOCK(sc);
2139		break;
2140	case SIOCSIFMEDIA:
2141	case SIOCGIFMEDIA:
2142		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2143		break;
2144	default:
2145		error = ether_ioctl(ifp, command, data);
2146	}
2147
2148	return (error);
2149}
2150
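/* Manual media changes are not supported. */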
2151static int
2152cxgb_media_change(struct ifnet *ifp)
2153{
2154	return (EOPNOTSUPP);
2155}
2156
2157/*
2158 * Translates phy->modtype to the correct Ethernet media subtype.
2159 */
2160static int
2161cxgb_ifm_type(int mod)
2162{
2163	switch (mod) {
2164	case phy_modtype_sr:
2165		return (IFM_10G_SR);
2166	case phy_modtype_lr:
2167		return (IFM_10G_LR);
2168	case phy_modtype_lrm:
2169		return (IFM_10G_LRM);
2170	case phy_modtype_twinax:
2171		return (IFM_10G_TWINAX);
2172	case phy_modtype_twinax_long:
2173		return (IFM_10G_TWINAX_LONG);
2174	case phy_modtype_none:
2175		return (IFM_NONE);
2176	case phy_modtype_unknown:
2177		return (IFM_UNKNOWN);
2178	}
2179
2180	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2181	return (IFM_UNKNOWN);
2182}
2183
2184/*
2185 * Rebuilds the ifmedia list for this port, and sets the current media.
2186 */
2187static void
2188cxgb_build_medialist(struct port_info *p)
2189{
2190	struct cphy *phy = &p->phy;
2191	struct ifmedia *media = &p->media;
2192	int mod = phy->modtype;
2193	int m = IFM_ETHER | IFM_FDX;
2194
2195	PORT_LOCK(p);
2196
2197	ifmedia_removeall(media);
2198	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2199		/* Copper (RJ45) */
2200
2201		if (phy->caps & SUPPORTED_10000baseT_Full)
2202			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2203
2204		if (phy->caps & SUPPORTED_1000baseT_Full)
2205			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2206
2207		if (phy->caps & SUPPORTED_100baseT_Full)
2208			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2209
2210		if (phy->caps & SUPPORTED_10baseT_Full)
2211			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2212
2213		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2214		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2215
2216	} else if (phy->caps & SUPPORTED_TP) {
2217		/* Copper (CX4) */
2218
2219		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2220			("%s: unexpected cap 0x%x", __func__, phy->caps));
2221
2222		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2223		ifmedia_set(media, m | IFM_10G_CX4);
2224
2225	} else if (phy->caps & SUPPORTED_FIBRE &&
2226		   phy->caps & SUPPORTED_10000baseT_Full) {
2227		/* 10G optical (but includes SFP+ twinax) */
2228
2229		m |= cxgb_ifm_type(mod);
2230		if (IFM_SUBTYPE(m) == IFM_NONE)
2231			m &= ~IFM_FDX;
2232
2233		ifmedia_add(media, m, mod, NULL);
2234		ifmedia_set(media, m);
2235
2236	} else if (phy->caps & SUPPORTED_FIBRE &&
2237		   phy->caps & SUPPORTED_1000baseT_Full) {
2238		/* 1G optical */
2239
2240		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2241		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2242		ifmedia_set(media, m | IFM_1000_SX);
2243
2244	} else {
2245		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2246			    phy->caps));
2247	}
2248
2249	PORT_UNLOCK(p);
2250}
2251
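/*
 * Report link status and, when the current media is autoselect (copper RJ45),
 * derive the active media from the negotiated speed.
 */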
2252static void
2253cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2254{
2255	struct port_info *p = ifp->if_softc;
2256	struct ifmedia_entry *cur = p->media.ifm_cur;
2257	int speed = p->link_config.speed;
2258
2259	if (cur->ifm_data != p->phy.modtype) {
2260		cxgb_build_medialist(p);
2261		cur = p->media.ifm_cur;
2262	}
2263
2264	ifmr->ifm_status = IFM_AVALID;
2265	if (!p->link_config.link_ok)
2266		return;
2267
2268	ifmr->ifm_status |= IFM_ACTIVE;
2269
2270	/*
2271	 * active and current will differ iff current media is autoselect.  That
2272	 * can happen only for copper RJ45.
2273	 */
2274	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2275		return;
2276	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2277		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2278
2279	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2280	if (speed == SPEED_10000)
2281		ifmr->ifm_active |= IFM_10G_T;
2282	else if (speed == SPEED_1000)
2283		ifmr->ifm_active |= IFM_1000_T;
2284	else if (speed == SPEED_100)
2285		ifmr->ifm_active |= IFM_100_TX;
2286	else if (speed == SPEED_10)
2287		ifmr->ifm_active |= IFM_10_T;
2288	else
2289		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2290			    speed));
2291}
2292
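/*
 * Interrupt handler for slow-path events.  The real work may sleep, so it is
 * deferred to the adapter taskqueue.
 */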
2293static void
2294cxgb_async_intr(void *data)
2295{
2296	adapter_t *sc = data;
2297
2298	if (cxgb_debug)
2299		device_printf(sc->dev, "cxgb_async_intr\n");
2300	/*
2301	 * May need to sleep - defer to taskqueue
2302	 */
2303	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2304}
2305
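/*
 * Task that services external (PHY) interrupts and then re-enables the T3DBG
 * interrupt.
 */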
2306static void
2307cxgb_ext_intr_handler(void *arg, int count)
2308{
2309	adapter_t *sc = (adapter_t *)arg;
2310
2311	if (cxgb_debug)
2312		printf("cxgb_ext_intr_handler\n");
2313
2314	t3_phy_intr_handler(sc);
2315
2316	/* Now reenable external interrupts */
2317	ADAPTER_LOCK(sc);
2318	if (sc->slow_intr_mask) {
2319		sc->slow_intr_mask |= F_T3DBG;
2320		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2321		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2322	}
2323	ADAPTER_UNLOCK(sc);
2324}
2325
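/*
 * Returns non-zero if the port's link state must be polled: on the first
 * check, after a link fault, or when the PHY has no link interrupt.
 */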
2326static inline int
2327link_poll_needed(struct port_info *p)
2328{
2329	struct cphy *phy = &p->phy;
2330
2331	if (phy->caps & POLL_LINK_1ST_TIME) {
2332		p->phy.caps &= ~POLL_LINK_1ST_TIME;
2333		return (1);
2334	}
2335
2336	return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2337}
2338
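/* Poll the link state of every open port that needs it. */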
2339static void
2340check_link_status(adapter_t *sc)
2341{
2342	int i;
2343
2344	for (i = 0; i < sc->params.nports; ++i) {
2345		struct port_info *p = &sc->port[i];
2346
2347		if (!isset(&sc->open_device_map, p->port_id))
2348			continue;
2349
2350		if (link_poll_needed(p))
2351			t3_link_changed(sc, i);
2352	}
2353}
2354
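/*
 * T3B2-specific MAC watchdog.  For every open port with a good link, run the
 * MAC watchdog task; depending on its verdict, count a toggle or reinitialize
 * the MAC and restart the link.
 */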
2355static void
2356check_t3b2_mac(struct adapter *sc)
2357{
2358	int i;
2359
2360	if (sc->flags & CXGB_SHUTDOWN)
2361		return;
2362
2363	for_each_port(sc, i) {
2364		struct port_info *p = &sc->port[i];
2365		int status;
2366#ifdef INVARIANTS
2367		struct ifnet *ifp = p->ifp;
2368#endif
2369
2370		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2371		    !p->link_config.link_ok)
2372			continue;
2373
2374		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2375			("%s: state mismatch (drv_flags %x, device_map %x)",
2376			 __func__, ifp->if_drv_flags, sc->open_device_map));
2377
2378		PORT_LOCK(p);
2379		status = t3b2_mac_watchdog_task(&p->mac);
2380		if (status == 1)
2381			p->mac.stats.num_toggled++;
2382		else if (status == 2) {
2383			struct cmac *mac = &p->mac;
2384
2385			cxgb_update_mac_settings(p);
2386			t3_link_start(&p->phy, mac, &p->link_config);
2387			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2388			t3_port_intr_enable(sc, p->port_id);
2389			p->mac.stats.num_resets++;
2390		}
2391		PORT_UNLOCK(p);
2392	}
2393}
2394
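/*
 * Periodic callout.  All the work is done from the tick task on the adapter
 * taskqueue.
 */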
2395static void
2396cxgb_tick(void *arg)
2397{
2398	adapter_t *sc = (adapter_t *)arg;
2399
2400	if (sc->flags & CXGB_SHUTDOWN)
2401		return;
2402
2403	taskqueue_enqueue(sc->tq, &sc->tick_task);
2404	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2405}
2406
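/*
 * Tick task: polls link state, runs the T3B2 MAC watchdog when applicable,
 * records SGE starvation and empty free-list events, and refreshes per-port
 * interface statistics from the MAC counters.
 */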
2407static void
2408cxgb_tick_handler(void *arg, int count)
2409{
2410	adapter_t *sc = (adapter_t *)arg;
2411	const struct adapter_params *p = &sc->params;
2412	int i;
2413	uint32_t cause, reset;
2414
2415	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2416		return;
2417
2418	check_link_status(sc);
2419
2420	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2421		check_t3b2_mac(sc);
2422
2423	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2424	if (cause) {
2425		struct sge_qset *qs = &sc->sge.qs[0];
2426		uint32_t mask, v;
2427
2428		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2429
2430		mask = 1;
2431		for (i = 0; i < SGE_QSETS; i++) {
2432			if (v & mask)
2433				qs[i].rspq.starved++;
2434			mask <<= 1;
2435		}
2436
2437		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2438
2439		for (i = 0; i < SGE_QSETS * 2; i++) {
2440			if (v & mask) {
2441				qs[i / 2].fl[i % 2].empty++;
2442			}
2443			mask <<= 1;
2444		}
2445
2446		/* clear */
2447		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2448		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2449	}
2450
2451	for (i = 0; i < sc->params.nports; i++) {
2452		struct port_info *pi = &sc->port[i];
2453		struct ifnet *ifp = pi->ifp;
2454		struct cmac *mac = &pi->mac;
2455		struct mac_stats *mstats = &mac->stats;
2456		int drops, j;
2457
2458		if (!isset(&sc->open_device_map, pi->port_id))
2459			continue;
2460
2461		PORT_LOCK(pi);
2462		t3_mac_update_stats(mac);
2463		PORT_UNLOCK(pi);
2464
2465		ifp->if_opackets = mstats->tx_frames;
2466		ifp->if_ipackets = mstats->rx_frames;
2467		ifp->if_obytes = mstats->tx_octets;
2468		ifp->if_ibytes = mstats->rx_octets;
2469		ifp->if_omcasts = mstats->tx_mcast_frames;
2470		ifp->if_imcasts = mstats->rx_mcast_frames;
2471		ifp->if_collisions = mstats->tx_total_collisions;
2472		ifp->if_iqdrops = mstats->rx_cong_drops;
2473
2474		drops = 0;
2475		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2476			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2477		ifp->if_snd.ifq_drops = drops;
2478
2479		ifp->if_oerrors =
2480		    mstats->tx_excess_collisions +
2481		    mstats->tx_underrun +
2482		    mstats->tx_len_errs +
2483		    mstats->tx_mac_internal_errs +
2484		    mstats->tx_excess_deferral +
2485		    mstats->tx_fcs_errs;
2486		ifp->if_ierrors =
2487		    mstats->rx_jabber +
2488		    mstats->rx_data_errs +
2489		    mstats->rx_sequence_errs +
2490		    mstats->rx_runt +
2491		    mstats->rx_too_long +
2492		    mstats->rx_mac_internal_errs +
2493		    mstats->rx_short +
2494		    mstats->rx_fcs_errs;
2495
2496		if (mac->multiport)
2497			continue;
2498
2499		/* Count rx fifo overflows, once per second */
2500		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2501		reset = 0;
2502		if (cause & F_RXFIFO_OVERFLOW) {
2503			mac->stats.rx_fifo_ovfl++;
2504			reset |= F_RXFIFO_OVERFLOW;
2505		}
2506		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2507	}
2508}
2509
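/*
 * Placeholder for a BAR rewrite workaround on 32-bit systems; the body is
 * compiled out for now and appears to be carried over from the Linux driver.
 */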
2510static void
2511touch_bars(device_t dev)
2512{
2513	/*
2514	 * Don't enable yet
2515	 */
2516#if !defined(__LP64__) && 0
2517	u32 v;
2518
2519	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2520	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2521	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2522	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2523	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2524	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2525#endif
2526}
2527
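/*
 * Write len bytes at offset into the adapter's serial EEPROM.  The EEPROM is
 * accessed one 32-bit word at a time, so unaligned requests are staged
 * through a temporary buffer.
 */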
2528static int
2529set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2530{
2531	uint8_t *buf;
2532	int err = 0;
2533	u32 aligned_offset, aligned_len, *p;
2534	struct adapter *adapter = pi->adapter;
2535
2536
2537	aligned_offset = offset & ~3;
2538	aligned_len = (len + (offset & 3) + 3) & ~3;
2539
2540	if (aligned_offset != offset || aligned_len != len) {
2541		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2542		if (!buf)
2543			return (ENOMEM);
2544		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2545		if (!err && aligned_len > 4)
2546			err = t3_seeprom_read(adapter,
2547					      aligned_offset + aligned_len - 4,
2548					      (u32 *)&buf[aligned_len - 4]);
2549		if (err)
2550			goto out;
2551		memcpy(buf + (offset & 3), data, len);
2552	} else
2553		buf = (uint8_t *)(uintptr_t)data;
2554
2555	err = t3_seeprom_wp(adapter, 0);
2556	if (err)
2557		goto out;
2558
2559	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2560		err = t3_seeprom_write(adapter, aligned_offset, *p);
2561		aligned_offset += 4;
2562	}
2563
2564	if (!err)
2565		err = t3_seeprom_wp(adapter, 1);
2566out:
2567	if (buf != data)
2568		free(buf, M_DEVBUF);
2569	return (err);
2570}
2571
2572
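/*
 * Range check used by the ioctl handlers below.  Negative values mean "not
 * specified" and are accepted, e.g. in_range(-1, 0, 10) is true.
 */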
2573static int
2574in_range(int val, int lo, int hi)
2575{
2576	return (val < 0 || (val <= hi && val >= lo));
2577}
2578
2579static int
2580cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2581{
2582	return (0);
2583}
2584
2585static int
2586cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2587{
2588	return (0);
2589}
2590
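/*
 * Handler for the driver's private CHELSIO_* ioctls on its control device.
 * Callers need driver privilege.
 */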
2591static int
2592cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2593    int fflag, struct thread *td)
2594{
2595	int mmd, error = 0;
2596	struct port_info *pi = dev->si_drv1;
2597	adapter_t *sc = pi->adapter;
2598
2599#ifdef PRIV_SUPPORTED
2600	if (priv_check(td, PRIV_DRIVER)) {
2601		if (cxgb_debug)
2602			printf("user does not have access to privileged ioctls\n");
2603		return (EPERM);
2604	}
2605#else
2606	if (suser(td)) {
2607		if (cxgb_debug)
2608			printf("user does not have access to privileged ioctls\n");
2609		return (EPERM);
2610	}
2611#endif
2612
2613	switch (cmd) {
2614	case CHELSIO_GET_MIIREG: {
2615		uint32_t val;
2616		struct cphy *phy = &pi->phy;
2617		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2618
2619		if (!phy->mdio_read)
2620			return (EOPNOTSUPP);
2621		if (is_10G(sc)) {
2622			mmd = mid->phy_id >> 8;
2623			if (!mmd)
2624				mmd = MDIO_DEV_PCS;
2625			else if (mmd > MDIO_DEV_VEND2)
2626				return (EINVAL);
2627
2628			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2629					     mid->reg_num, &val);
2630		} else
2631		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2632			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2633		if (error == 0)
2634			mid->val_out = val;
2635		break;
2636	}
2637	case CHELSIO_SET_MIIREG: {
2638		struct cphy *phy = &pi->phy;
2639		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2640
2641		if (!phy->mdio_write)
2642			return (EOPNOTSUPP);
2643		if (is_10G(sc)) {
2644			mmd = mid->phy_id >> 8;
2645			if (!mmd)
2646				mmd = MDIO_DEV_PCS;
2647			else if (mmd > MDIO_DEV_VEND2)
2648				return (EINVAL);
2649
2650			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2651					      mmd, mid->reg_num, mid->val_in);
2652		} else
2653			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2654					      mid->reg_num & 0x1f,
2655					      mid->val_in);
2656		break;
2657	}
2658	case CHELSIO_SETREG: {
2659		struct ch_reg *edata = (struct ch_reg *)data;
2660		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2661			return (EFAULT);
2662		t3_write_reg(sc, edata->addr, edata->val);
2663		break;
2664	}
2665	case CHELSIO_GETREG: {
2666		struct ch_reg *edata = (struct ch_reg *)data;
2667		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2668			return (EFAULT);
2669		edata->val = t3_read_reg(sc, edata->addr);
2670		break;
2671	}
2672	case CHELSIO_GET_SGE_CONTEXT: {
2673		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2674		mtx_lock_spin(&sc->sge.reg_lock);
2675		switch (ecntxt->cntxt_type) {
2676		case CNTXT_TYPE_EGRESS:
2677			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2678			    ecntxt->data);
2679			break;
2680		case CNTXT_TYPE_FL:
2681			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2682			    ecntxt->data);
2683			break;
2684		case CNTXT_TYPE_RSP:
2685			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2686			    ecntxt->data);
2687			break;
2688		case CNTXT_TYPE_CQ:
2689			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2690			    ecntxt->data);
2691			break;
2692		default:
2693			error = EINVAL;
2694			break;
2695		}
2696		mtx_unlock_spin(&sc->sge.reg_lock);
2697		break;
2698	}
2699	case CHELSIO_GET_SGE_DESC: {
2700		struct ch_desc *edesc = (struct ch_desc *)data;
2701		int ret;
2702		if (edesc->queue_num >= SGE_QSETS * 6)
2703			return (EINVAL);
2704		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2705		    edesc->queue_num % 6, edesc->idx, edesc->data);
2706		if (ret < 0)
2707			return (EINVAL);
2708		edesc->size = ret;
2709		break;
2710	}
2711	case CHELSIO_GET_QSET_PARAMS: {
2712		struct qset_params *q;
2713		struct ch_qset_params *t = (struct ch_qset_params *)data;
2714		int q1 = pi->first_qset;
2715		int nqsets = pi->nqsets;
2716		int i;
2717
2718		if (t->qset_idx >= nqsets)
2719			return (EINVAL);
2720
2721		i = q1 + t->qset_idx;
2722		q = &sc->params.sge.qset[i];
2723		t->rspq_size   = q->rspq_size;
2724		t->txq_size[0] = q->txq_size[0];
2725		t->txq_size[1] = q->txq_size[1];
2726		t->txq_size[2] = q->txq_size[2];
2727		t->fl_size[0]  = q->fl_size;
2728		t->fl_size[1]  = q->jumbo_size;
2729		t->polling     = q->polling;
2730		t->lro         = q->lro;
2731		t->intr_lat    = q->coalesce_usecs;
2732		t->cong_thres  = q->cong_thres;
2733		t->qnum        = i;
2734
2735		if ((sc->flags & FULL_INIT_DONE) == 0)
2736			t->vector = 0;
2737		else if (sc->flags & USING_MSIX)
2738			t->vector = rman_get_start(sc->msix_irq_res[i]);
2739		else
2740			t->vector = rman_get_start(sc->irq_res);
2741
2742		break;
2743	}
2744	case CHELSIO_GET_QSET_NUM: {
2745		struct ch_reg *edata = (struct ch_reg *)data;
2746		edata->val = pi->nqsets;
2747		break;
2748	}
2749	case CHELSIO_LOAD_FW: {
2750		uint8_t *fw_data;
2751		uint32_t vers;
2752		struct ch_mem_range *t = (struct ch_mem_range *)data;
2753
2754		/*
2755		 * You're allowed to load firmware only before FULL_INIT_DONE
2756		 *
2757		 * FW_UPTODATE is also set so the rest of the initialization
2758		 * will not overwrite what was loaded here.  This gives you the
2759		 * flexibility to load any firmware (and maybe shoot yourself in
2760		 * the foot).
2761		 */
2762
2763		ADAPTER_LOCK(sc);
2764		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2765			ADAPTER_UNLOCK(sc);
2766			return (EBUSY);
2767		}
2768
2769		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2770		if (!fw_data)
2771			error = ENOMEM;
2772		else
2773			error = copyin(t->buf, fw_data, t->len);
2774
2775		if (!error)
2776			error = -t3_load_fw(sc, fw_data, t->len);
2777
2778		if (t3_get_fw_version(sc, &vers) == 0) {
2779			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2780			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2781			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2782		}
2783
2784		if (!error)
2785			sc->flags |= FW_UPTODATE;
2786
2787		free(fw_data, M_DEVBUF);
2788		ADAPTER_UNLOCK(sc);
2789		break;
2790	}
2791	case CHELSIO_LOAD_BOOT: {
2792		uint8_t *boot_data;
2793		struct ch_mem_range *t = (struct ch_mem_range *)data;
2794
2795		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2796		if (!boot_data)
2797			return (ENOMEM);
2798
2799		error = copyin(t->buf, boot_data, t->len);
2800		if (!error)
2801			error = -t3_load_boot(sc, boot_data, t->len);
2802
2803		free(boot_data, M_DEVBUF);
2804		break;
2805	}
2806	case CHELSIO_GET_PM: {
2807		struct ch_pm *m = (struct ch_pm *)data;
2808		struct tp_params *p = &sc->params.tp;
2809
2810		if (!is_offload(sc))
2811			return (EOPNOTSUPP);
2812
2813		m->tx_pg_sz = p->tx_pg_size;
2814		m->tx_num_pg = p->tx_num_pgs;
2815		m->rx_pg_sz  = p->rx_pg_size;
2816		m->rx_num_pg = p->rx_num_pgs;
2817		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2818
2819		break;
2820	}
2821	case CHELSIO_SET_PM: {
2822		struct ch_pm *m = (struct ch_pm *)data;
2823		struct tp_params *p = &sc->params.tp;
2824
2825		if (!is_offload(sc))
2826			return (EOPNOTSUPP);
2827		if (sc->flags & FULL_INIT_DONE)
2828			return (EBUSY);
2829
2830		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2831		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2832			return (EINVAL);	/* not power of 2 */
2833		if (!(m->rx_pg_sz & 0x14000))
2834			return (EINVAL);	/* not 16KB or 64KB */
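		/* tx page size must be 16KB, 64KB, 256KB, 1MB, 4MB, or 16MB */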
2835		if (!(m->tx_pg_sz & 0x1554000))
2836			return (EINVAL);
2837		if (m->tx_num_pg == -1)
2838			m->tx_num_pg = p->tx_num_pgs;
2839		if (m->rx_num_pg == -1)
2840			m->rx_num_pg = p->rx_num_pgs;
2841		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2842			return (EINVAL);
2843		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2844		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2845			return (EINVAL);
2846
2847		p->rx_pg_size = m->rx_pg_sz;
2848		p->tx_pg_size = m->tx_pg_sz;
2849		p->rx_num_pgs = m->rx_num_pg;
2850		p->tx_num_pgs = m->tx_num_pg;
2851		break;
2852	}
2853	case CHELSIO_SETMTUTAB: {
2854		struct ch_mtus *m = (struct ch_mtus *)data;
2855		int i;
2856
2857		if (!is_offload(sc))
2858			return (EOPNOTSUPP);
2859		if (offload_running(sc))
2860			return (EBUSY);
2861		if (m->nmtus != NMTUS)
2862			return (EINVAL);
2863		if (m->mtus[0] < 81)         /* accommodate SACK */
2864			return (EINVAL);
2865
2866		/*
2867		 * MTUs must be in ascending order
2868		 */
2869		for (i = 1; i < NMTUS; ++i)
2870			if (m->mtus[i] < m->mtus[i - 1])
2871				return (EINVAL);
2872
2873		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2874		break;
2875	}
2876	case CHELSIO_GETMTUTAB: {
2877		struct ch_mtus *m = (struct ch_mtus *)data;
2878
2879		if (!is_offload(sc))
2880			return (EOPNOTSUPP);
2881
2882		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2883		m->nmtus = NMTUS;
2884		break;
2885	}
2886	case CHELSIO_GET_MEM: {
2887		struct ch_mem_range *t = (struct ch_mem_range *)data;
2888		struct mc7 *mem;
2889		uint8_t *useraddr;
2890		u64 buf[32];
2891
2892		/*
2893		 * Use these to avoid modifying len/addr in the return
2894		 * struct
2895		 */
2896		uint32_t len = t->len, addr = t->addr;
2897
2898		if (!is_offload(sc))
2899			return (EOPNOTSUPP);
2900		if (!(sc->flags & FULL_INIT_DONE))
2901			return (EIO);         /* need the memory controllers */
2902		if ((addr & 0x7) || (len & 0x7))
2903			return (EINVAL);
2904		if (t->mem_id == MEM_CM)
2905			mem = &sc->cm;
2906		else if (t->mem_id == MEM_PMRX)
2907			mem = &sc->pmrx;
2908		else if (t->mem_id == MEM_PMTX)
2909			mem = &sc->pmtx;
2910		else
2911			return (EINVAL);
2912
2913		/*
2914		 * Version scheme:
2915		 * bits 0..9: chip version
2916		 * bits 10..15: chip revision
2917		 */
2918		t->version = 3 | (sc->params.rev << 10);
2919
2920		/*
2921		 * Read 256 bytes at a time as len can be large and we don't
2922		 * want to use huge intermediate buffers.
2923		 */
2924		useraddr = (uint8_t *)t->buf;
2925		while (len) {
2926			unsigned int chunk = min(len, sizeof(buf));
2927
2928			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2929			if (error)
2930				return (-error);
2931			if (copyout(buf, useraddr, chunk))
2932				return (EFAULT);
2933			useraddr += chunk;
2934			addr += chunk;
2935			len -= chunk;
2936		}
2937		break;
2938	}
2939	case CHELSIO_READ_TCAM_WORD: {
2940		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2941
2942		if (!is_offload(sc))
2943			return (EOPNOTSUPP);
2944		if (!(sc->flags & FULL_INIT_DONE))
2945			return (EIO);         /* need MC5 */
2946		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2947		break;
2948	}
2949	case CHELSIO_SET_TRACE_FILTER: {
2950		struct ch_trace *t = (struct ch_trace *)data;
2951		const struct trace_params *tp;
2952
2953		tp = (const struct trace_params *)&t->sip;
2954		if (t->config_tx)
2955			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2956					       t->trace_tx);
2957		if (t->config_rx)
2958			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2959					       t->trace_rx);
2960		break;
2961	}
2962	case CHELSIO_SET_PKTSCHED: {
2963		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2964		if (sc->open_device_map == 0)
2965			return (EAGAIN);
2966		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2967		    p->binding);
2968		break;
2969	}
2970	case CHELSIO_IFCONF_GETREGS: {
2971		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2972		int reglen = cxgb_get_regs_len();
2973		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2974		if (buf == NULL) {
2975			return (ENOMEM);
2976		}
2977		if (regs->len > reglen)
2978			regs->len = reglen;
2979		else if (regs->len < reglen)
2980			error = ENOBUFS;
2981
2982		if (!error) {
2983			cxgb_get_regs(sc, regs, buf);
2984			error = copyout(buf, regs->data, reglen);
2985		}
2986		free(buf, M_DEVBUF);
2987
2988		break;
2989	}
2990	case CHELSIO_SET_HW_SCHED: {
2991		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2992		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2993
2994		if ((sc->flags & FULL_INIT_DONE) == 0)
2995			return (EAGAIN);       /* need TP to be initialized */
2996		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2997		    !in_range(t->channel, 0, 1) ||
2998		    !in_range(t->kbps, 0, 10000000) ||
2999		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
3000		    !in_range(t->flow_ipg, 0,
3001			      dack_ticks_to_usec(sc, 0x7ff)))
3002			return (EINVAL);
3003
3004		if (t->kbps >= 0) {
3005			error = t3_config_sched(sc, t->kbps, t->sched);
3006			if (error < 0)
3007				return (-error);
3008		}
3009		if (t->class_ipg >= 0)
3010			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3011		if (t->flow_ipg >= 0) {
3012			t->flow_ipg *= 1000;     /* us -> ns */
3013			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3014		}
3015		if (t->mode >= 0) {
3016			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3017
3018			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3019					 bit, t->mode ? bit : 0);
3020		}
3021		if (t->channel >= 0)
3022			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3023					 1 << t->sched, t->channel << t->sched);
3024		break;
3025	}
3026	case CHELSIO_GET_EEPROM: {
3027		int i;
3028		struct ch_eeprom *e = (struct ch_eeprom *)data;
3029		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3030
3031		if (buf == NULL) {
3032			return (ENOMEM);
3033		}
3034		e->magic = EEPROM_MAGIC;
3035		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3036			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3037
3038		if (!error)
3039			error = copyout(buf + e->offset, e->data, e->len);
3040
3041		free(buf, M_DEVBUF);
3042		break;
3043	}
3044	case CHELSIO_CLEAR_STATS: {
3045		if (!(sc->flags & FULL_INIT_DONE))
3046			return (EAGAIN);
3047
3048		PORT_LOCK(pi);
3049		t3_mac_update_stats(&pi->mac);
3050		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3051		PORT_UNLOCK(pi);
3052		break;
3053	}
3054	case CHELSIO_GET_UP_LA: {
3055		struct ch_up_la *la = (struct ch_up_la *)data;
3056		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3057		if (buf == NULL) {
3058			return (ENOMEM);
3059		}
3060		if (la->bufsize < LA_BUFSIZE)
3061			error = ENOBUFS;
3062
3063		if (!error)
3064			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3065					      &la->bufsize, buf);
3066		if (!error)
3067			error = copyout(buf, la->data, la->bufsize);
3068
3069		free(buf, M_DEVBUF);
3070		break;
3071	}
3072	case CHELSIO_GET_UP_IOQS: {
3073		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3074		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3075		uint32_t *v;
3076
3077		if (buf == NULL) {
3078			return (ENOMEM);
3079		}
3080		if (ioqs->bufsize < IOQS_BUFSIZE)
3081			error = ENOBUFS;
3082
3083		if (!error)
3084			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3085
3086		if (!error) {
3087			v = (uint32_t *)buf;
3088
3089			ioqs->bufsize -= 4 * sizeof(uint32_t);
3090			ioqs->ioq_rx_enable = *v++;
3091			ioqs->ioq_tx_enable = *v++;
3092			ioqs->ioq_rx_status = *v++;
3093			ioqs->ioq_tx_status = *v++;
3094
3095			error = copyout(v, ioqs->data, ioqs->bufsize);
3096		}
3097
3098		free(buf, M_DEVBUF);
3099		break;
3100	}
3101	case CHELSIO_SET_FILTER: {
3102		struct ch_filter *f = (struct ch_filter *)data;
3103		struct filter_info *p;
3104		unsigned int nfilters = sc->params.mc5.nfilters;
3105
3106		if (!is_offload(sc))
3107			return (EOPNOTSUPP);	/* No TCAM */
3108		if (!(sc->flags & FULL_INIT_DONE))
3109			return (EAGAIN);	/* mc5 not setup yet */
3110		if (nfilters == 0)
3111			return (EBUSY);		/* TOE will use TCAM */
3112
3113		/* sanity checks */
3114		if (f->filter_id >= nfilters ||
3115		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3116		    (f->val.sport && f->mask.sport != 0xffff) ||
3117		    (f->val.dport && f->mask.dport != 0xffff) ||
3118		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3119		    (f->val.vlan_prio &&
3120			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3121		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3122		    f->qset >= SGE_QSETS ||
3123		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3124			return (EINVAL);
3125
3126		/* Was allocated with M_WAITOK */
3127		KASSERT(sc->filters, ("filter table NULL\n"));
3128
3129		p = &sc->filters[f->filter_id];
3130		if (p->locked)
3131			return (EPERM);
3132
3133		bzero(p, sizeof(*p));
3134		p->sip = f->val.sip;
3135		p->sip_mask = f->mask.sip;
3136		p->dip = f->val.dip;
3137		p->sport = f->val.sport;
3138		p->dport = f->val.dport;
3139		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3140		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3141		    FILTER_NO_VLAN_PRI;
3142		p->mac_hit = f->mac_hit;
3143		p->mac_vld = f->mac_addr_idx != 0xffff;
3144		p->mac_idx = f->mac_addr_idx;
3145		p->pkt_type = f->proto;
3146		p->report_filter_id = f->want_filter_id;
3147		p->pass = f->pass;
3148		p->rss = f->rss;
3149		p->qset = f->qset;
3150
3151		error = set_filter(sc, f->filter_id, p);
3152		if (error == 0)
3153			p->valid = 1;
3154		break;
3155	}
3156	case CHELSIO_DEL_FILTER: {
3157		struct ch_filter *f = (struct ch_filter *)data;
3158		struct filter_info *p;
3159		unsigned int nfilters = sc->params.mc5.nfilters;
3160
3161		if (!is_offload(sc))
3162			return (EOPNOTSUPP);
3163		if (!(sc->flags & FULL_INIT_DONE))
3164			return (EAGAIN);
3165		if (nfilters == 0 || sc->filters == NULL)
3166			return (EINVAL);
3167		if (f->filter_id >= nfilters)
3168			return (EINVAL);
3169
3170		p = &sc->filters[f->filter_id];
3171		if (p->locked)
3172			return (EPERM);
3173		if (!p->valid)
3174			return (EFAULT); /* Read "Bad address" as "Bad index" */
3175
3176		bzero(p, sizeof(*p));
3177		p->sip = p->sip_mask = 0xffffffff;
3178		p->vlan = 0xfff;
3179		p->vlan_prio = FILTER_NO_VLAN_PRI;
3180		p->pkt_type = 1;
3181		error = set_filter(sc, f->filter_id, p);
3182		break;
3183	}
3184	case CHELSIO_GET_FILTER: {
3185		struct ch_filter *f = (struct ch_filter *)data;
3186		struct filter_info *p;
3187		unsigned int i, nfilters = sc->params.mc5.nfilters;
3188
3189		if (!is_offload(sc))
3190			return (EOPNOTSUPP);
3191		if (!(sc->flags & FULL_INIT_DONE))
3192			return (EAGAIN);
3193		if (nfilters == 0 || sc->filters == NULL)
3194			return (EINVAL);
3195
3196		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3197		for (; i < nfilters; i++) {
3198			p = &sc->filters[i];
3199			if (!p->valid)
3200				continue;
3201
3202			bzero(f, sizeof(*f));
3203
3204			f->filter_id = i;
3205			f->val.sip = p->sip;
3206			f->mask.sip = p->sip_mask;
3207			f->val.dip = p->dip;
3208			f->mask.dip = p->dip ? 0xffffffff : 0;
3209			f->val.sport = p->sport;
3210			f->mask.sport = p->sport ? 0xffff : 0;
3211			f->val.dport = p->dport;
3212			f->mask.dport = p->dport ? 0xffff : 0;
3213			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3214			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3215			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3216			    0 : p->vlan_prio;
3217			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3218			    0 : FILTER_NO_VLAN_PRI;
3219			f->mac_hit = p->mac_hit;
3220			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3221			f->proto = p->pkt_type;
3222			f->want_filter_id = p->report_filter_id;
3223			f->pass = p->pass;
3224			f->rss = p->rss;
3225			f->qset = p->qset;
3226
3227			break;
3228		}
3229
3230		if (i == nfilters)
3231			f->filter_id = 0xffffffff;
3232		break;
3233	}
3234	default:
3235		return (EOPNOTSUPP);
3236		break;
3237	}
3238
3239	return (error);
3240}
3241
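/*
 * Copy the adapter registers in [start, end] into buf at their register
 * offsets.
 */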
3242static __inline void
3243reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3244    unsigned int end)
3245{
3246	uint32_t *p = (uint32_t *)(buf + start);
3247
3248	for ( ; start <= end; start += sizeof(uint32_t))
3249		*p++ = t3_read_reg(ap, start);
3250}
3251
3252#define T3_REGMAP_SIZE (3 * 1024)
3253static int
3254cxgb_get_regs_len(void)
3255{
3256	return (T3_REGMAP_SIZE);
3257}
3258
3259static void
3260cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3261{
3262
3263	/*
3264	 * Version scheme:
3265	 * bits 0..9: chip version
3266	 * bits 10..15: chip revision
3267	 * bit 31: set for PCIe cards
3268	 */
3269	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3270
3271	/*
3272	 * We skip the MAC statistics registers because they are clear-on-read.
3273	 * Also reading multi-register stats would need to synchronize with the
3274	 * periodic mac stats accumulation.  Hard to justify the complexity.
3275	 */
3276	memset(buf, 0, cxgb_get_regs_len());
3277	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3278	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3279	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3280	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3281	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3282	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3283		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3284	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3285		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3286}
3287
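/*
 * Allocate the software shadow of the hardware filter table.  The last entry
 * is reserved as a locked default filter (pass with RSS).
 */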
3288static int
3289alloc_filters(struct adapter *sc)
3290{
3291	struct filter_info *p;
3292	unsigned int nfilters = sc->params.mc5.nfilters;
3293
3294	if (nfilters == 0)
3295		return (0);
3296
3297	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3298	sc->filters = p;
3299
3300	p = &sc->filters[nfilters - 1];
3301	p->vlan = 0xfff;
3302	p->vlan_prio = FILTER_NO_VLAN_PRI;
3303	p->pass = p->rss = p->valid = p->locked = 1;
3304
3305	return (0);
3306}
3307
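/*
 * Enable filtering in the hardware and program any locked (reserved) filters.
 */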
3308static int
3309setup_hw_filters(struct adapter *sc)
3310{
3311	int i, rc;
3312	unsigned int nfilters = sc->params.mc5.nfilters;
3313
3314	if (!sc->filters)
3315		return (0);
3316
3317	t3_enable_filters(sc);
3318
3319	for (i = rc = 0; i < nfilters && !rc; i++) {
3320		if (sc->filters[i].locked)
3321			rc = set_filter(sc, i, &sc->filters[i]);
3322	}
3323
3324	return (rc);
3325}
3326
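/*
 * Program one filter into the hardware: a CPL_PASS_OPEN_REQ plus the
 * CPL_SET_TCB_FIELD updates that describe the filter are sent over the
 * management queue.
 */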
3327static int
3328set_filter(struct adapter *sc, int id, const struct filter_info *f)
3329{
3330	int len;
3331	struct mbuf *m;
3332	struct ulp_txpkt *txpkt;
3333	struct work_request_hdr *wr;
3334	struct cpl_pass_open_req *oreq;
3335	struct cpl_set_tcb_field *sreq;
3336
3337	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3338	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3339
3340	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3341	      sc->params.mc5.nfilters;
3342
3343	m = m_gethdr(M_WAITOK, MT_DATA);
3344	m->m_len = m->m_pkthdr.len = len;
3345	bzero(mtod(m, char *), len);
3346
3347	wr = mtod(m, struct work_request_hdr *);
3348	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3349
3350	oreq = (struct cpl_pass_open_req *)(wr + 1);
3351	txpkt = (struct ulp_txpkt *)oreq;
3352	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3353	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3354	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3355	oreq->local_port = htons(f->dport);
3356	oreq->peer_port = htons(f->sport);
3357	oreq->local_ip = htonl(f->dip);
3358	oreq->peer_ip = htonl(f->sip);
3359	oreq->peer_netmask = htonl(f->sip_mask);
3360	oreq->opt0h = 0;
3361	oreq->opt0l = htonl(F_NO_OFFLOAD);
3362	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3363			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3364			 V_VLAN_PRI(f->vlan_prio >> 1) |
3365			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3366			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3367			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3368
3369	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3370	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3371			  (f->report_filter_id << 15) | (1 << 23) |
3372			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3373	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3374	t3_mgmt_tx(sc, m);
3375
3376	if (f->pass && !f->rss) {
3377		len = sizeof(*sreq);
3378		m = m_gethdr(M_WAITOK, MT_DATA);
3379		m->m_len = m->m_pkthdr.len = len;
3380		bzero(mtod(m, char *), len);
3381		sreq = mtod(m, struct cpl_set_tcb_field *);
3382		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3383		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3384				 (u64)sc->rrss_map[f->qset] << 19);
3385		t3_mgmt_tx(sc, m);
3386	}
3387	return (0);
3388}
3389
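/* Fill out a CPL_SET_TCB_FIELD request for the given tid, word, mask, value. */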
3390static inline void
3391mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3392    unsigned int word, u64 mask, u64 val)
3393{
3394	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3395	req->reply = V_NO_REPLY(1);
3396	req->cpu_idx = 0;
3397	req->word = htons(word);
3398	req->mask = htobe64(mask);
3399	req->val = htobe64(val);
3400}
3401
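/*
 * Like mk_set_tcb_field, but the request is wrapped in a ULP_TXPKT header so
 * it can be embedded in a larger work request.
 */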
3402static inline void
3403set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3404    unsigned int word, u64 mask, u64 val)
3405{
3406	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3407
3408	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3409	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3410	mk_set_tcb_field(req, tid, word, mask, val);
3411}
3412