cxgb_main.c revision 206109
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 206109 2010-04-02 17:50:52Z np $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/ktr.h>
44#include <sys/rman.h>
45#include <sys/ioccom.h>
46#include <sys/mbuf.h>
47#include <sys/linker.h>
48#include <sys/firmware.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/taskqueue.h>
56#include <sys/proc.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64#include <net/if_types.h>
65#include <net/if_vlan_var.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet/ip.h>
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74
75#include <dev/pci/pcireg.h>
76#include <dev/pci/pcivar.h>
77#include <dev/pci/pci_private.h>
78
79#include <cxgb_include.h>
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_interrupts(adapter_t *);
86static void cxgb_teardown_interrupts(adapter_t *);
87static void cxgb_init(void *);
88static int cxgb_init_locked(struct port_info *);
89static int cxgb_uninit_locked(struct port_info *);
90static int cxgb_uninit_synchronized(struct port_info *);
91static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92static int cxgb_media_change(struct ifnet *);
93static int cxgb_ifm_type(int);
94static void cxgb_build_medialist(struct port_info *);
95static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96static int setup_sge_qsets(adapter_t *);
97static void cxgb_async_intr(void *);
98static void cxgb_ext_intr_handler(void *, int);
99static void cxgb_tick_handler(void *, int);
100static void cxgb_tick(void *);
101static void setup_rss(adapter_t *sc);
102
103/* Attachment glue for the PCI controller end of the device.  Each port of
104 * the device is attached separately, as defined later.
105 */
106static int cxgb_controller_probe(device_t);
107static int cxgb_controller_attach(device_t);
108static int cxgb_controller_detach(device_t);
109static void cxgb_free(struct adapter *);
110static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111    unsigned int end);
112static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
113static int cxgb_get_regs_len(void);
114static int offload_open(struct port_info *pi);
115static void touch_bars(device_t dev);
116static int offload_close(struct t3cdev *tdev);
117static void cxgb_update_mac_settings(struct port_info *p);
118
119static device_method_t cxgb_controller_methods[] = {
120	DEVMETHOD(device_probe,		cxgb_controller_probe),
121	DEVMETHOD(device_attach,	cxgb_controller_attach),
122	DEVMETHOD(device_detach,	cxgb_controller_detach),
123
124	/* bus interface */
125	DEVMETHOD(bus_print_child,	bus_generic_print_child),
126	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
127
128	{ 0, 0 }
129};
130
131static driver_t cxgb_controller_driver = {
132	"cxgbc",
133	cxgb_controller_methods,
134	sizeof(struct adapter)
135};
136
137static devclass_t	cxgb_controller_devclass;
138DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139
140/*
141 * Attachment glue for the ports.  Attachment is done directly to the
142 * controller device.
143 */
144static int cxgb_port_probe(device_t);
145static int cxgb_port_attach(device_t);
146static int cxgb_port_detach(device_t);
147
148static device_method_t cxgb_port_methods[] = {
149	DEVMETHOD(device_probe,		cxgb_port_probe),
150	DEVMETHOD(device_attach,	cxgb_port_attach),
151	DEVMETHOD(device_detach,	cxgb_port_detach),
152	{ 0, 0 }
153};
154
155static driver_t cxgb_port_driver = {
156	"cxgb",
157	cxgb_port_methods,
158	0
159};
160
161static d_ioctl_t cxgb_extension_ioctl;
162static d_open_t cxgb_extension_open;
163static d_close_t cxgb_extension_close;
164
165static struct cdevsw cxgb_cdevsw = {
166       .d_version =    D_VERSION,
167       .d_flags =      0,
168       .d_open =       cxgb_extension_open,
169       .d_close =      cxgb_extension_close,
170       .d_ioctl =      cxgb_extension_ioctl,
171       .d_name =       "cxgb",
172};
173
174static devclass_t	cxgb_port_devclass;
175DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176
177/*
178 * The driver uses the best interrupt scheme available on a platform in the
179 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
180 * of these schemes the driver may consider as follows:
181 *
182 * msi = 2: choose from among all three options
183 * msi = 1: only consider MSI and pin interrupts
184 * msi = 0: force pin interrupts
185 */
186static int msi_allowed = 2;
187
188TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
189SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
190SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
191    "MSI-X, MSI, INTx selector");
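/*
 * For example, setting hw.cxgb.msi_allowed="0" in /boot/loader.conf before
 * the driver is loaded forces INTx, and "1" allows MSI as well; the sysctl
 * itself is read-only (CTLFLAG_RDTUN).
 */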
192
193/*
194 * The driver enables offload by default.
195 * To disable it, use ofld_disable = 1.
196 */
197static int ofld_disable = 0;
198TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
199SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
200    "disable ULP offload");
201
202/*
203 * The driver uses an auto-queue algorithm by default.
204 * To disable it and force a single queue-set per port, use multiq = 0.
205 */
206static int multiq = 1;
207TUNABLE_INT("hw.cxgb.multiq", &multiq);
208SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
209    "use min(ncpus/ports, 8) queue-sets per port");
210
211/*
212 * By default the driver will not update the firmware unless
213 * it was compiled against a newer version.
215 */
216static int force_fw_update = 0;
217TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
218SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
219    "update firmware even if up to date");
220
221int cxgb_use_16k_clusters = -1;
222TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
223SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
224    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
225
226/*
227 * Tune the size of the output queue.
228 */
229int cxgb_snd_queue_len = IFQ_MAXLEN;
230TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
231SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
232    &cxgb_snd_queue_len, 0, "send queue size");
233
234
235enum {
236	MAX_TXQ_ENTRIES      = 16384,
237	MAX_CTRL_TXQ_ENTRIES = 1024,
238	MAX_RSPQ_ENTRIES     = 16384,
239	MAX_RX_BUFFERS       = 16384,
240	MAX_RX_JUMBO_BUFFERS = 16384,
241	MIN_TXQ_ENTRIES      = 4,
242	MIN_CTRL_TXQ_ENTRIES = 4,
243	MIN_RSPQ_ENTRIES     = 32,
244	MIN_FL_ENTRIES       = 32,
245	MIN_FL_JUMBO_ENTRIES = 32
246};
247
248struct filter_info {
249	u32 sip;
250	u32 sip_mask;
251	u32 dip;
252	u16 sport;
253	u16 dport;
254	u32 vlan:12;
255	u32 vlan_prio:3;
256	u32 mac_hit:1;
257	u32 mac_idx:4;
258	u32 mac_vld:1;
259	u32 pkt_type:2;
260	u32 report_filter_id:1;
261	u32 pass:1;
262	u32 rss:1;
263	u32 qset:3;
264	u32 locked:1;
265	u32 valid:1;
266};
267
268enum { FILTER_NO_VLAN_PRI = 7 };
269
270#define EEPROM_MAGIC 0x38E2F10C
271
272#define PORT_MASK ((1 << MAX_NPORTS) - 1)
273
274/* Table for probing the cards.  The desc field isn't actually used */
275struct cxgb_ident {
276	uint16_t	vendor;
277	uint16_t	device;
278	int		index;
279	char		*desc;
280} cxgb_identifiers[] = {
281	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295	{0, 0, 0, NULL}
296};
297
298static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299
300
301static __inline char
302t3rev2char(struct adapter *adapter)
303{
304	char rev = 'z';
305
306	switch(adapter->params.rev) {
307	case T3_REV_A:
308		rev = 'a';
309		break;
310	case T3_REV_B:
311	case T3_REV_B2:
312		rev = 'b';
313		break;
314	case T3_REV_C:
315		rev = 'c';
316		break;
317	}
318	return rev;
319}
320
321static struct cxgb_ident *
322cxgb_get_ident(device_t dev)
323{
324	struct cxgb_ident *id;
325
326	for (id = cxgb_identifiers; id->desc != NULL; id++) {
327		if ((id->vendor == pci_get_vendor(dev)) &&
328		    (id->device == pci_get_device(dev))) {
329			return (id);
330		}
331	}
332	return (NULL);
333}
334
335static const struct adapter_info *
336cxgb_get_adapter_info(device_t dev)
337{
338	struct cxgb_ident *id;
339	const struct adapter_info *ai;
340
341	id = cxgb_get_ident(dev);
342	if (id == NULL)
343		return (NULL);
344
345	ai = t3_get_adapter_info(id->index);
346
347	return (ai);
348}
349
350static int
351cxgb_controller_probe(device_t dev)
352{
353	const struct adapter_info *ai;
354	char *ports, buf[80];
355	int nports;
356
357	ai = cxgb_get_adapter_info(dev);
358	if (ai == NULL)
359		return (ENXIO);
360
361	nports = ai->nports0 + ai->nports1;
362	if (nports == 1)
363		ports = "port";
364	else
365		ports = "ports";
366
367	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368	device_set_desc_copy(dev, buf);
369	return (BUS_PROBE_DEFAULT);
370}
371
372#define FW_FNAME "cxgb_t3fw"
373#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
375
376static int
377upgrade_fw(adapter_t *sc)
378{
379	const struct firmware *fw;
380	int status;
381	u32 vers;
382
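	/*
	 * firmware_get(9) returns the registered image, loading the
	 * cxgb_t3fw firmware module on demand if it isn't registered yet.
	 */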
383	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385		return (ENOENT);
386	} else
387		device_printf(sc->dev, "installing firmware on card\n");
388	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389
390	if (status != 0) {
391		device_printf(sc->dev, "failed to install firmware: %d\n",
392		    status);
393	} else {
394		t3_get_fw_version(sc, &vers);
395		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397		    G_FW_VERSION_MICRO(vers));
398	}
399
400	firmware_put(fw, FIRMWARE_UNLOAD);
401
402	return (status);
403}
404
405/*
406 * The cxgb_controller_attach function is responsible for the initial
407 * bringup of the device.  Its responsibilities include:
408 *
409 *  1. Determine if the device supports MSI or MSI-X.
410 *  2. Allocate bus resources so that we can access the Base Address Register.
411 *  3. Create and initialize mutexes for the controller and its control
412 *     logic such as SGE and MDIO.
413 *  4. Call the hardware-specific setup routine for the adapter as a whole.
414 *  5. Allocate the BAR for doing MSI-X.
415 *  6. Set up the line interrupt iff MSI-X is not supported.
416 *  7. Create the driver's taskq.
417 *  8. Start one task queue service thread.
418 *  9. Check if the firmware and SRAM are up-to-date.  They will be
419 *     auto-updated later (before FULL_INIT_DONE), if required.
420 * 10. Create a child device for each MAC (port).
421 * 11. Initialize T3 private state.
422 * 12. Trigger the LED.
423 * 13. Set up offload iff supported.
424 * 14. Reset/restart the tick callout.
425 * 15. Attach sysctls.
426 *
427 * NOTE: Any modification or deviation from this list MUST be reflected in
428 * the above comment.  Failure to do so will result in problems on various
429 * error conditions including link flapping.
430 */
431static int
432cxgb_controller_attach(device_t dev)
433{
434	device_t child;
435	const struct adapter_info *ai;
436	struct adapter *sc;
437	int i, error = 0;
438	uint32_t vers;
439	int port_qsets = 1;
440	int msi_needed, reg;
441	char buf[80];
442
443	sc = device_get_softc(dev);
444	sc->dev = dev;
445	sc->msi_count = 0;
446	ai = cxgb_get_adapter_info(dev);
447
448	/* Find the PCIe link width and set max read request to 4KB. */
449	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
450		uint16_t lnk, pectl;
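		/*
		 * reg + 0x12 is the PCIe Link Status register; bits 9:4 hold
		 * the negotiated link width in lanes.
		 */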
451		lnk = pci_read_config(dev, reg + 0x12, 2);
452		sc->link_width = (lnk >> 4) & 0x3f;
453
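		/*
		 * reg + 0x8 is the PCIe Device Control register; bits 14:12
		 * select the max read request size, and the encoding 5 means
		 * 4096 bytes.
		 */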
454		pectl = pci_read_config(dev, reg + 0x8, 2);
455		pectl = (pectl & ~0x7000) | (5 << 12);
456		pci_write_config(dev, reg + 0x8, pectl, 2);
457	}
458
459	if (sc->link_width != 0 && sc->link_width <= 4 &&
460	    (ai->nports0 + ai->nports1) <= 2) {
461		device_printf(sc->dev,
462		    "PCIe x%d Link, expect reduced performance\n",
463		    sc->link_width);
464	}
465
466	touch_bars(dev);
467	pci_enable_busmaster(dev);
468	/*
469	 * Allocate the registers and make them available to the driver.
470	 * The registers that we care about for NIC mode are in BAR 0
471	 */
472	sc->regs_rid = PCIR_BAR(0);
473	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
474	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
475		device_printf(dev, "Cannot allocate BAR region 0\n");
476		return (ENXIO);
477	}
478	sc->udbs_rid = PCIR_BAR(2);
479	sc->udbs_res = NULL;
480	if (is_offload(sc) &&
481	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
482		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
483		device_printf(dev, "Cannot allocate BAR region 1\n");
484		error = ENXIO;
485		goto out;
486	}
487
488	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
489	    device_get_unit(dev));
490	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
491
492	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
493	    device_get_unit(dev));
494	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
495	    device_get_unit(dev));
496	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
497	    device_get_unit(dev));
498
499	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
500	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
501	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
502
503	sc->bt = rman_get_bustag(sc->regs_res);
504	sc->bh = rman_get_bushandle(sc->regs_res);
505	sc->mmio_len = rman_get_size(sc->regs_res);
506
507	for (i = 0; i < MAX_NPORTS; i++)
508		sc->port[i].adapter = sc;
509
510	if (t3_prep_adapter(sc, ai, 1) < 0) {
511		printf("prep adapter failed\n");
512		error = ENODEV;
513		goto out;
514	}
515	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516	 * enough messages for the queue sets.  If that fails, try falling
517	 * back to MSI.  If that fails, then try falling back to the legacy
518	 * interrupt pin model.
519	 */
520	sc->msix_regs_rid = 0x20;
521	if ((msi_allowed >= 2) &&
522	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524
525		if (multiq)
526			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
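		/*
		 * Ask for one vector per queue set plus one extra for the
		 * slow/async interrupt (rid 1 in cxgb_setup_interrupts()).
		 */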
527		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528
529		if (pci_msix_count(dev) == 0 ||
530		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531		    sc->msi_count != msi_needed) {
532			device_printf(dev, "alloc msix failed - "
533				      "msi_count=%d, msi_needed=%d, err=%d; "
534				      "will try MSI\n", sc->msi_count,
535				      msi_needed, error);
536			sc->msi_count = 0;
537			port_qsets = 1;
538			pci_release_msi(dev);
539			bus_release_resource(dev, SYS_RES_MEMORY,
540			    sc->msix_regs_rid, sc->msix_regs_res);
541			sc->msix_regs_res = NULL;
542		} else {
543			sc->flags |= USING_MSIX;
544			sc->cxgb_intr = cxgb_async_intr;
545			device_printf(dev,
546				      "using MSI-X interrupts (%u vectors)\n",
547				      sc->msi_count);
548		}
549	}
550
551	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552		sc->msi_count = 1;
553		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554			device_printf(dev, "alloc msi failed - "
555				      "err=%d; will try INTx\n", error);
556			sc->msi_count = 0;
557			port_qsets = 1;
558			pci_release_msi(dev);
559		} else {
560			sc->flags |= USING_MSI;
561			sc->cxgb_intr = t3_intr_msi;
562			device_printf(dev, "using MSI interrupts\n");
563		}
564	}
565	if (sc->msi_count == 0) {
566		device_printf(dev, "using line interrupts\n");
567		sc->cxgb_intr = t3b_intr;
568	}
569
570	/* Create a private taskqueue thread for handling driver events */
571	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572	    taskqueue_thread_enqueue, &sc->tq);
573	if (sc->tq == NULL) {
574		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
575		goto out;
576	}
577
578	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579	    device_get_nameunit(dev));
580	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
581	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
582
583
584	/* Create a periodic callout for checking adapter status */
585	callout_init(&sc->cxgb_tick_ch, TRUE);
586
587	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
588		/*
589		 * Warn user that a firmware update will be attempted in init.
590		 */
591		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
592		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
593		sc->flags &= ~FW_UPTODATE;
594	} else {
595		sc->flags |= FW_UPTODATE;
596	}
597
598	if (t3_check_tpsram_version(sc) < 0) {
599		/*
600		 * Warn user that a firmware update will be attempted in init.
601		 */
602		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
603		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
604		sc->flags &= ~TPS_UPTODATE;
605	} else {
606		sc->flags |= TPS_UPTODATE;
607	}
608
609	/*
610	 * Create a child device for each MAC.  The ethernet attachment
611	 * will be done in these children.
612	 */
613	for (i = 0; i < (sc)->params.nports; i++) {
614		struct port_info *pi;
615
616		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
617			device_printf(dev, "failed to add child port\n");
618			error = EINVAL;
619			goto out;
620		}
621		pi = &sc->port[i];
622		pi->adapter = sc;
623		pi->nqsets = port_qsets;
624		pi->first_qset = i*port_qsets;
625		pi->port_id = i;
626		pi->tx_chan = i >= ai->nports0;
627		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
628		sc->rxpkt_map[pi->txpkt_intf] = i;
629		sc->port[i].tx_chan = i >= ai->nports0;
630		sc->portdev[i] = child;
631		device_set_softc(child, pi);
632	}
633	if ((error = bus_generic_attach(dev)) != 0)
634		goto out;
635
636	/* initialize sge private state */
637	t3_sge_init_adapter(sc);
638
639	t3_led_ready(sc);
640
641	cxgb_offload_init();
642	if (is_offload(sc)) {
643		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
644		cxgb_adapter_ofld(sc);
645        }
646	error = t3_get_fw_version(sc, &vers);
647	if (error)
648		goto out;
649
650	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
651	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
652	    G_FW_VERSION_MICRO(vers));
653
654	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
655		 ai->desc, is_offload(sc) ? "R" : "",
656		 sc->params.vpd.ec, sc->params.vpd.sn);
657	device_set_desc_copy(dev, buf);
658
659	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
660		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
661		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
662
663	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
664	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
665	t3_add_attach_sysctls(sc);
666out:
667	if (error)
668		cxgb_free(sc);
669
670	return (error);
671}
672
673/*
674 * The cxgb_controller_detach routine is called when the device is
675 * unloaded from the system.
676 */
677
678static int
679cxgb_controller_detach(device_t dev)
680{
681	struct adapter *sc;
682
683	sc = device_get_softc(dev);
684
685	cxgb_free(sc);
686
687	return (0);
688}
689
690/*
691 * cxgb_free() is called by the cxgb_controller_detach() routine
692 * to tear down the structures that were built up in
693 * cxgb_controller_attach(), and should be the final piece of work
694 * done when fully unloading the driver.  Its tasks include:
695 *
697 *  1. Shutting down the threads started by the cxgb_controller_attach()
698 *     routine.
699 *  2. Stopping the lower-level device and all callouts (cxgb_down()).
700 *  3. Detaching all of the port devices created during the
701 *     cxgb_controller_attach() routine.
702 *  4. Removing the device children created via cxgb_controller_attach().
703 *  5. Releasing PCI resources associated with the device.
704 *  6. Turning off the offload support, iff it was turned on.
705 *  7. Destroying the mutexes created in cxgb_controller_attach().
706 *
707 */
708static void
709cxgb_free(struct adapter *sc)
710{
711	int i;
712
713	ADAPTER_LOCK(sc);
714	sc->flags |= CXGB_SHUTDOWN;
715	ADAPTER_UNLOCK(sc);
716
717	/*
718	 * Make sure all child devices are gone.
719	 */
720	bus_generic_detach(sc->dev);
721	for (i = 0; i < (sc)->params.nports; i++) {
722		if (sc->portdev[i] &&
723		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
724			device_printf(sc->dev, "failed to delete child port\n");
725	}
726
727	/*
728	 * At this point, it is as if cxgb_port_detach has run on all ports, and
729	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
730	 * all open devices have been closed.
731	 */
732	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
733					   __func__, sc->open_device_map));
734	for (i = 0; i < sc->params.nports; i++) {
735		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
736						  __func__, i));
737	}
738
739	/*
740	 * Finish off the adapter's callouts.
741	 */
742	callout_drain(&sc->cxgb_tick_ch);
743	callout_drain(&sc->sge_timer_ch);
744
745	/*
746	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
747	 * sysctls are cleaned up by the kernel linker.
748	 */
749	if (sc->flags & FULL_INIT_DONE) {
750 		t3_free_sge_resources(sc);
751 		sc->flags &= ~FULL_INIT_DONE;
752 	}
753
754	/*
755	 * Release all interrupt resources.
756	 */
757	cxgb_teardown_interrupts(sc);
758	if (sc->flags & (USING_MSI | USING_MSIX)) {
759		device_printf(sc->dev, "releasing msi message(s)\n");
760		pci_release_msi(sc->dev);
761	} else {
762		device_printf(sc->dev, "no msi message to release\n");
763	}
764
765	if (sc->msix_regs_res != NULL) {
766		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
767		    sc->msix_regs_res);
768	}
769
770	/*
771	 * Free the adapter's taskqueue.
772	 */
773	if (sc->tq != NULL) {
774		taskqueue_free(sc->tq);
775		sc->tq = NULL;
776	}
777
778	if (is_offload(sc)) {
779		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
780		cxgb_adapter_unofld(sc);
781	}
782
783#ifdef notyet
784	if (sc->flags & CXGB_OFLD_INIT)
785		cxgb_offload_deactivate(sc);
786#endif
787	free(sc->filters, M_DEVBUF);
788	t3_sge_free(sc);
789
790	cxgb_offload_exit();
791
792	if (sc->udbs_res != NULL)
793		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
794		    sc->udbs_res);
795
796	if (sc->regs_res != NULL)
797		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
798		    sc->regs_res);
799
800	MTX_DESTROY(&sc->mdio_lock);
801	MTX_DESTROY(&sc->sge.reg_lock);
802	MTX_DESTROY(&sc->elmer_lock);
803	ADAPTER_LOCK_DEINIT(sc);
804}
805
806/**
807 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
808 *	@sc: the controller softc
809 *
810 *	Determines how many sets of SGE queues to use and initializes them.
811 *	We support multiple queue sets per port if we have MSI-X, otherwise
812 *	just one queue set per port.
813 */
814static int
815setup_sge_qsets(adapter_t *sc)
816{
817	int i, j, err, irq_idx = 0, qset_idx = 0;
818	u_int ntxq = SGE_TXQ_PER_SET;
819
820	if ((err = t3_sge_alloc(sc)) != 0) {
821		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
822		return (err);
823	}
824
825	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
826		irq_idx = -1;
827
828	for (i = 0; i < (sc)->params.nports; i++) {
829		struct port_info *pi = &sc->port[i];
830
831		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
832			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
833			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
834			    &sc->params.sge.qset[qset_idx], ntxq, pi);
835			if (err) {
836				t3_free_sge_resources(sc);
837				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
838				    err);
839				return (err);
840			}
841		}
842	}
843
844	return (0);
845}
846
847static void
848cxgb_teardown_interrupts(adapter_t *sc)
849{
850	int i;
851
852	for (i = 0; i < SGE_QSETS; i++) {
853		if (sc->msix_intr_tag[i] == NULL) {
854
855			/* Should have been setup fully or not at all */
856			/* Should have been set up fully or not at all */
857				sc->msix_irq_rid[i] == 0,
858				("%s: half-done interrupt (%d).", __func__, i));
859
860			continue;
861		}
862
863		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
864				  sc->msix_intr_tag[i]);
865		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
866				     sc->msix_irq_res[i]);
867
868		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
869		sc->msix_irq_rid[i] = 0;
870	}
871
872	if (sc->intr_tag) {
873		KASSERT(sc->irq_res != NULL,
874			("%s: half-done interrupt.", __func__));
875
876		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
877		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
878				     sc->irq_res);
879
880		sc->irq_res = sc->intr_tag = NULL;
881		sc->irq_rid = 0;
882	}
883}
884
885static int
886cxgb_setup_interrupts(adapter_t *sc)
887{
888	struct resource *res;
889	void *tag;
890	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
891
892	sc->irq_rid = intr_flag ? 1 : 0;
893	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
894					     RF_SHAREABLE | RF_ACTIVE);
895	if (sc->irq_res == NULL) {
896		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
897			      intr_flag, sc->irq_rid);
898		err = EINVAL;
899		sc->irq_rid = 0;
900	} else {
901		err = bus_setup_intr(sc->dev, sc->irq_res,
902		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
903		    sc->cxgb_intr, sc, &sc->intr_tag);
904
905		if (err) {
906			device_printf(sc->dev,
907				      "Cannot set up interrupt (%x, %u, %d)\n",
908				      intr_flag, sc->irq_rid, err);
909			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
910					     sc->irq_res);
911			sc->irq_res = sc->intr_tag = NULL;
912			sc->irq_rid = 0;
913		}
914	}
915
916	/* That's all for INTx or MSI */
917	if (!(intr_flag & USING_MSIX) || err)
918		return (err);
919
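	/*
	 * rid 1 was hooked up above to the common/async interrupt; the
	 * remaining MSI-X vectors, starting at rid 2, service the queue sets.
	 */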
920	for (i = 0; i < sc->msi_count - 1; i++) {
921		rid = i + 2;
922		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
923					     RF_SHAREABLE | RF_ACTIVE);
924		if (res == NULL) {
925			device_printf(sc->dev, "Cannot allocate interrupt "
926				      "for message %d\n", rid);
927			err = EINVAL;
928			break;
929		}
930
931		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
932				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
933		if (err) {
934			device_printf(sc->dev, "Cannot set up interrupt "
935				      "for message %d (%d)\n", rid, err);
936			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
937			break;
938		}
939
940		sc->msix_irq_rid[i] = rid;
941		sc->msix_irq_res[i] = res;
942		sc->msix_intr_tag[i] = tag;
943	}
944
945	if (err)
946		cxgb_teardown_interrupts(sc);
947
948	return (err);
949}
950
951
952static int
953cxgb_port_probe(device_t dev)
954{
955	struct port_info *p;
956	char buf[80];
957	const char *desc;
958
959	p = device_get_softc(dev);
960	desc = p->phy.desc;
961	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
962	device_set_desc_copy(dev, buf);
963	return (0);
964}
965
966
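/*
 * Create the /dev/cxgbN character device through which the Chelsio
 * management ioctls (cxgb_extension_ioctl) are exposed.
 */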
967static int
968cxgb_makedev(struct port_info *pi)
969{
970
971	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
972	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
973
974	if (pi->port_cdev == NULL)
975		return (ENOMEM);
976
977	pi->port_cdev->si_drv1 = (void *)pi;
978
979	return (0);
980}
981
982#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
983    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
984    IFCAP_VLAN_HWTSO)
985#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
986
987static int
988cxgb_port_attach(device_t dev)
989{
990	struct port_info *p;
991	struct ifnet *ifp;
992	int err;
993	struct adapter *sc;
994
995	p = device_get_softc(dev);
996	sc = p->adapter;
997	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
998	    device_get_unit(device_get_parent(dev)), p->port_id);
999	PORT_LOCK_INIT(p, p->lockbuf);
1000
1001	/* Allocate an ifnet object and set it up */
1002	ifp = p->ifp = if_alloc(IFT_ETHER);
1003	if (ifp == NULL) {
1004		device_printf(dev, "Cannot allocate ifnet\n");
1005		return (ENOMEM);
1006	}
1007
1008	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1009	ifp->if_init = cxgb_init;
1010	ifp->if_softc = p;
1011	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1012	ifp->if_ioctl = cxgb_ioctl;
1013	ifp->if_start = cxgb_start;
1014
1015	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1016	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1017	IFQ_SET_READY(&ifp->if_snd);
1018
1019	ifp->if_capabilities = CXGB_CAP;
1020	ifp->if_capenable = CXGB_CAP_ENABLE;
1021	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1022
1023	/*
1024	 * Disable TSO on 4-port adapters - it isn't supported by the firmware.
1025	 */
1026	if (sc->params.nports > 2) {
1027		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1028		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1029		ifp->if_hwassist &= ~CSUM_TSO;
1030	}
1031
1032	ether_ifattach(ifp, p->hw_addr);
1033	ifp->if_transmit = cxgb_transmit;
1034	ifp->if_qflush = cxgb_qflush;
1035
1036#ifdef DEFAULT_JUMBO
1037	if (sc->params.nports <= 2)
1038		ifp->if_mtu = ETHERMTU_JUMBO;
1039#endif
1040	if ((err = cxgb_makedev(p)) != 0) {
1041		printf("makedev failed %d\n", err);
1042		return (err);
1043	}
1044
1045	/* Create a list of media supported by this port */
1046	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1047	    cxgb_media_status);
1048	cxgb_build_medialist(p);
1049
1050	t3_sge_init_port(p);
1051
1052	return (err);
1053}
1054
1055/*
1056 * cxgb_port_detach() is called via the device_detach method when
1057 * cxgb_free() calls bus_generic_detach().  It is responsible for
1058 * removing the device from the view of the kernel, i.e. from all
1059 * interfaces lists etc.  This routine is only called when the driver is
1060 * being unloaded, not when the link goes down.
1061 */
1062static int
1063cxgb_port_detach(device_t dev)
1064{
1065	struct port_info *p;
1066	struct adapter *sc;
1067	int i;
1068
1069	p = device_get_softc(dev);
1070	sc = p->adapter;
1071
1072	/* Tell cxgb_ioctl and if_init that the port is going away */
1073	ADAPTER_LOCK(sc);
1074	SET_DOOMED(p);
1075	wakeup(&sc->flags);
1076	while (IS_BUSY(sc))
1077		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1078	SET_BUSY(sc);
1079	ADAPTER_UNLOCK(sc);
1080
1081	if (p->port_cdev != NULL)
1082		destroy_dev(p->port_cdev);
1083
1084	cxgb_uninit_synchronized(p);
1085	ether_ifdetach(p->ifp);
1086
1087	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1088		struct sge_qset *qs = &sc->sge.qs[i];
1089		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1090
1091		callout_drain(&txq->txq_watchdog);
1092		callout_drain(&txq->txq_timer);
1093	}
1094
1095	PORT_LOCK_DEINIT(p);
1096	if_free(p->ifp);
1097	p->ifp = NULL;
1098
1099	ADAPTER_LOCK(sc);
1100	CLR_BUSY(sc);
1101	wakeup_one(&sc->flags);
1102	ADAPTER_UNLOCK(sc);
1103	return (0);
1104}
1105
1106void
1107t3_fatal_err(struct adapter *sc)
1108{
1109	u_int fw_status[4];
1110
1111	if (sc->flags & FULL_INIT_DONE) {
1112		t3_sge_stop(sc);
1113		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1114		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1115		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1116		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1117		t3_intr_disable(sc);
1118	}
1119	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1120	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1121		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1122		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1123}
1124
1125int
1126t3_os_find_pci_capability(adapter_t *sc, int cap)
1127{
1128	device_t dev;
1129	struct pci_devinfo *dinfo;
1130	pcicfgregs *cfg;
1131	uint32_t status;
1132	uint8_t ptr;
1133
1134	dev = sc->dev;
1135	dinfo = device_get_ivars(dev);
1136	cfg = &dinfo->cfg;
1137
1138	status = pci_read_config(dev, PCIR_STATUS, 2);
1139	if (!(status & PCIM_STATUS_CAPPRESENT))
1140		return (0);
1141
1142	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1143	case 0:
1144	case 1:
1145		ptr = PCIR_CAP_PTR;
1146		break;
1147	case 2:
1148		ptr = PCIR_CAP_PTR_2;
1149		break;
1150	default:
1151		return (0);
1152		break;
1153	}
1154	ptr = pci_read_config(dev, ptr, 1);
1155
1156	while (ptr != 0) {
1157		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1158			return (ptr);
1159		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1160	}
1161
1162	return (0);
1163}
1164
1165int
1166t3_os_pci_save_state(struct adapter *sc)
1167{
1168	device_t dev;
1169	struct pci_devinfo *dinfo;
1170
1171	dev = sc->dev;
1172	dinfo = device_get_ivars(dev);
1173
1174	pci_cfg_save(dev, dinfo, 0);
1175	return (0);
1176}
1177
1178int
1179t3_os_pci_restore_state(struct adapter *sc)
1180{
1181	device_t dev;
1182	struct pci_devinfo *dinfo;
1183
1184	dev = sc->dev;
1185	dinfo = device_get_ivars(dev);
1186
1187	pci_cfg_restore(dev, dinfo);
1188	return (0);
1189}
1190
1191/**
1192 *	t3_os_link_changed - handle link status changes
1193 *	@adapter: the adapter associated with the link change
1194 *	@port_id: the port index whose link status has changed
1195 *	@link_status: the new status of the link
1196 *	@speed: the new speed setting
1197 *	@duplex: the new duplex setting
1198 *	@fc: the new flow-control setting
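 *	@mac_was_reset: non-zero if the MAC was reset as part of this change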
1199 *
1200 *	This is the OS-dependent handler for link status changes.  The OS
1201 *	neutral handler takes care of most of the processing for these events,
1202 *	then calls this handler for any OS-specific processing.
1203 */
1204void
1205t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1206     int duplex, int fc, int mac_was_reset)
1207{
1208	struct port_info *pi = &adapter->port[port_id];
1209	struct ifnet *ifp = pi->ifp;
1210
1211	/* no race with detach, so ifp should always be good */
1212	KASSERT(ifp, ("%s: if detached.", __func__));
1213
1214	/* Reapply mac settings if they were lost due to a reset */
1215	if (mac_was_reset) {
1216		PORT_LOCK(pi);
1217		cxgb_update_mac_settings(pi);
1218		PORT_UNLOCK(pi);
1219	}
1220
1221	if (link_status) {
1222		ifp->if_baudrate = IF_Mbps(speed);
1223		if_link_state_change(ifp, LINK_STATE_UP);
1224	} else
1225		if_link_state_change(ifp, LINK_STATE_DOWN);
1226}
1227
1228/**
1229 *	t3_os_phymod_changed - handle PHY module changes
1230 *	@adap: the adapter
1231 *	@port_id: index of the port whose PHY module changed
1232 *
1233 *	This is the OS-dependent handler for PHY module changes.  It is
1234 *	invoked when a PHY module is removed or inserted for any OS-specific
1235 *	processing.
1236 */
1237void t3_os_phymod_changed(struct adapter *adap, int port_id)
1238{
1239	static const char *mod_str[] = {
1240		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1241	};
1242	struct port_info *pi = &adap->port[port_id];
1243	int mod = pi->phy.modtype;
1244
1245	if (mod != pi->media.ifm_cur->ifm_data)
1246		cxgb_build_medialist(pi);
1247
1248	if (mod == phy_modtype_none)
1249		if_printf(pi->ifp, "PHY module unplugged\n");
1250	else {
1251		KASSERT(mod < ARRAY_SIZE(mod_str),
1252			("invalid PHY module type %d", mod));
1253		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1254	}
1255}
1256
1257/*
1258 * Interrupt-context handler for external (PHY) interrupts.
1259 */
1260void
1261t3_os_ext_intr_handler(adapter_t *sc)
1262{
1263	if (cxgb_debug)
1264		printf("t3_os_ext_intr_handler\n");
1265	/*
1266	 * Schedule a task to handle external interrupts as they may be slow
1267	 * and we use a mutex to protect MDIO registers.  We disable PHY
1268	 * interrupts in the meantime and let the task reenable them when
1269	 * it's done.
1270	 */
1271	if (sc->slow_intr_mask) {
1272		ADAPTER_LOCK(sc);
1273		sc->slow_intr_mask &= ~F_T3DBG;
1274		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1275		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1276		ADAPTER_UNLOCK(sc);
1277	}
1278}
1279
1280void
1281t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1282{
1283
1284	/*
1285	 * The ifnet might not be allocated before this gets called,
1286	 * as this is called early on in attach by t3_prep_adapter, so
1287	 * save the address off in the port structure.
1288	 */
1289	if (cxgb_debug)
1290		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1291	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1292}
1293
1294/*
1295 * Programs the XGMAC based on the settings in the ifnet.  These settings
1296 * include MTU, MAC address, mcast addresses, etc.
1297 */
1298static void
1299cxgb_update_mac_settings(struct port_info *p)
1300{
1301	struct ifnet *ifp = p->ifp;
1302	struct t3_rx_mode rm;
1303	struct cmac *mac = &p->mac;
1304	int mtu, hwtagging;
1305
1306	PORT_LOCK_ASSERT_OWNED(p);
1307
1308	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1309
1310	mtu = ifp->if_mtu;
1311	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1312		mtu += ETHER_VLAN_ENCAP_LEN;
1313
1314	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1315
1316	t3_mac_set_mtu(mac, mtu);
1317	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1318	t3_mac_set_address(mac, 0, p->hw_addr);
1319	t3_init_rx_mode(&rm, p);
1320	t3_mac_set_rx_mode(mac, &rm);
1321}
1322
1323
1324static int
1325await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1326			      unsigned long n)
1327{
1328	int attempts = 5;
1329
1330	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1331		if (!--attempts)
1332			return (ETIMEDOUT);
1333		t3_os_sleep(10);
1334	}
1335	return 0;
1336}
1337
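/*
 * Prime the SMT, L2T, and routing tables (and one TCB field) with writes so
 * that their parity state is initialized, then wait until the firmware has
 * replied to all of the management requests.
 */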
1338static int
1339init_tp_parity(struct adapter *adap)
1340{
1341	int i;
1342	struct mbuf *m;
1343	struct cpl_set_tcb_field *greq;
1344	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1345
1346	t3_tp_set_offload_mode(adap, 1);
1347
1348	for (i = 0; i < 16; i++) {
1349		struct cpl_smt_write_req *req;
1350
1351		m = m_gethdr(M_WAITOK, MT_DATA);
1352		req = mtod(m, struct cpl_smt_write_req *);
1353		m->m_len = m->m_pkthdr.len = sizeof(*req);
1354		memset(req, 0, sizeof(*req));
1355		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1356		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1357		req->iff = i;
1358		t3_mgmt_tx(adap, m);
1359	}
1360
1361	for (i = 0; i < 2048; i++) {
1362		struct cpl_l2t_write_req *req;
1363
1364		m = m_gethdr(M_WAITOK, MT_DATA);
1365		req = mtod(m, struct cpl_l2t_write_req *);
1366		m->m_len = m->m_pkthdr.len = sizeof(*req);
1367		memset(req, 0, sizeof(*req));
1368		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1369		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1370		req->params = htonl(V_L2T_W_IDX(i));
1371		t3_mgmt_tx(adap, m);
1372	}
1373
1374	for (i = 0; i < 2048; i++) {
1375		struct cpl_rte_write_req *req;
1376
1377		m = m_gethdr(M_WAITOK, MT_DATA);
1378		req = mtod(m, struct cpl_rte_write_req *);
1379		m->m_len = m->m_pkthdr.len = sizeof(*req);
1380		memset(req, 0, sizeof(*req));
1381		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1383		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1384		t3_mgmt_tx(adap, m);
1385	}
1386
1387	m = m_gethdr(M_WAITOK, MT_DATA);
1388	greq = mtod(m, struct cpl_set_tcb_field *);
1389	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1390	memset(greq, 0, sizeof(*greq));
1391	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1392	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1393	greq->mask = htobe64(1);
1394	t3_mgmt_tx(adap, m);
1395
1396	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1397	t3_tp_set_offload_mode(adap, 0);
1398	return (i);
1399}
1400
1401/**
1402 *	setup_rss - configure Receive Side Scaling (per-queue connection demux)
1403 *	@adap: the adapter
1404 *
1405 *	Sets up RSS to distribute packets to multiple receive queues.  We
1406 *	configure the RSS CPU lookup table to distribute to the number of HW
1407 *	receive queues, and the response queue lookup table to narrow that
1408 *	down to the response queues actually configured for each port.
1409 *	We always configure the RSS mapping for two ports since the mapping
1410 *	table has plenty of entries.
1411 */
1412static void
1413setup_rss(adapter_t *adap)
1414{
1415	int i;
1416	u_int nq[2];
1417	uint8_t cpus[SGE_QSETS + 1];
1418	uint16_t rspq_map[RSS_TABLE_SIZE];
1419
1420	for (i = 0; i < SGE_QSETS; ++i)
1421		cpus[i] = i;
1422	cpus[SGE_QSETS] = 0xff;
1423
1424	nq[0] = nq[1] = 0;
1425	for_each_port(adap, i) {
1426		const struct port_info *pi = adap2pinfo(adap, i);
1427
1428		nq[pi->tx_chan] += pi->nqsets;
1429	}
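	/*
	 * The first half of the lookup table steers to channel 0's queue
	 * sets, the second half to channel 1's (entries are qset indices).
	 */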
1430	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1431		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1432		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1433	}
1434
1435	/* Calculate the reverse RSS map table */
1436	for (i = 0; i < SGE_QSETS; ++i)
1437		adap->rrss_map[i] = 0xff;
1438	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1439		if (adap->rrss_map[rspq_map[i]] == 0xff)
1440			adap->rrss_map[rspq_map[i]] = i;
1441
1442	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1443		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1444	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1445	              cpus, rspq_map);
1446
1447}
1448
1449/*
1450 * Sends an mbuf to an offload queue driver
1451 * after dealing with any active network taps.
1452 */
1453static inline int
1454offload_tx(struct t3cdev *tdev, struct mbuf *m)
1455{
1456	int ret;
1457
1458	ret = t3_offload_tx(tdev, m);
1459	return (ret);
1460}
1461
1462static int
1463write_smt_entry(struct adapter *adapter, int idx)
1464{
1465	struct port_info *pi = &adapter->port[idx];
1466	struct cpl_smt_write_req *req;
1467	struct mbuf *m;
1468
1469	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1470		return (ENOMEM);
1471
1472	req = mtod(m, struct cpl_smt_write_req *);
1473	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1474
1475	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1476	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1477	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1478	req->iff = idx;
1479	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1480	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1481
1482	m_set_priority(m, 1);
1483
1484	offload_tx(&adapter->tdev, m);
1485
1486	return (0);
1487}
1488
1489static int
1490init_smt(struct adapter *adapter)
1491{
1492	int i;
1493
1494	for_each_port(adapter, i)
1495		write_smt_entry(adapter, i);
1496	return 0;
1497}
1498
1499static void
1500init_port_mtus(adapter_t *adapter)
1501{
1502	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1503
1504	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1505}
1506
1507static void
1508send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1509			      int hi, int port)
1510{
1511	struct mbuf *m;
1512	struct mngt_pktsched_wr *req;
1513
1514	m = m_gethdr(M_DONTWAIT, MT_DATA);
1515	if (m) {
1516		req = mtod(m, struct mngt_pktsched_wr *);
1517		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1518		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1519		req->sched = sched;
1520		req->idx = qidx;
1521		req->min = lo;
1522		req->max = hi;
1523		req->binding = port;
1524		m->m_len = m->m_pkthdr.len = sizeof(*req);
1525		t3_mgmt_tx(adap, m);
1526	}
1527}
1528
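/*
 * Bind each port's queue sets to that port's Tx channel by issuing a
 * packet-scheduler management request for every queue set.
 */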
1529static void
1530bind_qsets(adapter_t *sc)
1531{
1532	int i, j;
1533
1534	for (i = 0; i < (sc)->params.nports; ++i) {
1535		const struct port_info *pi = adap2pinfo(sc, i);
1536
1537		for (j = 0; j < pi->nqsets; ++j) {
1538			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1539					  -1, pi->tx_chan);
1540
1541		}
1542	}
1543}
1544
1545static void
1546update_tpeeprom(struct adapter *adap)
1547{
1548	const struct firmware *tpeeprom;
1549
1550	uint32_t version;
1551	unsigned int major, minor;
1552	int ret, len;
1553	char rev, name[32];
1554
1555	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1556
1557	major = G_TP_VERSION_MAJOR(version);
1558	minor = G_TP_VERSION_MINOR(version);
1559	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1560		return;
1561
1562	rev = t3rev2char(adap);
1563	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1564
1565	tpeeprom = firmware_get(name);
1566	if (tpeeprom == NULL) {
1567		device_printf(adap->dev,
1568			      "could not load TP EEPROM: unable to load %s\n",
1569			      name);
1570		return;
1571	}
1572
1573	len = tpeeprom->datasize - 4;
1574
1575	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1576	if (ret)
1577		goto release_tpeeprom;
1578
1579	if (len != TP_SRAM_LEN) {
1580		device_printf(adap->dev,
1581			      "%s length is wrong len=%d expected=%d\n", name,
1582			      len, TP_SRAM_LEN);
1583		goto release_tpeeprom;
1584	}
1585
1586	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1587	    TP_SRAM_OFFSET);
1588
1589	if (!ret) {
1590		device_printf(adap->dev,
1591			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1592			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1593	} else
1594		device_printf(adap->dev,
1595			      "Protocol SRAM image update in EEPROM failed\n");
1596
1597release_tpeeprom:
1598	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1599
1600	return;
1601}
1602
1603static int
1604update_tpsram(struct adapter *adap)
1605{
1606	const struct firmware *tpsram;
1607	int ret;
1608	char rev, name[32];
1609
1610	rev = t3rev2char(adap);
1611	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1612
1613	update_tpeeprom(adap);
1614
1615	tpsram = firmware_get(name);
1616	if (tpsram == NULL) {
1617		device_printf(adap->dev, "could not load TP SRAM\n");
1618		return (EINVAL);
1619	} else
1620		device_printf(adap->dev, "updating TP SRAM\n");
1621
1622	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1623	if (ret)
1624		goto release_tpsram;
1625
1626	ret = t3_set_proto_sram(adap, tpsram->data);
1627	if (ret)
1628		device_printf(adap->dev, "loading protocol SRAM failed\n");
1629
1630release_tpsram:
1631	firmware_put(tpsram, FIRMWARE_UNLOAD);
1632
1633	return ret;
1634}
1635
1636/**
1637 *	cxgb_up - enable the adapter
1638 *	@adap: adapter being enabled
1639 *
1640 *	Called when the first port is enabled, this function performs the
1641 *	actions necessary to make an adapter operational, such as completing
1642 *	the initialization of HW modules, and enabling interrupts.
1643 */
1644static int
1645cxgb_up(struct adapter *sc)
1646{
1647	int err = 0;
1648
1649	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1650					   __func__, sc->open_device_map));
1651
1652	if ((sc->flags & FULL_INIT_DONE) == 0) {
1653
1654		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1655
1656		if ((sc->flags & FW_UPTODATE) == 0)
1657			if ((err = upgrade_fw(sc)))
1658				goto out;
1659
1660		if ((sc->flags & TPS_UPTODATE) == 0)
1661			if ((err = update_tpsram(sc)))
1662				goto out;
1663
1664		err = t3_init_hw(sc, 0);
1665		if (err)
1666			goto out;
1667
1668		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1669		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1670
1671		err = setup_sge_qsets(sc);
1672		if (err)
1673			goto out;
1674
1675		setup_rss(sc);
1676
1677		t3_intr_clear(sc);
1678		err = cxgb_setup_interrupts(sc);
1679		if (err)
1680			goto out;
1681
1682		t3_add_configured_sysctls(sc);
1683		sc->flags |= FULL_INIT_DONE;
1684	}
1685
1686	t3_intr_clear(sc);
1687	t3_sge_start(sc);
1688	t3_intr_enable(sc);
1689
1690	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1691	    is_offload(sc) && init_tp_parity(sc) == 0)
1692		sc->flags |= TP_PARITY_INIT;
1693
1694	if (sc->flags & TP_PARITY_INIT) {
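	/*
	 * Clear the CM-cache and ARP-LUT parity error indications left over
	 * from init_tp_parity() and enable the TP interrupts.
	 */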
1695		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1696		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1697	}
1698
1699	if (!(sc->flags & QUEUES_BOUND)) {
1700		bind_qsets(sc);
1701		sc->flags |= QUEUES_BOUND;
1702	}
1703
1704	t3_sge_reset_adapter(sc);
1705out:
1706	return (err);
1707}
1708
1709/*
1710 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1711 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1712 * during controller_detach, not here.
1713 */
1714static void
1715cxgb_down(struct adapter *sc)
1716{
1717	t3_sge_stop(sc);
1718	t3_intr_disable(sc);
1719}
1720
1721static int
1722offload_open(struct port_info *pi)
1723{
1724	struct adapter *sc = pi->adapter;
1725	struct t3cdev *tdev = &sc->tdev;
1726
1727	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1728
1729	t3_tp_set_offload_mode(sc, 1);
1730	tdev->lldev = pi->ifp;
1731	init_port_mtus(sc);
1732	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1733		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1734	init_smt(sc);
1735	cxgb_add_clients(tdev);
1736
1737	return (0);
1738}
1739
1740static int
1741offload_close(struct t3cdev *tdev)
1742{
1743	struct adapter *adapter = tdev2adap(tdev);
1744
1745	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1746		return (0);
1747
1748	/* Call back all registered clients */
1749	cxgb_remove_clients(tdev);
1750
1751	tdev->lldev = NULL;
1752	cxgb_set_dummy_ops(tdev);
1753	t3_tp_set_offload_mode(adapter, 0);
1754
1755	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1756
1757	return (0);
1758}
1759
1760/*
1761 * if_init for cxgb ports.
1762 */
1763static void
1764cxgb_init(void *arg)
1765{
1766	struct port_info *p = arg;
1767	struct adapter *sc = p->adapter;
1768
1769	ADAPTER_LOCK(sc);
1770	cxgb_init_locked(p); /* releases adapter lock */
1771	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1772}
1773
1774static int
1775cxgb_init_locked(struct port_info *p)
1776{
1777	struct adapter *sc = p->adapter;
1778	struct ifnet *ifp = p->ifp;
1779	struct cmac *mac = &p->mac;
1780	int i, rc = 0, may_sleep = 0;
1781
1782	ADAPTER_LOCK_ASSERT_OWNED(sc);
1783
1784	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1785		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1786			rc = EINTR;
1787			goto done;
1788		}
1789	}
1790	if (IS_DOOMED(p)) {
1791		rc = ENXIO;
1792		goto done;
1793	}
1794	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1795
1796	/*
1797	 * The code that runs during one-time adapter initialization can sleep
1798	 * so it's important not to hold any locks across it.
1799	 */
1800	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1801
1802	if (may_sleep) {
1803		SET_BUSY(sc);
1804		ADAPTER_UNLOCK(sc);
1805	}
1806
1807	if (sc->open_device_map == 0) {
1808		if ((rc = cxgb_up(sc)) != 0)
1809			goto done;
1810
1811		if (is_offload(sc) && !ofld_disable && offload_open(p))
1812			log(LOG_WARNING,
1813			    "Could not initialize offload capabilities\n");
1814	}
1815
1816	PORT_LOCK(p);
1817	if (isset(&sc->open_device_map, p->port_id) &&
1818	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1819		PORT_UNLOCK(p);
1820		goto done;
1821	}
1822	t3_port_intr_enable(sc, p->port_id);
1823	if (!mac->multiport)
1824		t3_mac_init(mac);
1825	cxgb_update_mac_settings(p);
1826	t3_link_start(&p->phy, mac, &p->link_config);
1827	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1828	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1829	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1830	PORT_UNLOCK(p);
1831
1832	t3_link_changed(sc, p->port_id);
1833
1834	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1835		struct sge_qset *qs = &sc->sge.qs[i];
1836		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1837
1838		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1839				 txq->txq_watchdog.c_cpu);
1840	}
1841
1842	/* all ok */
1843	setbit(&sc->open_device_map, p->port_id);
1844
1845done:
1846	if (may_sleep) {
1847		ADAPTER_LOCK(sc);
1848		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1849		CLR_BUSY(sc);
1850		wakeup_one(&sc->flags);
1851	}
1852	ADAPTER_UNLOCK(sc);
1853	return (rc);
1854}
1855
1856static int
1857cxgb_uninit_locked(struct port_info *p)
1858{
1859	struct adapter *sc = p->adapter;
1860	int rc;
1861
1862	ADAPTER_LOCK_ASSERT_OWNED(sc);
1863
1864	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1865		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1866			rc = EINTR;
1867			goto done;
1868		}
1869	}
1870	if (IS_DOOMED(p)) {
1871		rc = ENXIO;
1872		goto done;
1873	}
1874	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1875	SET_BUSY(sc);
1876	ADAPTER_UNLOCK(sc);
1877
1878	rc = cxgb_uninit_synchronized(p);
1879
1880	ADAPTER_LOCK(sc);
1881	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1882	CLR_BUSY(sc);
1883	wakeup_one(&sc->flags);
1884done:
1885	ADAPTER_UNLOCK(sc);
1886	return (rc);
1887}
1888
1889/*
1890 * Called on "ifconfig down", and from port_detach
1891 */
1892static int
1893cxgb_uninit_synchronized(struct port_info *pi)
1894{
1895	struct adapter *sc = pi->adapter;
1896	struct ifnet *ifp = pi->ifp;
1897
1898	/*
1899	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1900	 */
1901	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1902
1903	/*
1904	 * Clear this port's bit from the open device map, and then drain all
1905	 * the tasks that can access/manipulate this port's port_info or ifp.
1906	 * We disable this port's interrupts here so that the slow/ext
1907	 * interrupt tasks won't be enqueued.  The tick task will continue to
1908	 * be enqueued every second but the runs after this drain will not see
1909	 * this port in the open device map.
1910	 *
1911	 * A well-behaved task must take open_device_map into account and ignore
1912	 * ports that are not open.
1913	 */
1914	clrbit(&sc->open_device_map, pi->port_id);
1915	t3_port_intr_disable(sc, pi->port_id);
1916	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1917	taskqueue_drain(sc->tq, &sc->ext_intr_task);
1918	taskqueue_drain(sc->tq, &sc->tick_task);
1919
1920	PORT_LOCK(pi);
1921	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1922
1923	/* disable pause frames */
1924	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1925
1926	/* Reset RX FIFO HWM */
1927	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1928			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1929
1930	DELAY(100 * 1000);
1931
1932	/* Wait for TXFIFO empty */
1933	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1934			F_TXFIFO_EMPTY, 1, 20, 5);
1935
1936	DELAY(100 * 1000);
1937	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1938
1939
1940	pi->phy.ops->power_down(&pi->phy, 1);
1941
1942	PORT_UNLOCK(pi);
1943
1944	pi->link_config.link_ok = 0;
1945	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1946
1947	if ((sc->open_device_map & PORT_MASK) == 0)
1948		offload_close(&sc->tdev);
1949
1950	if (sc->open_device_map == 0)
1951		cxgb_down(pi->adapter);
1952
1953	return (0);
1954}
1955
1956/*
1957 * Mark lro enabled or disabled in all qsets for this port
1958 */
1959static int
1960cxgb_set_lro(struct port_info *p, int enabled)
1961{
1962	int i;
1963	struct adapter *adp = p->adapter;
1964	struct sge_qset *q;
1965
1966	PORT_LOCK_ASSERT_OWNED(p);
1967	for (i = 0; i < p->nqsets; i++) {
1968		q = &adp->sge.qs[p->first_qset + i];
1969		q->lro.enabled = (enabled != 0);
1970	}
1971	return (0);
1972}
1973
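/*
 * ifnet ioctl handler.  Most requests take the adapter lock first and bail
 * out early if the port is being detached (IS_DOOMED) or the controller is
 * busy with another operation (IS_BUSY).
 */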
1974static int
1975cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1976{
1977	struct port_info *p = ifp->if_softc;
1978	struct adapter *sc = p->adapter;
1979	struct ifreq *ifr = (struct ifreq *)data;
1980	int flags, error = 0, mtu;
1981	uint32_t mask;
1982
1983	switch (command) {
1984	case SIOCSIFMTU:
1985		ADAPTER_LOCK(sc);
1986		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1987		if (error) {
1988fail:
1989			ADAPTER_UNLOCK(sc);
1990			return (error);
1991		}
1992
1993		mtu = ifr->ifr_mtu;
1994		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1995			error = EINVAL;
1996		} else {
1997			ifp->if_mtu = mtu;
1998			PORT_LOCK(p);
1999			cxgb_update_mac_settings(p);
2000			PORT_UNLOCK(p);
2001		}
2002		ADAPTER_UNLOCK(sc);
2003		break;
2004	case SIOCSIFFLAGS:
2005		ADAPTER_LOCK(sc);
2006		if (IS_DOOMED(p)) {
2007			error = ENXIO;
2008			goto fail;
2009		}
2010		if (ifp->if_flags & IFF_UP) {
2011			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2012				flags = p->if_flags;
2013				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2014				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2015					if (IS_BUSY(sc)) {
2016						error = EBUSY;
2017						goto fail;
2018					}
2019					PORT_LOCK(p);
2020					cxgb_update_mac_settings(p);
2021					PORT_UNLOCK(p);
2022				}
2023				ADAPTER_UNLOCK(sc);
2024			} else
2025				error = cxgb_init_locked(p);
2026			p->if_flags = ifp->if_flags;
2027		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2028			error = cxgb_uninit_locked(p);
2029		else
2030			ADAPTER_UNLOCK(sc);
2031
2032		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2033		break;
2034	case SIOCADDMULTI:
2035	case SIOCDELMULTI:
2036		ADAPTER_LOCK(sc);
2037		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2038		if (error)
2039			goto fail;
2040
2041		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2042			PORT_LOCK(p);
2043			cxgb_update_mac_settings(p);
2044			PORT_UNLOCK(p);
2045		}
2046		ADAPTER_UNLOCK(sc);
2047
2048		break;
2049	case SIOCSIFCAP:
2050		ADAPTER_LOCK(sc);
2051		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2052		if (error)
2053			goto fail;
2054
2055		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2056		if (mask & IFCAP_TXCSUM) {
2057			ifp->if_capenable ^= IFCAP_TXCSUM;
2058			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2059
2060			if (IFCAP_TSO & ifp->if_capenable &&
2061			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2062				ifp->if_capenable &= ~IFCAP_TSO;
2063				ifp->if_hwassist &= ~CSUM_TSO;
2064				if_printf(ifp,
2065				    "tso disabled due to -txcsum.\n");
2066			}
2067		}
2068		if (mask & IFCAP_RXCSUM)
2069			ifp->if_capenable ^= IFCAP_RXCSUM;
2070		if (mask & IFCAP_TSO4) {
2071			ifp->if_capenable ^= IFCAP_TSO4;
2072
2073			if (IFCAP_TSO & ifp->if_capenable) {
2074				if (IFCAP_TXCSUM & ifp->if_capenable)
2075					ifp->if_hwassist |= CSUM_TSO;
2076				else {
2077					ifp->if_capenable &= ~IFCAP_TSO;
2078					ifp->if_hwassist &= ~CSUM_TSO;
2079					if_printf(ifp,
2080					    "enable txcsum first.\n");
2081					error = EAGAIN;
2082				}
2083			} else
2084				ifp->if_hwassist &= ~CSUM_TSO;
2085		}
2086		if (mask & IFCAP_LRO) {
2087			ifp->if_capenable ^= IFCAP_LRO;
2088
2089			/* Safe to do this even if cxgb_up not called yet */
2090			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2091		}
2092		if (mask & IFCAP_VLAN_HWTAGGING) {
2093			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2094			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2095				PORT_LOCK(p);
2096				cxgb_update_mac_settings(p);
2097				PORT_UNLOCK(p);
2098			}
2099		}
2100		if (mask & IFCAP_VLAN_MTU) {
2101			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2102			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2103				PORT_LOCK(p);
2104				cxgb_update_mac_settings(p);
2105				PORT_UNLOCK(p);
2106			}
2107		}
2108		if (mask & IFCAP_VLAN_HWTSO)
2109			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2110		if (mask & IFCAP_VLAN_HWCSUM)
2111			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2112
2113#ifdef VLAN_CAPABILITIES
2114		VLAN_CAPABILITIES(ifp);
2115#endif
2116		ADAPTER_UNLOCK(sc);
2117		break;
2118	case SIOCSIFMEDIA:
2119	case SIOCGIFMEDIA:
2120		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2121		break;
2122	default:
2123		error = ether_ioctl(ifp, command, data);
2124	}
2125
2126	return (error);
2127}
2128
2129static int
2130cxgb_media_change(struct ifnet *ifp)
2131{
2132	return (EOPNOTSUPP);
2133}
2134
2135/*
2136 * Translates phy->modtype to the correct Ethernet media subtype.
2137 */
2138static int
2139cxgb_ifm_type(int mod)
2140{
2141	switch (mod) {
2142	case phy_modtype_sr:
2143		return (IFM_10G_SR);
2144	case phy_modtype_lr:
2145		return (IFM_10G_LR);
2146	case phy_modtype_lrm:
2147		return (IFM_10G_LRM);
2148	case phy_modtype_twinax:
2149		return (IFM_10G_TWINAX);
2150	case phy_modtype_twinax_long:
2151		return (IFM_10G_TWINAX_LONG);
2152	case phy_modtype_none:
2153		return (IFM_NONE);
2154	case phy_modtype_unknown:
2155		return (IFM_UNKNOWN);
2156	}
2157
2158	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2159	return (IFM_UNKNOWN);
2160}
2161
2162/*
2163 * Rebuilds the ifmedia list for this port, and sets the current media.
2164 */
2165static void
2166cxgb_build_medialist(struct port_info *p)
2167{
2168	struct cphy *phy = &p->phy;
2169	struct ifmedia *media = &p->media;
2170	int mod = phy->modtype;
2171	int m = IFM_ETHER | IFM_FDX;
2172
2173	PORT_LOCK(p);
2174
2175	ifmedia_removeall(media);
2176	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2177		/* Copper (RJ45) */
2178
2179		if (phy->caps & SUPPORTED_10000baseT_Full)
2180			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2181
2182		if (phy->caps & SUPPORTED_1000baseT_Full)
2183			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2184
2185		if (phy->caps & SUPPORTED_100baseT_Full)
2186			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2187
2188		if (phy->caps & SUPPORTED_10baseT_Full)
2189			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2190
2191		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2192		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2193
2194	} else if (phy->caps & SUPPORTED_TP) {
2195		/* Copper (CX4) */
2196
2197		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2198			("%s: unexpected cap 0x%x", __func__, phy->caps));
2199
2200		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2201		ifmedia_set(media, m | IFM_10G_CX4);
2202
2203	} else if (phy->caps & SUPPORTED_FIBRE &&
2204		   phy->caps & SUPPORTED_10000baseT_Full) {
2205		/* 10G optical (but includes SFP+ twinax) */
2206
2207		m |= cxgb_ifm_type(mod);
2208		if (IFM_SUBTYPE(m) == IFM_NONE)
2209			m &= ~IFM_FDX;
2210
2211		ifmedia_add(media, m, mod, NULL);
2212		ifmedia_set(media, m);
2213
2214	} else if (phy->caps & SUPPORTED_FIBRE &&
2215		   phy->caps & SUPPORTED_1000baseT_Full) {
2216		/* 1G optical */
2217
2218		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2219		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2220		ifmedia_set(media, m | IFM_1000_SX);
2221
2222	} else {
2223		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2224			    phy->caps));
2225	}
2226
2227	PORT_UNLOCK(p);
2228}
2229
2230static void
2231cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2232{
2233	struct port_info *p = ifp->if_softc;
2234	struct ifmedia_entry *cur = p->media.ifm_cur;
2235	int speed = p->link_config.speed;
2236
2237	if (cur->ifm_data != p->phy.modtype) {
2238		cxgb_build_medialist(p);
2239		cur = p->media.ifm_cur;
2240	}
2241
2242	ifmr->ifm_status = IFM_AVALID;
2243	if (!p->link_config.link_ok)
2244		return;
2245
2246	ifmr->ifm_status |= IFM_ACTIVE;
2247
2248	/*
2249	 * active and current will differ iff current media is autoselect.  That
2250	 * can happen only for copper RJ45.
2251	 */
2252	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2253		return;
2254	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2255		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2256
2257	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2258	if (speed == SPEED_10000)
2259		ifmr->ifm_active |= IFM_10G_T;
2260	else if (speed == SPEED_1000)
2261		ifmr->ifm_active |= IFM_1000_T;
2262	else if (speed == SPEED_100)
2263		ifmr->ifm_active |= IFM_100_TX;
2264	else if (speed == SPEED_10)
2265		ifmr->ifm_active |= IFM_10_T;
2266	else
2267		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2268			    speed));
2269}
2270
2271static void
2272cxgb_async_intr(void *data)
2273{
2274	adapter_t *sc = data;
2275
2276	if (cxgb_debug)
2277		device_printf(sc->dev, "cxgb_async_intr\n");
2278	/*
2279	 * May need to sleep - defer to taskqueue
2280	 */
2281	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2282}
2283
2284static void
2285cxgb_ext_intr_handler(void *arg, int count)
2286{
2287	adapter_t *sc = (adapter_t *)arg;
2288
2289	if (cxgb_debug)
2290		printf("cxgb_ext_intr_handler\n");
2291
2292	t3_phy_intr_handler(sc);
2293
2294	/* Now reenable external interrupts */
2295	ADAPTER_LOCK(sc);
2296	if (sc->slow_intr_mask) {
2297		sc->slow_intr_mask |= F_T3DBG;
2298		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2299		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2300	}
2301	ADAPTER_UNLOCK(sc);
2302}
2303
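/*
 * Returns non-zero if this port's link state must be polled: on the first
 * check after POLL_LINK_1ST_TIME is set, while a link fault is pending, or
 * when the PHY cannot generate link interrupts.
 */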
2304static inline int
2305link_poll_needed(struct port_info *p)
2306{
2307	struct cphy *phy = &p->phy;
2308
2309	if (phy->caps & POLL_LINK_1ST_TIME) {
2310		p->phy.caps &= ~POLL_LINK_1ST_TIME;
2311		return (1);
2312	}
2313
2314	return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2315}
2316
2317static void
2318check_link_status(adapter_t *sc)
2319{
2320	int i;
2321
2322	for (i = 0; i < sc->params.nports; ++i) {
2323		struct port_info *p = &sc->port[i];
2324
2325		if (!isset(&sc->open_device_map, p->port_id))
2326			continue;
2327
2328		if (link_poll_needed(p))
2329			t3_link_changed(sc, i);
2330	}
2331}
2332
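/*
 * Periodic MAC check for T3 rev B2 adapters: run the MAC watchdog task on
 * every open port with a healthy link and, when the watchdog asks for it,
 * reprogram and re-enable the MAC (tracked in num_toggled/num_resets).
 */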
2333static void
2334check_t3b2_mac(struct adapter *sc)
2335{
2336	int i;
2337
2338	if (sc->flags & CXGB_SHUTDOWN)
2339		return;
2340
2341	for_each_port(sc, i) {
2342		struct port_info *p = &sc->port[i];
2343		int status;
2344#ifdef INVARIANTS
2345		struct ifnet *ifp = p->ifp;
2346#endif
2347
2348		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2349		    !p->link_config.link_ok)
2350			continue;
2351
2352		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2353			("%s: state mismatch (drv_flags %x, device_map %x)",
2354			 __func__, ifp->if_drv_flags, sc->open_device_map));
2355
2356		PORT_LOCK(p);
2357		status = t3b2_mac_watchdog_task(&p->mac);
2358		if (status == 1)
2359			p->mac.stats.num_toggled++;
2360		else if (status == 2) {
2361			struct cmac *mac = &p->mac;
2362
2363			cxgb_update_mac_settings(p);
2364			t3_link_start(&p->phy, mac, &p->link_config);
2365			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2366			t3_port_intr_enable(sc, p->port_id);
2367			p->mac.stats.num_resets++;
2368		}
2369		PORT_UNLOCK(p);
2370	}
2371}
2372
2373static void
2374cxgb_tick(void *arg)
2375{
2376	adapter_t *sc = (adapter_t *)arg;
2377
2378	if (sc->flags & CXGB_SHUTDOWN)
2379		return;
2380
2381	taskqueue_enqueue(sc->tq, &sc->tick_task);
2382	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2383}
2384
2385static void
2386cxgb_tick_handler(void *arg, int count)
2387{
2388	adapter_t *sc = (adapter_t *)arg;
2389	const struct adapter_params *p = &sc->params;
2390	int i;
2391	uint32_t cause, reset;
2392
2393	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2394		return;
2395
2396	check_link_status(sc);
2397
2398	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2399		check_t3b2_mac(sc);
2400
2401	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2402	if (cause) {
2403		struct sge_qset *qs = &sc->sge.qs[0];
2404		uint32_t mask, v;
2405
2406		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2407
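		/*
		 * Bits [0, SGE_QSETS) of the status word flag starved response
		 * queues, the next SGE_QSETS bits (RSPQXDISABLED) are skipped,
		 * and the following 2 * SGE_QSETS bits flag empty free lists,
		 * two per queue set.
		 */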
2408		mask = 1;
2409		for (i = 0; i < SGE_QSETS; i++) {
2410			if (v & mask)
2411				qs[i].rspq.starved++;
2412			mask <<= 1;
2413		}
2414
2415		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2416
2417		for (i = 0; i < SGE_QSETS * 2; i++) {
2418			if (v & mask) {
2419				qs[i / 2].fl[i % 2].empty++;
2420			}
2421			mask <<= 1;
2422		}
2423
2424		/* clear */
2425		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2426		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2427	}
2428
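	/*
	 * Update each open port's ifnet statistics from the accumulated MAC
	 * counters.
	 */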
2429	for (i = 0; i < sc->params.nports; i++) {
2430		struct port_info *pi = &sc->port[i];
2431		struct ifnet *ifp = pi->ifp;
2432		struct cmac *mac = &pi->mac;
2433		struct mac_stats *mstats = &mac->stats;
2434		int drops, j;
2435
2436		if (!isset(&sc->open_device_map, pi->port_id))
2437			continue;
2438
2439		PORT_LOCK(pi);
2440		t3_mac_update_stats(mac);
2441		PORT_UNLOCK(pi);
2442
2443		ifp->if_opackets = mstats->tx_frames;
2444		ifp->if_ipackets = mstats->rx_frames;
2445		ifp->if_obytes = mstats->tx_octets;
2446		ifp->if_ibytes = mstats->rx_octets;
2447		ifp->if_omcasts = mstats->tx_mcast_frames;
2448		ifp->if_imcasts = mstats->rx_mcast_frames;
2449		ifp->if_collisions = mstats->tx_total_collisions;
2450		ifp->if_iqdrops = mstats->rx_cong_drops;
2451
2452		drops = 0;
2453		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2454			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2455		ifp->if_snd.ifq_drops = drops;
2456
2457		ifp->if_oerrors =
2458		    mstats->tx_excess_collisions +
2459		    mstats->tx_underrun +
2460		    mstats->tx_len_errs +
2461		    mstats->tx_mac_internal_errs +
2462		    mstats->tx_excess_deferral +
2463		    mstats->tx_fcs_errs;
2464		ifp->if_ierrors =
2465		    mstats->rx_jabber +
2466		    mstats->rx_data_errs +
2467		    mstats->rx_sequence_errs +
2468		    mstats->rx_runt +
2469		    mstats->rx_too_long +
2470		    mstats->rx_mac_internal_errs +
2471		    mstats->rx_short +
2472		    mstats->rx_fcs_errs;
2473
2474		if (mac->multiport)
2475			continue;
2476
2477		/* Count rx fifo overflows, once per second */
2478		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2479		reset = 0;
2480		if (cause & F_RXFIFO_OVERFLOW) {
2481			mac->stats.rx_fifo_ovfl++;
2482			reset |= F_RXFIFO_OVERFLOW;
2483		}
2484		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2485	}
2486}
2487
2488static void
2489touch_bars(device_t dev)
2490{
2491	/*
2492	 * Deliberately disabled; uses the FreeBSD PCI config accessors.
2493	 */
2494#if !defined(__LP64__) && 0
2495	u32 v;
2496
2497	v = pci_read_config(dev, PCIR_BAR(1), 4);
2498	pci_write_config(dev, PCIR_BAR(1), v, 4);
2499	v = pci_read_config(dev, PCIR_BAR(3), 4);
2500	pci_write_config(dev, PCIR_BAR(3), v, 4);
2501	v = pci_read_config(dev, PCIR_BAR(5), 4);
2502	pci_write_config(dev, PCIR_BAR(5), v, 4);
2503#endif
2504}
2505
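/*
 * Write len bytes to the serial EEPROM starting at byte offset.  EEPROM
 * writes are done a 32-bit word at a time, so an unaligned head or tail is
 * handled by reading the containing words into a bounce buffer and merging
 * the new bytes.  Write protection is lifted for the duration of the update.
 */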
2506static int
2507set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2508{
2509	uint8_t *buf;
2510	int err = 0;
2511	u32 aligned_offset, aligned_len, *p;
2512	struct adapter *adapter = pi->adapter;
2513
2514
2515	aligned_offset = offset & ~3;
2516	aligned_len = (len + (offset & 3) + 3) & ~3;
2517
2518	if (aligned_offset != offset || aligned_len != len) {
2519		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2520		if (!buf)
2521			return (ENOMEM);
2522		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2523		if (!err && aligned_len > 4)
2524			err = t3_seeprom_read(adapter,
2525					      aligned_offset + aligned_len - 4,
2526					      (u32 *)&buf[aligned_len - 4]);
2527		if (err)
2528			goto out;
2529		memcpy(buf + (offset & 3), data, len);
2530	} else
2531		buf = (uint8_t *)(uintptr_t)data;
2532
2533	err = t3_seeprom_wp(adapter, 0);
2534	if (err)
2535		goto out;
2536
2537	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2538		err = t3_seeprom_write(adapter, aligned_offset, *p);
2539		aligned_offset += 4;
2540	}
2541
2542	if (!err)
2543		err = t3_seeprom_wp(adapter, 1);
2544out:
2545	if (buf != data)
2546		free(buf, M_DEVBUF);
2547	return (err);
2548}
2549
2550
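/*
 * Range check used by the ioctls below; a negative value means "parameter
 * not supplied" and always passes.
 */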
2551static int
2552in_range(int val, int lo, int hi)
2553{
2554	return (val < 0 || (val <= hi && val >= lo));
2555}
2556
2557static int
2558cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2559{
2560       return (0);
2561	return (0);
2562
2563static int
2564cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2565{
2566       return (0);
2567	return (0);
2568
2569static int
2570cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2571    int fflag, struct thread *td)
2572{
2573	int mmd, error = 0;
2574	struct port_info *pi = dev->si_drv1;
2575	adapter_t *sc = pi->adapter;
2576
2577#ifdef PRIV_SUPPORTED
2578	if (priv_check(td, PRIV_DRIVER)) {
2579		if (cxgb_debug)
2580			printf("user does not have access to privileged ioctls\n");
2581		return (EPERM);
2582	}
2583#else
2584	if (suser(td)) {
2585		if (cxgb_debug)
2586			printf("user does not have access to privileged ioctls\n");
2587		return (EPERM);
2588	}
2589#endif
2590
2591	switch (cmd) {
2592	case CHELSIO_GET_MIIREG: {
2593		uint32_t val;
2594		struct cphy *phy = &pi->phy;
2595		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2596
2597		if (!phy->mdio_read)
2598			return (EOPNOTSUPP);
2599		if (is_10G(sc)) {
2600			mmd = mid->phy_id >> 8;
2601			if (!mmd)
2602				mmd = MDIO_DEV_PCS;
2603			else if (mmd > MDIO_DEV_VEND2)
2604				return (EINVAL);
2605
2606			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2607					     mid->reg_num, &val);
2608		} else
2609			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2610					     mid->reg_num & 0x1f, &val);
2611		if (error == 0)
2612			mid->val_out = val;
2613		break;
2614	}
2615	case CHELSIO_SET_MIIREG: {
2616		struct cphy *phy = &pi->phy;
2617		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2618
2619		if (!phy->mdio_write)
2620			return (EOPNOTSUPP);
2621		if (is_10G(sc)) {
2622			mmd = mid->phy_id >> 8;
2623			if (!mmd)
2624				mmd = MDIO_DEV_PCS;
2625			else if (mmd > MDIO_DEV_VEND2)
2626				return (EINVAL);
2627
2628			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2629					      mmd, mid->reg_num, mid->val_in);
2630		} else
2631			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2632					      mid->reg_num & 0x1f,
2633					      mid->val_in);
2634		break;
2635	}
2636	case CHELSIO_SETREG: {
2637		struct ch_reg *edata = (struct ch_reg *)data;
2638		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2639			return (EFAULT);
2640		t3_write_reg(sc, edata->addr, edata->val);
2641		break;
2642	}
2643	case CHELSIO_GETREG: {
2644		struct ch_reg *edata = (struct ch_reg *)data;
2645		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2646			return (EFAULT);
2647		edata->val = t3_read_reg(sc, edata->addr);
2648		break;
2649	}
2650	case CHELSIO_GET_SGE_CONTEXT: {
2651		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2652		mtx_lock_spin(&sc->sge.reg_lock);
2653		switch (ecntxt->cntxt_type) {
2654		case CNTXT_TYPE_EGRESS:
2655			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2656			    ecntxt->data);
2657			break;
2658		case CNTXT_TYPE_FL:
2659			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2660			    ecntxt->data);
2661			break;
2662		case CNTXT_TYPE_RSP:
2663			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2664			    ecntxt->data);
2665			break;
2666		case CNTXT_TYPE_CQ:
2667			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2668			    ecntxt->data);
2669			break;
2670		default:
2671			error = EINVAL;
2672			break;
2673		}
2674		mtx_unlock_spin(&sc->sge.reg_lock);
2675		break;
2676	}
2677	case CHELSIO_GET_SGE_DESC: {
2678		struct ch_desc *edesc = (struct ch_desc *)data;
2679		int ret;
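		/*
		 * Each queue set exposes 6 rings: 3 TX queues, 2 free lists,
		 * and 1 response queue.
		 */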
2680		if (edesc->queue_num >= SGE_QSETS * 6)
2681			return (EINVAL);
2682		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2683		    edesc->queue_num % 6, edesc->idx, edesc->data);
2684		if (ret < 0)
2685			return (EINVAL);
2686		edesc->size = ret;
2687		break;
2688	}
2689	case CHELSIO_GET_QSET_PARAMS: {
2690		struct qset_params *q;
2691		struct ch_qset_params *t = (struct ch_qset_params *)data;
2692		int q1 = pi->first_qset;
2693		int nqsets = pi->nqsets;
2694		int i;
2695
2696		if (t->qset_idx >= nqsets)
2697			return (EINVAL);
2698
2699		i = q1 + t->qset_idx;
2700		q = &sc->params.sge.qset[i];
2701		t->rspq_size   = q->rspq_size;
2702		t->txq_size[0] = q->txq_size[0];
2703		t->txq_size[1] = q->txq_size[1];
2704		t->txq_size[2] = q->txq_size[2];
2705		t->fl_size[0]  = q->fl_size;
2706		t->fl_size[1]  = q->jumbo_size;
2707		t->polling     = q->polling;
2708		t->lro         = q->lro;
2709		t->intr_lat    = q->coalesce_usecs;
2710		t->cong_thres  = q->cong_thres;
2711		t->qnum        = i;
2712
2713		if ((sc->flags & FULL_INIT_DONE) == 0)
2714			t->vector = 0;
2715		else if (sc->flags & USING_MSIX)
2716			t->vector = rman_get_start(sc->msix_irq_res[i]);
2717		else
2718			t->vector = rman_get_start(sc->irq_res);
2719
2720		break;
2721	}
2722	case CHELSIO_GET_QSET_NUM: {
2723		struct ch_reg *edata = (struct ch_reg *)data;
2724		edata->val = pi->nqsets;
2725		break;
2726	}
2727	case CHELSIO_LOAD_FW: {
2728		uint8_t *fw_data;
2729		uint32_t vers;
2730		struct ch_mem_range *t = (struct ch_mem_range *)data;
2731
2732		/*
2733		 * You're allowed to load firmware only before FULL_INIT_DONE.
2734		 *
2735		 * FW_UPTODATE is also set so the rest of the initialization
2736		 * will not overwrite what was loaded here.  This gives you the
2737		 * flexibility to load any firmware (and maybe shoot yourself in
2738		 * the foot).
2739		 */
2740
2741		ADAPTER_LOCK(sc);
2742		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2743			ADAPTER_UNLOCK(sc);
2744			return (EBUSY);
2745		}
2746
2747		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2748		if (!fw_data)
2749			error = ENOMEM;
2750		else
2751			error = copyin(t->buf, fw_data, t->len);
2752
2753		if (!error)
2754			error = -t3_load_fw(sc, fw_data, t->len);
2755
2756		if (t3_get_fw_version(sc, &vers) == 0) {
2757			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2758			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2759			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2760		}
2761
2762		if (!error)
2763			sc->flags |= FW_UPTODATE;
2764
2765		free(fw_data, M_DEVBUF);
2766		ADAPTER_UNLOCK(sc);
2767		break;
2768	}
2769	case CHELSIO_LOAD_BOOT: {
2770		uint8_t *boot_data;
2771		struct ch_mem_range *t = (struct ch_mem_range *)data;
2772
2773		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2774		if (!boot_data)
2775			return (ENOMEM);
2776
2777		error = copyin(t->buf, boot_data, t->len);
2778		if (!error)
2779			error = -t3_load_boot(sc, boot_data, t->len);
2780
2781		free(boot_data, M_DEVBUF);
2782		break;
2783	}
2784	case CHELSIO_GET_PM: {
2785		struct ch_pm *m = (struct ch_pm *)data;
2786		struct tp_params *p = &sc->params.tp;
2787
2788		if (!is_offload(sc))
2789			return (EOPNOTSUPP);
2790
2791		m->tx_pg_sz = p->tx_pg_size;
2792		m->tx_num_pg = p->tx_num_pgs;
2793		m->rx_pg_sz  = p->rx_pg_size;
2794		m->rx_num_pg = p->rx_num_pgs;
2795		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2796
2797		break;
2798	}
2799	case CHELSIO_SET_PM: {
2800		struct ch_pm *m = (struct ch_pm *)data;
2801		struct tp_params *p = &sc->params.tp;
2802
2803		if (!is_offload(sc))
2804			return (EOPNOTSUPP);
2805		if (sc->flags & FULL_INIT_DONE)
2806			return (EBUSY);
2807
2808		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2809		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2810			return (EINVAL);	/* not power of 2 */
2811		if (!(m->rx_pg_sz & 0x14000))
2812			return (EINVAL);	/* not 16KB or 64KB */
2813		if (!(m->tx_pg_sz & 0x1554000))	/* 16KB - 16MB, powers of 4 */
2814			return (EINVAL);
2815		if (m->tx_num_pg == -1)
2816			m->tx_num_pg = p->tx_num_pgs;
2817		if (m->rx_num_pg == -1)
2818			m->rx_num_pg = p->rx_num_pgs;
2819		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2820			return (EINVAL);
2821		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2822		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2823			return (EINVAL);
2824
2825		p->rx_pg_size = m->rx_pg_sz;
2826		p->tx_pg_size = m->tx_pg_sz;
2827		p->rx_num_pgs = m->rx_num_pg;
2828		p->tx_num_pgs = m->tx_num_pg;
2829		break;
2830	}
2831	case CHELSIO_SETMTUTAB: {
2832		struct ch_mtus *m = (struct ch_mtus *)data;
2833		int i;
2834
2835		if (!is_offload(sc))
2836			return (EOPNOTSUPP);
2837		if (offload_running(sc))
2838			return (EBUSY);
2839		if (m->nmtus != NMTUS)
2840			return (EINVAL);
2841		if (m->mtus[0] < 81)         /* accommodate SACK */
2842			return (EINVAL);
2843
2844		/*
2845		 * MTUs must be in ascending order
2846		 */
2847		for (i = 1; i < NMTUS; ++i)
2848			if (m->mtus[i] < m->mtus[i - 1])
2849				return (EINVAL);
2850
2851		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2852		break;
2853	}
2854	case CHELSIO_GETMTUTAB: {
2855		struct ch_mtus *m = (struct ch_mtus *)data;
2856
2857		if (!is_offload(sc))
2858			return (EOPNOTSUPP);
2859
2860		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2861		m->nmtus = NMTUS;
2862		break;
2863	}
2864	case CHELSIO_GET_MEM: {
2865		struct ch_mem_range *t = (struct ch_mem_range *)data;
2866		struct mc7 *mem;
2867		uint8_t *useraddr;
2868		u64 buf[32];
2869
2870		/*
2871		 * Use these to avoid modifying len/addr in the return
2872		 * struct.
2873		 */
2874		uint32_t len = t->len, addr = t->addr;
2875
2876		if (!is_offload(sc))
2877			return (EOPNOTSUPP);
2878		if (!(sc->flags & FULL_INIT_DONE))
2879			return (EIO);         /* need the memory controllers */
2880		if ((addr & 0x7) || (len & 0x7))
2881			return (EINVAL);
2882		if (t->mem_id == MEM_CM)
2883			mem = &sc->cm;
2884		else if (t->mem_id == MEM_PMRX)
2885			mem = &sc->pmrx;
2886		else if (t->mem_id == MEM_PMTX)
2887			mem = &sc->pmtx;
2888		else
2889			return (EINVAL);
2890
2891		/*
2892		 * Version scheme:
2893		 * bits 0..9: chip version
2894		 * bits 10..15: chip revision
2895		 */
2896		t->version = 3 | (sc->params.rev << 10);
2897
2898		/*
2899		 * Read 256 bytes at a time as len can be large and we don't
2900		 * want to use huge intermediate buffers.
2901		 */
2902		useraddr = (uint8_t *)t->buf;
2903		while (len) {
2904			unsigned int chunk = min(len, sizeof(buf));
2905
2906			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2907			if (error)
2908				return (-error);
2909			if (copyout(buf, useraddr, chunk))
2910				return (EFAULT);
2911			useraddr += chunk;
2912			addr += chunk;
2913			len -= chunk;
2914		}
2915		break;
2916	}
2917	case CHELSIO_READ_TCAM_WORD: {
2918		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2919
2920		if (!is_offload(sc))
2921			return (EOPNOTSUPP);
2922		if (!(sc->flags & FULL_INIT_DONE))
2923			return (EIO);         /* need MC5 */
2924		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2925		break;
2926	}
2927	case CHELSIO_SET_TRACE_FILTER: {
2928		struct ch_trace *t = (struct ch_trace *)data;
2929		const struct trace_params *tp;
2930
2931		tp = (const struct trace_params *)&t->sip;
2932		if (t->config_tx)
2933			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2934					       t->trace_tx);
2935		if (t->config_rx)
2936			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2937					       t->trace_rx);
2938		break;
2939	}
2940	case CHELSIO_SET_PKTSCHED: {
2941		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2942		if (sc->open_device_map == 0)
2943			return (EAGAIN);
2944		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2945		    p->binding);
2946		break;
2947	}
2948	case CHELSIO_IFCONF_GETREGS: {
2949		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2950		int reglen = cxgb_get_regs_len();
2951		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2952		if (buf == NULL) {
2953			return (ENOMEM);
2954		}
2955		if (regs->len > reglen)
2956			regs->len = reglen;
2957		else if (regs->len < reglen)
2958			error = ENOBUFS;
2959
2960		if (!error) {
2961			cxgb_get_regs(sc, regs, buf);
2962			error = copyout(buf, regs->data, reglen);
2963		}
2964		free(buf, M_DEVBUF);
2965
2966		break;
2967	}
2968	case CHELSIO_SET_HW_SCHED: {
2969		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2970		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2971
2972		if ((sc->flags & FULL_INIT_DONE) == 0)
2973			return (EAGAIN);       /* need TP to be initialized */
2974		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2975		    !in_range(t->channel, 0, 1) ||
2976		    !in_range(t->kbps, 0, 10000000) ||
2977		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2978		    !in_range(t->flow_ipg, 0,
2979			      dack_ticks_to_usec(sc, 0x7ff)))
2980			return (EINVAL);
2981
2982		if (t->kbps >= 0) {
2983			error = t3_config_sched(sc, t->kbps, t->sched);
2984			if (error < 0)
2985				return (-error);
2986		}
2987		if (t->class_ipg >= 0)
2988			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2989		if (t->flow_ipg >= 0) {
2990			t->flow_ipg *= 1000;     /* us -> ns */
2991			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2992		}
2993		if (t->mode >= 0) {
2994			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2995
2996			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2997					 bit, t->mode ? bit : 0);
2998		}
2999		if (t->channel >= 0)
3000			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3001					 1 << t->sched, t->channel << t->sched);
3002		break;
3003	}
3004	case CHELSIO_GET_EEPROM: {
3005		int i;
3006		struct ch_eeprom *e = (struct ch_eeprom *)data;
3007		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3008
3009		if (buf == NULL) {
3010			return (ENOMEM);
3011		}
3012		e->magic = EEPROM_MAGIC;
3013		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3014			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3015
3016		if (!error)
3017			error = copyout(buf + e->offset, e->data, e->len);
3018
3019		free(buf, M_DEVBUF);
3020		break;
3021	}
3022	case CHELSIO_CLEAR_STATS: {
3023		if (!(sc->flags & FULL_INIT_DONE))
3024			return (EAGAIN);
3025
3026		PORT_LOCK(pi);
3027		t3_mac_update_stats(&pi->mac);
3028		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3029		PORT_UNLOCK(pi);
3030		break;
3031	}
3032	case CHELSIO_GET_UP_LA: {
3033		struct ch_up_la *la = (struct ch_up_la *)data;
3034		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3035		if (buf == NULL) {
3036			return (ENOMEM);
3037		}
3038		if (la->bufsize < LA_BUFSIZE)
3039			error = ENOBUFS;
3040
3041		if (!error)
3042			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3043					      &la->bufsize, buf);
3044		if (!error)
3045			error = copyout(buf, la->data, la->bufsize);
3046
3047		free(buf, M_DEVBUF);
3048		break;
3049	}
3050	case CHELSIO_GET_UP_IOQS: {
3051		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3052		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3053		uint32_t *v;
3054
3055		if (buf == NULL) {
3056			return (ENOMEM);
3057		}
3058		if (ioqs->bufsize < IOQS_BUFSIZE)
3059			error = ENOBUFS;
3060
3061		if (!error)
3062			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3063
3064		if (!error) {
3065			v = (uint32_t *)buf;
3066
3067			ioqs->bufsize -= 4 * sizeof(uint32_t);
3068			ioqs->ioq_rx_enable = *v++;
3069			ioqs->ioq_tx_enable = *v++;
3070			ioqs->ioq_rx_status = *v++;
3071			ioqs->ioq_tx_status = *v++;
3072
3073			error = copyout(v, ioqs->data, ioqs->bufsize);
3074		}
3075
3076		free(buf, M_DEVBUF);
3077		break;
3078	}
3079	default:
3080		return (EOPNOTSUPP);
3081		break;
3082	}
3083
3084	return (error);
3085}
3086
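/*
 * Copy the registers in [start, end] (byte addresses, inclusive) into buf at
 * offset start, one 32-bit read at a time.
 */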
3087static __inline void
3088reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3089    unsigned int end)
3090{
3091	uint32_t *p = (uint32_t *)(buf + start);
3092
3093	for ( ; start <= end; start += sizeof(uint32_t))
3094		*p++ = t3_read_reg(ap, start);
3095}
3096
3097#define T3_REGMAP_SIZE (3 * 1024)
3098static int
3099cxgb_get_regs_len(void)
3100{
3101	return (T3_REGMAP_SIZE);
3102}
3103
3104static void
3105cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3106{
3107
3108	/*
3109	 * Version scheme:
3110	 * bits 0..9: chip version
3111	 * bits 10..15: chip revision
3112	 * bit 31: set for PCIe cards
3113	 */
3114	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3115
3116	/*
3117	 * We skip the MAC statistics registers because they are clear-on-read.
3118	 * Also reading multi-register stats would need to synchronize with the
3119	 * periodic mac stats accumulation.  Hard to justify the complexity.
3120	 */
3121	memset(buf, 0, cxgb_get_regs_len());
3122	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3123	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3124	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3125	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3126	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3127	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3128		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3129	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3130		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3131}
3132
3133
3134MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3135