cxgb_main.c revision 192593
/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 192593 2009-05-22 18:26:47Z gnn $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ktr.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_teardown_msix(adapter_t *);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static int cxgb_ifm_type(int);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick_handler(void *, int);
static void cxgb_down_locked(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_link_start(struct port_info *p);
int t3_detect_link_fault(adapter_t *adapter, int port_id);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
	.d_version =    D_VERSION,
	.d_flags =      0,
	.d_open =       cxgb_extension_open,
	.d_close =      cxgb_extension_close,
	.d_ioctl =      cxgb_extension_ioctl,
	.d_name =       "cxgb",
};

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

#define SGE_MSIX_COUNT (SGE_QSETS + 1)

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, use multiq = 0
 */
static int multiq = 1;
TUNABLE_INT("hw.cxgb.multiq", &multiq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
    "use min(ncpus/ports, 8) queue-sets per port");

/*
 * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = 1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32,
	MIN_FL_JUMBO_ENTRIES = 32
};

struct filter_info {
	u32 sip;
	u32 sip_mask;
	u32 dip;
	u16 sport;
	u16 dport;
	u32 vlan:12;
	u32 vlan_prio:3;
	u32 mac_hit:1;
	u32 mac_idx:4;
	u32 mac_vld:1;
	u32 pkt_type:2;
	u32 report_filter_id:1;
	u32 pass:1;
	u32 rss:1;
	u32 qset:3;
	u32 locked:1;
	u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };

#define EEPROM_MAGIC 0x38E2F10C

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "N310E"},
	{0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);

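/*
 * Map the adapter's chip revision to the single letter used in the
 * firmware and protocol SRAM image file names (see TPEEPROM_NAME and
 * TPSRAM_NAME below); 'z' is returned for unknown revisions.
 */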
static __inline char
t3rev2char(struct adapter *adapter)
{
	char rev = 'z';

	switch (adapter->params.rev) {
	case T3_REV_A:
		rev = 'a';
		break;
	case T3_REV_B:
	case T3_REV_B2:
		rev = 'b';
		break;
	case T3_REV_C:
		rev = 'c';
		break;
	}
	return rev;
}

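/*
 * Find the cxgb_identifiers[] entry matching this device's PCI vendor
 * and device IDs, or NULL if the card is not recognized.
 */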
static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];
	int nports;
	struct adapter *sc = device_get_softc(dev);

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	nports = ai->nports0 + ai->nports1;
	if (nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s %sNIC, rev: %d nports: %d %s",
	    ai->desc, is_offload(sc) ? "R" : "",
	    sc->params.rev, nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"

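/*
 * Pull the cxgb_t3fw image in via firmware(9) and write it to the card.
 * The firmware reference is dropped once the load has been attempted.
 */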
static int
upgrade_fw(adapter_t *sc)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	if ((fw = firmware_get(FW_FNAME)) == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
		return (ENOENT);
	} else
		device_printf(sc->dev, "updating firmware on card\n");
	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	device_printf(sc->dev, "firmware update returned %s %d\n",
	    (status == 0) ? "success" : "fail", status);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

/*
 * The cxgb_controller_attach function is responsible for the initial
 * bringup of the device.  Its responsibilities include:
 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Set up the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Set up offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
 *
 * NOTE: Any modification or deviation from this list MUST be reflected in
 * the above comment.  Failure to do so will result in problems on various
 * error conditions including link flapping.
 */
static int
cxgb_controller_attach(device_t dev)
{
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, error = 0;
	uint32_t vers;
	int port_qsets = 1;
#ifdef MSI_SUPPORTED
	int msi_needed, reg;
#endif
	char buf[80];

	sc = device_get_softc(dev);
	sc->dev = dev;
	sc->msi_count = 0;
	ai = cxgb_get_adapter_info(dev);

	/*
	 * XXX not really related but a recent addition
	 */
#ifdef MSI_SUPPORTED
	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		uint16_t lnk, pectl;
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	if (sc->link_width != 0 && sc->link_width <= 4 &&
	    (ai->nports0 + ai->nports1) <= 2) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
	}
#endif
	touch_bars(dev);
	pci_enable_busmaster(dev);
	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR region 0\n");
		return (ENXIO);
	}
	sc->udbs_rid = PCIR_BAR(2);
	sc->udbs_res = NULL;
	if (is_offload(sc) &&
	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
		device_printf(dev, "Cannot allocate BAR region 1\n");
		error = ENXIO;
		goto out;
	}

	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
	    device_get_unit(dev));
	ADAPTER_LOCK_INIT(sc, sc->lockbuf);

	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
	    device_get_unit(dev));
	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
	    device_get_unit(dev));
	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
	    device_get_unit(dev));

	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	if (t3_prep_adapter(sc, ai, 1) < 0) {
		printf("prep adapter failed\n");
		error = ENODEV;
		goto out;
	}
	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED

	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_needed = sc->msi_count = SGE_MSIX_COUNT;

		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
		    (sc->msi_count != msi_needed)) {
			device_printf(dev, "msix allocation failed - msi_count = %d"
			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
			    msi_needed, error);
			sc->msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			sc->cxgb_intr = t3_intr_msix;
		}
	}

	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
		sc->msi_count = 1;
		if (pci_alloc_msi(dev, &sc->msi_count)) {
			device_printf(dev, "alloc msi failed - will try INTx\n");
			sc->msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			sc->cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (sc->msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->irq_rid = 0;
		sc->cxgb_intr = t3b_intr;
	}

	if ((sc->flags & USING_MSIX) && multiq)
		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		/* report the allocation failure instead of returning success */
		error = ENOMEM;
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init(&sc->cxgb_tick_ch, TRUE);

	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_check_tpsram_version(sc) < 0) {
		/*
		 * Warn user that an SRAM update will be attempted in init.
		 */
		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
		sc->flags &= ~TPS_UPTODATE;
	} else {
		sc->flags |= TPS_UPTODATE;
	}

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; i++) {
		struct port_info *pi;

		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		pi = &sc->port[i];
		pi->adapter = sc;
		pi->nqsets = port_qsets;
		pi->first_qset = i*port_qsets;
		pi->port_id = i;
		pi->tx_chan = i >= ai->nports0;
		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
		sc->rxpkt_map[pi->txpkt_intf] = i;
		sc->port[i].tx_chan = i >= ai->nports0;
		sc->portdev[i] = child;
		device_set_softc(child, pi);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	/* initialize sge private state */
	t3_sge_init_adapter(sc);

	t3_led_ready(sc);

	cxgb_offload_init();
	if (is_offload(sc)) {
		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_adapter_ofld(sc);
	}
	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
	    G_FW_VERSION_MICRO(vers));

	snprintf(buf, sizeof(buf), "%s\t E/C: %s S/N: %s",
	    ai->desc,
	    sc->params.vpd.ec, sc->params.vpd.sn);
	device_set_desc_copy(dev, buf);

	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
	    sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
	    sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);

	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
	t3_add_attach_sysctls(sc);
out:
	if (error)
		cxgb_free(sc);

	return (error);
}

/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

/*
 * cxgb_free() is called by the cxgb_controller_detach() routine
 * to tear down the structures that were built up in
 * cxgb_controller_attach(), and should be the final piece of work
 * done when fully unloading the driver.  Its responsibilities include:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
static void
cxgb_free(struct adapter *sc)
{
	int i;

	ADAPTER_LOCK(sc);
	sc->flags |= CXGB_SHUTDOWN;
	ADAPTER_UNLOCK(sc);

	cxgb_pcpu_shutdown_threads(sc);

	ADAPTER_LOCK(sc);
	cxgb_down_locked(sc);
	ADAPTER_UNLOCK(sc);

	t3_sge_deinit_sw(sc);
	/*
	 * Wait for last callout
	 */
	DELAY(hz*100);

	bus_generic_detach(sc->dev);

	for (i = 0; i < (sc)->params.nports; i++) {
		if (sc->portdev[i] &&
		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
			device_printf(sc->dev, "failed to delete child port\n");
	}

#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	} else {
		device_printf(sc->dev, "no msi message to release\n");
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	if (sc->tq != NULL) {
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}

	if (is_offload(sc)) {
		cxgb_adapter_unofld(sc);
		if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT))
			offload_close(&sc->tdev);
		else
			printf("cxgb_free: DEVMAP_BIT not set\n");
	} else
		printf("not offloading set\n");
#ifdef notyet
	if (sc->flags & CXGB_OFLD_INIT)
		cxgb_offload_deactivate(sc);
#endif
	free(sc->filters, M_DEVBUF);
	t3_sge_free(sc);

	cxgb_offload_exit();

	if (sc->udbs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	MTX_DESTROY(&sc->mdio_lock);
	MTX_DESTROY(&sc->sge.reg_lock);
	MTX_DESTROY(&sc->elmer_lock);
	ADAPTER_LOCK_DEINIT(sc);
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx = 0, qset_idx = 0;
	u_int ntxq = SGE_TXQ_PER_SET;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;

	for (i = 0; i < (sc)->params.nports; i++) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
				    err);
				return (err);
			}
		}
	}

	return (0);
}

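/*
 * Release the per-queue-set MSI-X interrupt handlers and IRQ resources
 * that cxgb_setup_msix() allocated.  The async/error vector (rid 1) is
 * torn down separately in cxgb_port_detach().
 */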
static void
cxgb_teardown_msix(adapter_t *sc)
{
	int i, nqsets;

	for (nqsets = i = 0; i < (sc)->params.nports; i++)
		nqsets += sc->port[i].nqsets;

	for (i = 0; i < nqsets; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
			sc->msix_intr_tag[i] = NULL;
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
			sc->msix_irq_res[i] = NULL;
		}
	}
}

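/*
 * Allocate and wire up the MSI-X vectors: rid 1 carries link changes and
 * error conditions (cxgb_async_intr), and rids 2..n serve one queue set
 * each (t3_intr_msix).
 */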
static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}

	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
		NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}
	for (i = k = 0; i < (sc)->params.nports; i++) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; j++, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
				INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
				NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
#if 0
#ifdef IFNET_MULTIQUEUE
			if (multiq) {
				int vector = rman_get_start(sc->msix_irq_res[k]);
				if (bootverbose)
					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
				intr_bind(vector, k % mp_ncpus);
			}
#endif
#endif
		}
	}

	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];
	const char *desc;

	p = device_get_softc(dev);
	desc = p->phy.desc;
	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

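/*
 * Create the character device node used for the extension ioctl
 * interface (cxgb_extension_ioctl) and hang the port softc off it.
 */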
static int
cxgb_makedev(struct port_info *pi)
{

	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifndef LRO_SUPPORTED
#ifdef IFCAP_LRO
#undef IFCAP_LRO
#endif
#define IFCAP_LRO 0x0
#endif

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define IFCAP_TSO6 0x0
#define CSUM_TSO   0x0
#endif

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int err, media_flags;
	struct adapter *sc;

	p = device_get_softc(dev);
	sc = p->adapter;
	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
	    device_get_unit(device_get_parent(dev)), p->port_id);
	PORT_LOCK_INIT(p, p->lockbuf);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;

	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	/*
	 * disable TSO on 4-port - it isn't supported by the firmware yet
	 */
	if (p->adapter->params.nports > 2) {
		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
		ifp->if_hwassist &= ~CSUM_TSO;
	}

	ether_ifattach(ifp, p->hw_addr);

#ifdef IFNET_MULTIQUEUE
	ifp->if_transmit = cxgb_pcpu_transmit;
#endif
	/*
	 * Only default to jumbo frames on 10GigE
	 */
	if (p->adapter->params.nports <= 2)
		ifp->if_mtu = ETHERMTU_JUMBO;
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->phy.desc, "10GBASE-CX4")) {
		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
		media_flags = cxgb_ifm_type(p->phy.modtype);
	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
		    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
		    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
		    0, NULL);
		media_flags = 0;
	} else if (!strcmp(p->phy.desc, "1000BASE-X")) {
		/*
		 * XXX: This is not very accurate.  Fix when common code
		 * returns more specific value - eg 1000BASE-SX, LX, etc.
		 *
		 * XXX: In the meantime, don't lie. Consider setting IFM_AUTO
		 * instead of SX.
		 */
		media_flags = IFM_ETHER | IFM_1000_SX | IFM_FDX;
	} else {
		printf("unsupported media type %s\n", p->phy.desc);
		return (ENXIO);
	}
	if (media_flags) {
		/*
		 * Note the modtype on which we based our flags.  If modtype
		 * changes, we'll redo the ifmedia for this ifp.  modtype may
		 * change when transceivers are plugged in/out, and in other
		 * situations.
		 */
		ifmedia_add(&p->media, media_flags, p->phy.modtype, NULL);
		ifmedia_set(&p->media, media_flags);
	} else {
		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
	}

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
	t3_sge_init_port(p);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
			    sc->irq_rid);
			err = EINVAL;
			goto out;
		}
		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			sc->cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(sc->dev, "Cannot set up interrupt\n");
			err = EINVAL;
			goto irq_err;
		}
	} else {
		cxgb_setup_msix(sc, sc->msi_count);
	}

#if defined(LINK_ATTACH)
	cxgb_link_start(p);
	t3_link_changed(sc, p->port_id);
#endif
out:
	return (err);
irq_err:
	CH_ERR(sc, "request_irq failed, err %d\n", err);
	goto out;
}

/*
 * cxgb_port_detach() is called via the device_detach methods when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interface lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 */
static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;
	struct adapter *sc;

	p = device_get_softc(dev);
	sc = p->adapter;

	if (p->port_cdev != NULL)
		destroy_dev(p->port_cdev);

	ether_ifdetach(p->ifp);
	printf("waiting for callout to stop ...");
	printf("done\n");

	PORT_LOCK(p);
	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
		cxgb_stop_locked(p);
	PORT_UNLOCK(p);

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
		sc->intr_tag = NULL;
	}
	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
		sc->irq_res = NULL;
	}

	if (sc->flags & USING_MSIX)
		cxgb_teardown_msix(sc);

	callout_drain(&sc->cxgb_tick_ch);
	callout_drain(&sc->sge_timer_ch);

	if (sc->tq != NULL) {
		printf("draining slow intr\n");
		taskqueue_drain(sc->tq, &sc->slow_intr_task);
		printf("draining ext intr\n");
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		printf("draining tick task\n");
		taskqueue_drain(sc->tq, &sc->tick_task);
	}

	/*
	 * the lock may be acquired in ifdetach
	 */
	PORT_LOCK_DEINIT(p);
	if_free(p->ifp);

	return (0);
}

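/*
 * Quiesce the hardware after a fatal error: stop the SGE, shut both
 * MACs' Tx/Rx paths, disable interrupts, and log the firmware status
 * words for post-mortem analysis.
 */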
void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	if (sc->flags & FULL_INIT_DONE) {
		t3_sge_stop(sc);
		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
		t3_intr_disable(sc);
	}
	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

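/*
 * Walk the PCI capability list by hand and return the offset of the
 * requested capability, or 0 if it is not present.
 */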
int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];

	if (link_status) {
		pi->ifp->if_baudrate = IF_Mbps(speed);
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	} else
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
}

/**
 *	t3_os_phymod_changed - handle PHY module changes
 *	@adap: the adapter whose PHY reported the module change
 *	@port_id: the port index of the changed PHY module
 *
 *	This is the OS-dependent handler for PHY module changes.  It is
 *	invoked when a PHY module is removed or inserted for any OS-specific
 *	processing.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
	static const char *mod_str[] = {
		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
	};

	struct port_info *pi = &adap->port[port_id];

	if (pi->phy.modtype == phy_modtype_none)
		device_printf(adap->dev, "PHY module unplugged\n");
	else {
		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
		    ("invalid PHY module type %d", pi->phy.modtype));
		device_printf(adap->dev, "%s PHY module inserted\n",
		    mod_str[pi->phy.modtype]);
	}
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
	ADAPTER_UNLOCK(sc);
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated before this gets called, as this
	 * is called early on in attach by t3_prep_adapter; save the address
	 * off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;
	int mtu, hwtagging;

	ifp = p->ifp;

	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

	mtu = ifp->if_mtu;
	if (ifp->if_capenable & IFCAP_VLAN_MTU)
		mtu += ETHER_VLAN_ENCAP_LEN;

	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

	t3_init_rx_mode(&rm, p);
	if (!mac->multiport)
		t3_mac_reset(mac);
	t3_mac_set_mtu(mac, mtu);
	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

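/*
 * Poll qset 0's response queue until n management replies have arrived
 * on top of init_cnt, giving up after five 10ms sleeps.
 */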
static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
    unsigned long n)
{
	int attempts = 5;

	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
		if (!--attempts)
			return (ETIMEDOUT);
		t3_os_sleep(10);
	}
	return (0);
}

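/*
 * Initialize the TP SRAM parity bits by writing every SMT (16),
 * L2T (2048), and routing-table (2048) entry plus one TCB field, then
 * waiting for all 4113 management replies.
 */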
static int
init_tp_parity(struct adapter *adap)
{
	int i;
	struct mbuf *m;
	struct cpl_set_tcb_field *greq;
	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

	t3_tp_set_offload_mode(adap, 1);

	for (i = 0; i < 16; i++) {
		struct cpl_smt_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_smt_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
		req->iff = i;
		t3_mgmt_tx(adap, m);
	}

	for (i = 0; i < 2048; i++) {
		struct cpl_l2t_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_l2t_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
		req->params = htonl(V_L2T_W_IDX(i));
		t3_mgmt_tx(adap, m);
	}

	for (i = 0; i < 2048; i++) {
		struct cpl_rte_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_rte_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
		req->l2t_idx = htonl(V_L2T_W_IDX(i));
		t3_mgmt_tx(adap, m);
	}

	m = m_gethdr(M_WAITOK, MT_DATA);
	greq = mtod(m, struct cpl_set_tcb_field *);
	m->m_len = m->m_pkthdr.len = sizeof(*greq);
	memset(greq, 0, sizeof(*greq));
	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
	greq->mask = htobe64(1);
	t3_mgmt_tx(adap, m);

	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
	t3_tp_set_offload_mode(adap, 0);
	return (i);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq[2];
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	nq[0] = nq[1] = 0;
	for_each_port(adap, i) {
		const struct port_info *pi = adap2pinfo(adap, i);

		nq[pi->tx_chan] += pi->nqsets;
	}
	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = nq[0] ? i % nq[0] : 0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
	}
	/* Calculate the reverse RSS map table */
	for (i = 0; i < RSS_TABLE_SIZE; ++i)
		if (adap->rrss_map[rspq_map[i]] == 0xff)
			adap->rrss_map[rspq_map[i]] = i;

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
	    F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
	    cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver
 * after dealing with any active network taps.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
	int ret;

	ret = t3_offload_tx(tdev, m);
	return (ret);
}

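/*
 * Program the source MAC table entry for port idx with that port's
 * current MAC address via a CPL_SMT_WRITE_REQ management message.
 */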
static int
write_smt_entry(struct adapter *adapter, int idx)
{
	struct port_info *pi = &adapter->port[idx];
	struct cpl_smt_write_req *req;
	struct mbuf *m;

	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
		return (ENOMEM);

	req = mtod(m, struct cpl_smt_write_req *);
	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
	req->iff = idx;
	memset(req->src_mac1, 0, sizeof(req->src_mac1));
	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

	m_set_priority(m, 1);

	offload_tx(&adapter->tdev, m);

	return (0);
}

static int
init_smt(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i)
		write_smt_entry(adapter, i);
	return (0);
}

static void
init_port_mtus(adapter_t *adapter)
{
	unsigned int mtus = adapter->port[0].ifp->if_mtu;

	if (adapter->port[1].ifp)
		mtus |= adapter->port[1].ifp->if_mtu << 16;
	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

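/*
 * Issue a FW_MNGTOPCODE_PKTSCHED_SET work request that binds queue qidx
 * to the given packet scheduler and port; the request is silently
 * dropped if no mbuf is available.
 */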
static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
    int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m) {
		req = mtod(m, struct mngt_pktsched_wr *);
		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
		req->sched = sched;
		req->idx = qidx;
		req->min = lo;
		req->max = hi;
		req->binding = port;
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		t3_mgmt_tx(adap, m);
	}
}

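/*
 * Bind each port's queue sets to that port's Tx channel using pktsched
 * commands, after starting the per-CPU transmit threads.
 */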
static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	cxgb_pcpu_startup_threads(sc);
	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j) {
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
			    -1, pi->tx_chan);
		}
	}
}

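/*
 * Refresh the protocol SRAM image stored in the EEPROM if the version
 * found there does not match the version the driver was built against.
 */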
static void
update_tpeeprom(struct adapter *adap)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *tpeeprom;
#else
	struct firmware *tpeeprom;
#endif

	uint32_t version;
	unsigned int major, minor;
	int ret, len;
	char rev, name[32];

	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);

	major = G_TP_VERSION_MAJOR(version);
	minor = G_TP_VERSION_MINOR(version);
	if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
		return;

	rev = t3rev2char(adap);
	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);

	tpeeprom = firmware_get(name);
	if (tpeeprom == NULL) {
		device_printf(adap->dev,
		    "could not load TP EEPROM: unable to load %s\n",
		    name);
		return;
	}

	len = tpeeprom->datasize - 4;

	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
	if (ret)
		goto release_tpeeprom;

	if (len != TP_SRAM_LEN) {
		device_printf(adap->dev,
		    "%s length is wrong len=%d expected=%d\n", name,
		    len, TP_SRAM_LEN);
		/* release the firmware reference instead of leaking it */
		goto release_tpeeprom;
	}

	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
	    TP_SRAM_OFFSET);

	if (!ret) {
		device_printf(adap->dev,
		    "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
		    TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
	} else
		device_printf(adap->dev,
		    "Protocol SRAM image update in EEPROM failed\n");

release_tpeeprom:
	firmware_put(tpeeprom, FIRMWARE_UNLOAD);

	return;
}

static int
update_tpsram(struct adapter *adap)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *tpsram;
#else
	struct firmware *tpsram;
#endif
	int ret;
	char rev, name[32];

	rev = t3rev2char(adap);
	snprintf(name, sizeof(name), TPSRAM_NAME, rev);

	update_tpeeprom(adap);

	tpsram = firmware_get(name);
	if (tpsram == NULL) {
		device_printf(adap->dev, "could not load TP SRAM\n");
		return (EINVAL);
	} else
		device_printf(adap->dev, "updating TP SRAM\n");

	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
	if (ret)
		goto release_tpsram;

	ret = t3_set_proto_sram(adap, tpsram->data);
	if (ret)
		device_printf(adap->dev, "loading protocol SRAM failed\n");

release_tpsram:
	firmware_put(tpsram, FIRMWARE_UNLOAD);

	return (ret);
}

/**
 *	cxgb_up - enable the adapter
 *	@adap: adapter being enabled
 *
 *	Called when the first port is enabled, this function performs the
 *	actions necessary to make an adapter operational, such as completing
 *	the initialization of HW modules, and enabling interrupts.
 */
static int
cxgb_up(struct adapter *sc)
{
	int err = 0;

	if ((sc->flags & FULL_INIT_DONE) == 0) {

		if ((sc->flags & FW_UPTODATE) == 0)
			if ((err = upgrade_fw(sc)))
				goto out;
		if ((sc->flags & TPS_UPTODATE) == 0)
			if ((err = update_tpsram(sc)))
				goto out;
		err = t3_init_hw(sc, 0);
		if (err)
			goto out;

		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

		err = setup_sge_qsets(sc);
		if (err)
			goto out;

		setup_rss(sc);
		t3_add_configured_sysctls(sc);
		sc->flags |= FULL_INIT_DONE;
	}

	t3_intr_clear(sc);

	t3_sge_start(sc);
	t3_intr_enable(sc);

	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
	    is_offload(sc) && init_tp_parity(sc) == 0)
		sc->flags |= TP_PARITY_INIT;

	if (sc->flags & TP_PARITY_INIT) {
		t3_write_reg(sc, A_TP_INT_CAUSE,
		    F_CMCACHEPERR | F_ARPLUTPERR);
		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
	}

	if (!(sc->flags & QUEUES_BOUND)) {
		bind_qsets(sc);
		sc->flags |= QUEUES_BOUND;
	}
out:
	return (err);
}

/*
 * Bring down the interface but do not free any resources.
 */
static void
cxgb_down_locked(struct adapter *sc)
{

	t3_sge_stop(sc);
	t3_intr_disable(sc);

	callout_stop(&sc->cxgb_tick_ch);
	callout_stop(&sc->sge_timer_ch);
}

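/*
 * Mark the offload device open (exactly once, via the atomic cmpset on
 * open_device_map), bring the adapter up if no port has done so yet,
 * program the MTU tables and SMT, and notify all registered ULP clients.
 */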
static int
offload_open(struct port_info *pi)
{
	struct adapter *adapter = pi->adapter;
	struct t3cdev *tdev = &adapter->tdev;

	int adap_up = adapter->open_device_map & PORT_MASK;
	int err = 0;

	if (atomic_cmpset_int(&adapter->open_device_map,
		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
		return (0);

	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n",
		    adapter->open_device_map);
	ADAPTER_LOCK(pi->adapter);
	if (!adap_up)
		err = cxgb_up(adapter);
	ADAPTER_UNLOCK(pi->adapter);
	if (err)
		return (err);

	t3_tp_set_offload_mode(adapter, 1);
	tdev->lldev = pi->ifp;

	init_port_mtus(adapter);
	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
	    adapter->params.b_wnd,
	    adapter->params.rev == 0 ?
	      adapter->port[0].ifp->if_mtu : 0xffff);
	init_smt(adapter);
	/* Call back all registered clients */
	cxgb_add_clients(tdev);

	/* restore them in case the offload module has changed them */
	if (err) {
		t3_tp_set_offload_mode(adapter, 0);
		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_set_dummy_ops(tdev);
	}
	return (err);
}

static int
offload_close(struct t3cdev *tdev)
{
	struct adapter *adapter = tdev2adap(tdev);

	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
		return (0);

	/* Call back all registered clients */
	cxgb_remove_clients(tdev);

	tdev->lldev = NULL;
	cxgb_set_dummy_ops(tdev);
	t3_tp_set_offload_mode(adapter, 0);
	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);

	ADAPTER_LOCK(adapter);
	if (!adapter->open_device_map)
		cxgb_down_locked(adapter);

	ADAPTER_UNLOCK(adapter);

	return (0);
}

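/*
 * if_init handler: grab the port lock and defer to cxgb_init_locked().
 */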
static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int err;

	PORT_LOCK_ASSERT_OWNED(p);
	ifp = p->ifp;

	ADAPTER_LOCK(p->adapter);
	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
		ADAPTER_UNLOCK(p->adapter);
		cxgb_stop_locked(p);
		return;
	}
	if (p->adapter->open_device_map == 0) {
		t3_intr_clear(sc);
	}
	setbit(&p->adapter->open_device_map, p->port_id);
	ADAPTER_UNLOCK(p->adapter);

	if (is_offload(sc) && !ofld_disable) {
		err = offload_open(p);
		if (err)
			log(LOG_WARNING,
			    "Could not initialize offload capabilities\n");
	}

	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
	t3_port_intr_enable(sc, p->port_id);

#if !defined(LINK_ATTACH)
	cxgb_link_start(p);
	t3_link_changed(sc, p->port_id);
#endif
	ifp->if_baudrate = IF_Mbps(p->link_config.speed);

	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
	t3_sge_reset_adapter(sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}

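/*
 * Push the port's current promiscuous/multicast filtering state down to
 * the MAC, serialized by the MDIO lock.
 */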
1912static void
1913cxgb_set_rxmode(struct port_info *p)
1914{
1915	struct t3_rx_mode rm;
1916	struct cmac *mac = &p->mac;
1917
1918	t3_init_rx_mode(&rm, p);
1919	mtx_lock(&p->adapter->mdio_lock);
1920	t3_mac_set_rx_mode(mac, &rm);
1921	mtx_unlock(&p->adapter->mdio_lock);
1922}
1923
1924static void
1925cxgb_stop_locked(struct port_info *pi)
1926{
1927	struct ifnet *ifp;
1928
1929	PORT_LOCK_ASSERT_OWNED(pi);
1930	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1931
1932	ifp = pi->ifp;
1933	t3_port_intr_disable(pi->adapter, pi->port_id);
1934	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1935
1936	/* disable pause frames */
1937	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1938			 F_TXPAUSEEN, 0);
1939
1940	/* Reset RX FIFO HWM */
	t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG + pi->mac.offset,
			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);

1945	ADAPTER_LOCK(pi->adapter);
1946	clrbit(&pi->adapter->open_device_map, pi->port_id);
1947
1948	if (pi->adapter->open_device_map == 0)
1949		cxgb_down_locked(pi->adapter);
1950
1951	ADAPTER_UNLOCK(pi->adapter);
1952
1953#if !defined(LINK_ATTACH)
1954	DELAY(100);
1955
1956	/* Wait for TXFIFO empty */
1957	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1958			F_TXFIFO_EMPTY, 1, 20, 5);
1959
1960	DELAY(100);
1961	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1962
1963	pi->phy.ops->power_down(&pi->phy, 1);
1964#endif
}
1967
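/*
 * Validate and set a new MTU.  If the interface is up, the port is
 * restarted so the hardware is reprogrammed for the new frame size.
 */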
1968static int
1969cxgb_set_mtu(struct port_info *p, int mtu)
1970{
1971	struct ifnet *ifp = p->ifp;
1972	int error = 0;
1973
1974	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1975		error = EINVAL;
1976	else if (ifp->if_mtu != mtu) {
1977		PORT_LOCK(p);
1978		ifp->if_mtu = mtu;
1979		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1980			cxgb_stop_locked(p);
1981			cxgb_init_locked(p);
1982		}
1983		PORT_UNLOCK(p);
1984	}
1985	return (error);
1986}
1987
1988#ifdef LRO_SUPPORTED
1989/*
1990 * Mark lro enabled or disabled in all qsets for this port
1991 */
1992static int
1993cxgb_set_lro(struct port_info *p, int enabled)
1994{
1995	int i;
1996	struct adapter *adp = p->adapter;
1997	struct sge_qset *q;
1998
1999	PORT_LOCK_ASSERT_OWNED(p);
2000	for (i = 0; i < p->nqsets; i++) {
2001		q = &adp->sge.qs[p->first_qset + i];
2002		q->lro.enabled = (enabled != 0);
2003	}
2004	return (0);
2005}
2006#endif
2007
2008static int
2009cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
2010{
2011	struct port_info *p = ifp->if_softc;
2012#ifdef INET
2013	struct ifaddr *ifa = (struct ifaddr *)data;
2014#endif
2015	struct ifreq *ifr = (struct ifreq *)data;
2016	int flags, error = 0, reinit = 0;
2017	uint32_t mask;
2018
2019	/*
2020	 * XXX need to check that we aren't in the middle of an unload
2021	 */
2022	switch (command) {
2023	case SIOCSIFMTU:
2024		error = cxgb_set_mtu(p, ifr->ifr_mtu);
2025		break;
2026	case SIOCSIFADDR:
2027#ifdef INET
2028		if (ifa->ifa_addr->sa_family == AF_INET) {
2029			ifp->if_flags |= IFF_UP;
2030			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2031				PORT_LOCK(p);
2032				cxgb_init_locked(p);
2033				PORT_UNLOCK(p);
2034			}
2035			arp_ifinit(ifp, ifa);
2036		} else
2037#endif
2038			error = ether_ioctl(ifp, command, data);
2039		break;
2040	case SIOCSIFFLAGS:
2041		PORT_LOCK(p);
2042		if (ifp->if_flags & IFF_UP) {
2043			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2044				flags = p->if_flags;
2045				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2046				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
2047					cxgb_set_rxmode(p);
2048			} else
2049				cxgb_init_locked(p);
2050			p->if_flags = ifp->if_flags;
2051		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2052			cxgb_stop_locked(p);
2053
2054		PORT_UNLOCK(p);
2055		break;
2056	case SIOCADDMULTI:
2057	case SIOCDELMULTI:
2058		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2059			cxgb_set_rxmode(p);
2060		}
2061		break;
2062	case SIOCSIFMEDIA:
2063	case SIOCGIFMEDIA:
2064		PORT_LOCK(p);
2065		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2066		PORT_UNLOCK(p);
2067		break;
2068	case SIOCSIFCAP:
2069		PORT_LOCK(p);
2070		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2071		if (mask & IFCAP_TXCSUM) {
2072			if (IFCAP_TXCSUM & ifp->if_capenable) {
2073				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2074				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2075				    | CSUM_IP | CSUM_TSO);
2076			} else {
2077				ifp->if_capenable |= IFCAP_TXCSUM;
2078				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2079				    | CSUM_IP);
2080			}
2081		}
2082		if (mask & IFCAP_RXCSUM) {
2083			ifp->if_capenable ^= IFCAP_RXCSUM;
2084		}
2085		if (mask & IFCAP_TSO4) {
2086			if (IFCAP_TSO4 & ifp->if_capenable) {
2087				ifp->if_capenable &= ~IFCAP_TSO4;
2088				ifp->if_hwassist &= ~CSUM_TSO;
2089			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2090				ifp->if_capenable |= IFCAP_TSO4;
2091				ifp->if_hwassist |= CSUM_TSO;
2092			} else {
2093				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " to be enabled to use TSO\n");
2096				error = EINVAL;
2097			}
2098		}
2099#ifdef LRO_SUPPORTED
2100		if (mask & IFCAP_LRO) {
2101			ifp->if_capenable ^= IFCAP_LRO;
2102
2103			/* Safe to do this even if cxgb_up not called yet */
2104			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2105		}
2106#endif
2107		if (mask & IFCAP_VLAN_HWTAGGING) {
2108			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2109			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2110		}
2111		if (mask & IFCAP_VLAN_MTU) {
2112			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2113			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2114		}
2115		if (mask & IFCAP_VLAN_HWCSUM) {
2116			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2117		}
2118		if (reinit) {
2119			cxgb_stop_locked(p);
2120			cxgb_init_locked(p);
2121		}
2122		PORT_UNLOCK(p);
2123
2124#ifdef VLAN_CAPABILITIES
2125		VLAN_CAPABILITIES(ifp);
2126#endif
2127		break;
2128	default:
2129		error = ether_ioctl(ifp, command, data);
2130		break;
2131	}
2132	return (error);
2133}
2134
2135static int
2136cxgb_media_change(struct ifnet *ifp)
2137{
2138	if_printf(ifp, "media change not supported\n");
2139	return (ENXIO);
2140}
2141
2142/*
2143 * Translates from phy->modtype to IFM_TYPE.
2144 */
2145static int
2146cxgb_ifm_type(int phymod)
2147{
2148	int rc = IFM_ETHER | IFM_FDX;
2149
2150	switch (phymod) {
2151	case phy_modtype_sr:
2152		rc |= IFM_10G_SR;
2153		break;
2154	case phy_modtype_lr:
2155		rc |= IFM_10G_LR;
2156		break;
2157	case phy_modtype_lrm:
2158#ifdef IFM_10G_LRM
2159		rc |= IFM_10G_LRM;
2160#endif
2161		break;
2162	case phy_modtype_twinax:
2163#ifdef IFM_10G_TWINAX
2164		rc |= IFM_10G_TWINAX;
2165#endif
2166		break;
2167	case phy_modtype_twinax_long:
2168#ifdef IFM_10G_TWINAX_LONG
2169		rc |= IFM_10G_TWINAX_LONG;
2170#endif
2171		break;
2172	case phy_modtype_none:
2173		rc = IFM_ETHER | IFM_NONE;
2174		break;
2175	case phy_modtype_unknown:
2176		break;
2177	}
2178
2179	return (rc);
2180}
2181
2182static void
2183cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2184{
2185	struct port_info *p = ifp->if_softc;
2186	struct ifmedia_entry *cur = p->media.ifm_cur;
2187	int m;
2188
2189	if (cur->ifm_data != p->phy.modtype) {
2190		/* p->media about to be rebuilt, must hold lock */
2191		PORT_LOCK_ASSERT_OWNED(p);
2192
2193		m = cxgb_ifm_type(p->phy.modtype);
2194		ifmedia_removeall(&p->media);
2195		ifmedia_add(&p->media, m, p->phy.modtype, NULL);
2196		ifmedia_set(&p->media, m);
2197		cur = p->media.ifm_cur; /* ifmedia_set modified ifm_cur */
2198		ifmr->ifm_current = m;
2199	}
2200
2201	ifmr->ifm_status = IFM_AVALID;
2202	ifmr->ifm_active = IFM_ETHER;
2203
2204	if (!p->link_config.link_ok)
2205		return;
2206
2207	ifmr->ifm_status |= IFM_ACTIVE;
2208
2209	switch (p->link_config.speed) {
2210	case 10:
2211		ifmr->ifm_active |= IFM_10_T;
2212		break;
2213	case 100:
2214		ifmr->ifm_active |= IFM_100_TX;
		break;
2216	case 1000:
2217		ifmr->ifm_active |= IFM_1000_T;
2218		break;
2219	case 10000:
2220		ifmr->ifm_active |= IFM_SUBTYPE(cur->ifm_media);
2221		break;
2222	}
2223
2224	if (p->link_config.duplex)
2225		ifmr->ifm_active |= IFM_FDX;
2226	else
2227		ifmr->ifm_active |= IFM_HDX;
2228}
2229
2230static void
2231cxgb_async_intr(void *data)
2232{
2233	adapter_t *sc = data;
2234
2235	if (cxgb_debug)
2236		device_printf(sc->dev, "cxgb_async_intr\n");
2237	/*
2238	 * May need to sleep - defer to taskqueue
2239	 */
2240	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2241}
2242
2243static void
2244cxgb_ext_intr_handler(void *arg, int count)
2245{
2246	adapter_t *sc = (adapter_t *)arg;
2247
2248	if (cxgb_debug)
2249		printf("cxgb_ext_intr_handler\n");
2250
2251	t3_phy_intr_handler(sc);
2252
2253	/* Now reenable external interrupts */
2254	ADAPTER_LOCK(sc);
2255	if (sc->slow_intr_mask) {
2256		sc->slow_intr_mask |= F_T3DBG;
2257		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2258		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2259	}
2260	ADAPTER_UNLOCK(sc);
2261}
2262
2263static void
2264check_link_status(adapter_t *sc)
2265{
2266	int i;
2267
2268	/* For synchronized access to open_device_map */
2269	ADAPTER_LOCK_ASSERT_OWNED(sc);
2270
	for (i = 0; i < sc->params.nports; ++i) {
2272		struct port_info *p = &sc->port[i];
2273		struct link_config *lc = &p->link_config;
2274
2275		if (!isset(&sc->open_device_map, p->port_id)) {
2276			/*
2277			 * port is down, report link down too.  Note
2278			 * that we do this for IRQ based PHYs too.
2279			 */
2280			lc->link_ok = 0;
2281			t3_os_link_changed(sc, i, lc->link_ok, lc->speed,
2282					   lc->duplex, lc->fc);
2283		} else if (p->link_fault || !(p->phy.caps & SUPPORTED_IRQ))
2284			t3_link_changed(sc, i);
2285	}
2286}
2287
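/*
 * Per-tick watchdog for the T3B2 MAC (called from cxgb_tick_handler()
 * on rev B2 parts).  t3b2_mac_watchdog_task() returning 1 means the MAC
 * was toggled; 2 means it had to be reset, so MTU, station address, RX
 * mode and link state are reprogrammed below.
 */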
2288static void
2289check_t3b2_mac(struct adapter *adapter)
2290{
2291	int i;
2292
	if (adapter->flags & CXGB_SHUTDOWN)
2294		return;
2295
2296	for_each_port(adapter, i) {
2297		struct port_info *p = &adapter->port[i];
2298		struct ifnet *ifp = p->ifp;
2299		int status;
2300
		if (adapter->flags & CXGB_SHUTDOWN)
2302			return;
2303
2304		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2305			continue;
2306
2307		status = 0;
2308		PORT_LOCK(p);
2309		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2310			status = t3b2_mac_watchdog_task(&p->mac);
2311		if (status == 1)
2312			p->mac.stats.num_toggled++;
2313		else if (status == 2) {
2314			struct cmac *mac = &p->mac;
2315			int mtu = ifp->if_mtu;
2316
2317			if (ifp->if_capenable & IFCAP_VLAN_MTU)
2318				mtu += ETHER_VLAN_ENCAP_LEN;
2319			t3_mac_set_mtu(mac, mtu);
2320			t3_mac_set_address(mac, 0, p->hw_addr);
2321			cxgb_set_rxmode(p);
2322			t3_link_start(&p->phy, mac, &p->link_config);
2323			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2324			t3_port_intr_enable(adapter, p->port_id);
2325			p->mac.stats.num_resets++;
2326		}
2327		PORT_UNLOCK(p);
2328	}
2329}
2330
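/*
 * Periodic timer.  The callout runs in softclock context where we may
 * not sleep, so the actual work is deferred to the adapter taskqueue
 * (cxgb_tick_handler) and the callout is rearmed here.
 */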
2331static void
2332cxgb_tick(void *arg)
2333{
2334	adapter_t *sc = (adapter_t *)arg;
2335
	if (sc->flags & CXGB_SHUTDOWN)
2337		return;
2338
2339	taskqueue_enqueue(sc->tq, &sc->tick_task);
2340	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2341}
2342
2343static void
2344cxgb_tick_handler(void *arg, int count)
2345{
2346	adapter_t *sc = (adapter_t *)arg;
2347	const struct adapter_params *p = &sc->params;
2348	int i;
2349	uint32_t cause, reset;
2350
	if ((sc->flags & CXGB_SHUTDOWN) || !(sc->flags & FULL_INIT_DONE))
2352		return;
2353
2354	ADAPTER_LOCK(sc);
2355
2356	check_link_status(sc);
2357
2358	sc->check_task_cnt++;
2359
	/*
	 * The port lock must be taken before the adapter lock, so
	 * drop the adapter lock before check_t3b2_mac() acquires
	 * port locks below.
	 */
2364	ADAPTER_UNLOCK(sc);
2365
2366	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2367		check_t3b2_mac(sc);
2368
2369	cause = t3_read_reg(sc, A_SG_INT_CAUSE);
2370	reset = 0;
2371	if (cause & F_FLEMPTY) {
2372		struct sge_qset *qs = &sc->sge.qs[0];
2373
2374		i = 0;
2375		reset |= F_FLEMPTY;
2376
2377		cause = (t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) >>
2378			 S_FL0EMPTY) & 0xffff;
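		/*
		 * Each set bit flags an empty free list.  Every qset has
		 * two free lists, so even bits count against fl[0] and
		 * odd bits against fl[1] of successive qsets.
		 */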
2379		while (cause) {
2380			qs->fl[i].empty += (cause & 1);
2381			if (i)
2382				qs++;
2383			i ^= 1;
2384			cause >>= 1;
2385		}
2386	}
2387	t3_write_reg(sc, A_SG_INT_CAUSE, reset);
2388
2389	for (i = 0; i < sc->params.nports; i++) {
2390		struct port_info *pi = &sc->port[i];
2391		struct ifnet *ifp = pi->ifp;
2392		struct cmac *mac = &pi->mac;
2393		struct mac_stats *mstats = &mac->stats;
2394		PORT_LOCK(pi);
2395		t3_mac_update_stats(mac);
2396		PORT_UNLOCK(pi);
2397
2398		ifp->if_opackets =
2399		    mstats->tx_frames_64 +
2400		    mstats->tx_frames_65_127 +
2401		    mstats->tx_frames_128_255 +
2402		    mstats->tx_frames_256_511 +
2403		    mstats->tx_frames_512_1023 +
2404		    mstats->tx_frames_1024_1518 +
2405		    mstats->tx_frames_1519_max;
2406
2407		ifp->if_ipackets =
2408		    mstats->rx_frames_64 +
2409		    mstats->rx_frames_65_127 +
2410		    mstats->rx_frames_128_255 +
2411		    mstats->rx_frames_256_511 +
2412		    mstats->rx_frames_512_1023 +
2413		    mstats->rx_frames_1024_1518 +
2414		    mstats->rx_frames_1519_max;
2415
2416		ifp->if_obytes = mstats->tx_octets;
2417		ifp->if_ibytes = mstats->rx_octets;
2418		ifp->if_omcasts = mstats->tx_mcast_frames;
2419		ifp->if_imcasts = mstats->rx_mcast_frames;
2420
2421		ifp->if_collisions =
2422		    mstats->tx_total_collisions;
2423
2424		ifp->if_iqdrops = mstats->rx_cong_drops;
2425
2426		ifp->if_oerrors =
2427		    mstats->tx_excess_collisions +
2428		    mstats->tx_underrun +
2429		    mstats->tx_len_errs +
2430		    mstats->tx_mac_internal_errs +
2431		    mstats->tx_excess_deferral +
2432		    mstats->tx_fcs_errs;
2433		ifp->if_ierrors =
2434		    mstats->rx_jabber +
2435		    mstats->rx_data_errs +
2436		    mstats->rx_sequence_errs +
2437		    mstats->rx_runt +
2438		    mstats->rx_too_long +
2439		    mstats->rx_mac_internal_errs +
2440		    mstats->rx_short +
2441		    mstats->rx_fcs_errs;
2442
2443		if (mac->multiport)
2444			continue;
2445
2446		/* Count rx fifo overflows, once per second */
2447		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2448		reset = 0;
2449		if (cause & F_RXFIFO_OVERFLOW) {
2450			mac->stats.rx_fifo_ovfl++;
2451			reset |= F_RXFIFO_OVERFLOW;
2452		}
2453		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2454	}
2455}
2456
2457static void
2458touch_bars(device_t dev)
2459{
	/*
	 * Not enabled yet: would read back and rewrite the upper halves
	 * of the 64-bit BARs on 32-bit systems (note the "&& 0" below).
	 */
2463#if !defined(__LP64__) && 0
2464	u32 v;
2465
2466	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2467	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2468	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2469	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2470	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2471	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2472#endif
2473}
2474
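/*
 * Write a block of data to the EEPROM.  The SEEPROM is accessed in
 * 4-byte words, so writes that are not word aligned are widened: the
 * first and last words of the aligned range are read back and merged
 * with the caller's data, and write protection is dropped only for the
 * duration of the update.
 */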
2475static int
2476set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2477{
2478	uint8_t *buf;
2479	int err = 0;
2480	u32 aligned_offset, aligned_len, *p;
2481	struct adapter *adapter = pi->adapter;
2482
2483
2484	aligned_offset = offset & ~3;
2485	aligned_len = (len + (offset & 3) + 3) & ~3;
2486
	if (aligned_offset != offset || aligned_len != len) {
		/* With M_WAITOK, malloc(9) cannot return NULL */
		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2491		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2492		if (!err && aligned_len > 4)
2493			err = t3_seeprom_read(adapter,
2494					      aligned_offset + aligned_len - 4,
2495					      (u32 *)&buf[aligned_len - 4]);
2496		if (err)
2497			goto out;
2498		memcpy(buf + (offset & 3), data, len);
2499	} else
2500		buf = (uint8_t *)(uintptr_t)data;
2501
2502	err = t3_seeprom_wp(adapter, 0);
2503	if (err)
2504		goto out;
2505
2506	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2507		err = t3_seeprom_write(adapter, aligned_offset, *p);
2508		aligned_offset += 4;
2509	}
2510
2511	if (!err)
2512		err = t3_seeprom_wp(adapter, 1);
2513out:
2514	if (buf != data)
2515		free(buf, M_DEVBUF);
	return (err);
2517}
2518
2519
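/*
 * Range check for ioctl parameters.  Negative values mean "parameter
 * not supplied" and always pass; callers skip any field that is < 0
 * (see CHELSIO_SET_HW_SCHED).
 */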
2520static int
2521in_range(int val, int lo, int hi)
2522{
	return (val < 0 || (val <= hi && val >= lo));
2524}
2525
2526static int
2527cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2528{
	return (0);
2530}
2531
2532static int
2533cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2534{
	return (0);
2536}
2537
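/*
 * ioctl handler for the per-port management cdev.  A userland tool
 * (cxgbtool, for instance) opens the port's device node and issues the
 * CHELSIO_* commands declared in cxgb_ioctl.h; an illustrative sketch
 * only, the device path and register choice being assumptions:
 *
 *	struct ch_reg r = { .addr = A_SG_CONTROL };
 *	int fd = open("/dev/cxgb0", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("SG_CONTROL: 0x%x\n", r.val);
 */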
2538static int
2539cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2540    int fflag, struct thread *td)
2541{
2542	int mmd, error = 0;
2543	struct port_info *pi = dev->si_drv1;
2544	adapter_t *sc = pi->adapter;
2545
2546#ifdef PRIV_SUPPORTED
2547	if (priv_check(td, PRIV_DRIVER)) {
2548		if (cxgb_debug)
2549			printf("user does not have access to privileged ioctls\n");
2550		return (EPERM);
2551	}
2552#else
2553	if (suser(td)) {
2554		if (cxgb_debug)
2555			printf("user does not have access to privileged ioctls\n");
2556		return (EPERM);
2557	}
2558#endif
2559
2560	switch (cmd) {
2561	case CHELSIO_GET_MIIREG: {
2562		uint32_t val;
2563		struct cphy *phy = &pi->phy;
2564		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2565
2566		if (!phy->mdio_read)
2567			return (EOPNOTSUPP);
2568		if (is_10G(sc)) {
2569			mmd = mid->phy_id >> 8;
2570			if (!mmd)
2571				mmd = MDIO_DEV_PCS;
2572			else if (mmd > MDIO_DEV_VEND2)
2573				return (EINVAL);
2574
2575			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2576					     mid->reg_num, &val);
2577		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
2580		if (error == 0)
2581			mid->val_out = val;
2582		break;
2583	}
2584	case CHELSIO_SET_MIIREG: {
2585		struct cphy *phy = &pi->phy;
2586		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2587
2588		if (!phy->mdio_write)
2589			return (EOPNOTSUPP);
2590		if (is_10G(sc)) {
2591			mmd = mid->phy_id >> 8;
2592			if (!mmd)
2593				mmd = MDIO_DEV_PCS;
2594			else if (mmd > MDIO_DEV_VEND2)
2595				return (EINVAL);
2596
2597			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2598					      mmd, mid->reg_num, mid->val_in);
2599		} else
2600			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2601					      mid->reg_num & 0x1f,
2602					      mid->val_in);
2603		break;
2604	}
2605	case CHELSIO_SETREG: {
2606		struct ch_reg *edata = (struct ch_reg *)data;
2607		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2608			return (EFAULT);
2609		t3_write_reg(sc, edata->addr, edata->val);
2610		break;
2611	}
2612	case CHELSIO_GETREG: {
2613		struct ch_reg *edata = (struct ch_reg *)data;
2614		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2615			return (EFAULT);
2616		edata->val = t3_read_reg(sc, edata->addr);
2617		break;
2618	}
2619	case CHELSIO_GET_SGE_CONTEXT: {
2620		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2621		mtx_lock_spin(&sc->sge.reg_lock);
2622		switch (ecntxt->cntxt_type) {
2623		case CNTXT_TYPE_EGRESS:
2624			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2625			    ecntxt->data);
2626			break;
2627		case CNTXT_TYPE_FL:
2628			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2629			    ecntxt->data);
2630			break;
2631		case CNTXT_TYPE_RSP:
2632			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2633			    ecntxt->data);
2634			break;
2635		case CNTXT_TYPE_CQ:
2636			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2637			    ecntxt->data);
2638			break;
2639		default:
2640			error = EINVAL;
2641			break;
2642		}
2643		mtx_unlock_spin(&sc->sge.reg_lock);
2644		break;
2645	}
2646	case CHELSIO_GET_SGE_DESC: {
2647		struct ch_desc *edesc = (struct ch_desc *)data;
2648		int ret;
2649		if (edesc->queue_num >= SGE_QSETS * 6)
2650			return (EINVAL);
2651		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2652		    edesc->queue_num % 6, edesc->idx, edesc->data);
2653		if (ret < 0)
2654			return (EINVAL);
2655		edesc->size = ret;
2656		break;
2657	}
2658	case CHELSIO_GET_QSET_PARAMS: {
2659		struct qset_params *q;
2660		struct ch_qset_params *t = (struct ch_qset_params *)data;
2661		int q1 = pi->first_qset;
2662		int nqsets = pi->nqsets;
2663		int i;
2664
2665		if (t->qset_idx >= nqsets)
			return (EINVAL);
2667
2668		i = q1 + t->qset_idx;
2669		q = &sc->params.sge.qset[i];
2670		t->rspq_size   = q->rspq_size;
2671		t->txq_size[0] = q->txq_size[0];
2672		t->txq_size[1] = q->txq_size[1];
2673		t->txq_size[2] = q->txq_size[2];
2674		t->fl_size[0]  = q->fl_size;
2675		t->fl_size[1]  = q->jumbo_size;
2676		t->polling     = q->polling;
2677		t->lro         = q->lro;
2678		t->intr_lat    = q->coalesce_usecs;
2679		t->cong_thres  = q->cong_thres;
2680		t->qnum        = i;
2681
2682		if (sc->flags & USING_MSIX)
2683			t->vector = rman_get_start(sc->msix_irq_res[i]);
2684		else
2685			t->vector = rman_get_start(sc->irq_res);
2686
2687		break;
2688	}
2689	case CHELSIO_GET_QSET_NUM: {
2690		struct ch_reg *edata = (struct ch_reg *)data;
2691		edata->val = pi->nqsets;
2692		break;
2693	}
2694	case CHELSIO_LOAD_FW: {
2695		uint8_t *fw_data;
2696		uint32_t vers;
2697		struct ch_mem_range *t = (struct ch_mem_range *)data;
2698
2699		/*
2700		 * You're allowed to load a firmware only before FULL_INIT_DONE
2701		 *
2702		 * FW_UPTODATE is also set so the rest of the initialization
2703		 * will not overwrite what was loaded here.  This gives you the
2704		 * flexibility to load any firmware (and maybe shoot yourself in
2705		 * the foot).
2706		 */
2707
2708		ADAPTER_LOCK(sc);
2709		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2710			ADAPTER_UNLOCK(sc);
2711			return (EBUSY);
2712		}
2713
2714		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2715		if (!fw_data)
2716			error = ENOMEM;
2717		else
2718			error = copyin(t->buf, fw_data, t->len);
2719
2720		if (!error)
2721			error = -t3_load_fw(sc, fw_data, t->len);
2722
2723		if (t3_get_fw_version(sc, &vers) == 0) {
2724			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2725			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2726			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2727		}
2728
2729		if (!error)
2730			sc->flags |= FW_UPTODATE;
2731
2732		free(fw_data, M_DEVBUF);
2733		ADAPTER_UNLOCK(sc);
2734		break;
2735	}
2736	case CHELSIO_LOAD_BOOT: {
2737		uint8_t *boot_data;
2738		struct ch_mem_range *t = (struct ch_mem_range *)data;
2739
2740		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2741		if (!boot_data)
			return (ENOMEM);
2743
2744		error = copyin(t->buf, boot_data, t->len);
2745		if (!error)
2746			error = -t3_load_boot(sc, boot_data, t->len);
2747
2748		free(boot_data, M_DEVBUF);
2749		break;
2750	}
2751	case CHELSIO_GET_PM: {
2752		struct ch_pm *m = (struct ch_pm *)data;
2753		struct tp_params *p = &sc->params.tp;
2754
2755		if (!is_offload(sc))
2756			return (EOPNOTSUPP);
2757
2758		m->tx_pg_sz = p->tx_pg_size;
2759		m->tx_num_pg = p->tx_num_pgs;
2760		m->rx_pg_sz  = p->rx_pg_size;
2761		m->rx_num_pg = p->rx_num_pgs;
2762		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2763
2764		break;
2765	}
2766	case CHELSIO_SET_PM: {
2767		struct ch_pm *m = (struct ch_pm *)data;
2768		struct tp_params *p = &sc->params.tp;
2769
2770		if (!is_offload(sc))
2771			return (EOPNOTSUPP);
2772		if (sc->flags & FULL_INIT_DONE)
2773			return (EBUSY);
2774
2775		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2776		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2777			return (EINVAL);	/* not power of 2 */
2778		if (!(m->rx_pg_sz & 0x14000))
2779			return (EINVAL);	/* not 16KB or 64KB */
		if (!(m->tx_pg_sz & 0x1554000))
			return (EINVAL);	/* not a power of 4 in 16KB..16MB */
2782		if (m->tx_num_pg == -1)
2783			m->tx_num_pg = p->tx_num_pgs;
2784		if (m->rx_num_pg == -1)
2785			m->rx_num_pg = p->rx_num_pgs;
2786		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2787			return (EINVAL);
2788		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2789		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2790			return (EINVAL);
2791
2792		p->rx_pg_size = m->rx_pg_sz;
2793		p->tx_pg_size = m->tx_pg_sz;
2794		p->rx_num_pgs = m->rx_num_pg;
2795		p->tx_num_pgs = m->tx_num_pg;
2796		break;
2797	}
2798	case CHELSIO_SETMTUTAB: {
2799		struct ch_mtus *m = (struct ch_mtus *)data;
2800		int i;
2801
2802		if (!is_offload(sc))
2803			return (EOPNOTSUPP);
2804		if (offload_running(sc))
2805			return (EBUSY);
2806		if (m->nmtus != NMTUS)
2807			return (EINVAL);
2808		if (m->mtus[0] < 81)         /* accommodate SACK */
2809			return (EINVAL);
2810
2811		/*
2812		 * MTUs must be in ascending order
2813		 */
2814		for (i = 1; i < NMTUS; ++i)
2815			if (m->mtus[i] < m->mtus[i - 1])
2816				return (EINVAL);
2817
2818		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2819		break;
2820	}
2821	case CHELSIO_GETMTUTAB: {
2822		struct ch_mtus *m = (struct ch_mtus *)data;
2823
2824		if (!is_offload(sc))
2825			return (EOPNOTSUPP);
2826
2827		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2828		m->nmtus = NMTUS;
2829		break;
2830	}
2831	case CHELSIO_GET_MEM: {
2832		struct ch_mem_range *t = (struct ch_mem_range *)data;
2833		struct mc7 *mem;
2834		uint8_t *useraddr;
2835		u64 buf[32];
2836
2837		/*
		 * Use these to avoid modifying len/addr in the return
2839		 * struct
2840		 */
2841		uint32_t len = t->len, addr = t->addr;
2842
2843		if (!is_offload(sc))
2844			return (EOPNOTSUPP);
2845		if (!(sc->flags & FULL_INIT_DONE))
2846			return (EIO);         /* need the memory controllers */
2847		if ((addr & 0x7) || (len & 0x7))
2848			return (EINVAL);
2849		if (t->mem_id == MEM_CM)
2850			mem = &sc->cm;
2851		else if (t->mem_id == MEM_PMRX)
2852			mem = &sc->pmrx;
2853		else if (t->mem_id == MEM_PMTX)
2854			mem = &sc->pmtx;
2855		else
2856			return (EINVAL);
2857
2858		/*
2859		 * Version scheme:
2860		 * bits 0..9: chip version
2861		 * bits 10..15: chip revision
2862		 */
2863		t->version = 3 | (sc->params.rev << 10);
2864
2865		/*
2866		 * Read 256 bytes at a time as len can be large and we don't
2867		 * want to use huge intermediate buffers.
2868		 */
2869		useraddr = (uint8_t *)t->buf;
2870		while (len) {
2871			unsigned int chunk = min(len, sizeof(buf));
2872
2873			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2874			if (error)
2875				return (-error);
2876			if (copyout(buf, useraddr, chunk))
2877				return (EFAULT);
2878			useraddr += chunk;
2879			addr += chunk;
2880			len -= chunk;
2881		}
2882		break;
2883	}
2884	case CHELSIO_READ_TCAM_WORD: {
2885		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2886
2887		if (!is_offload(sc))
2888			return (EOPNOTSUPP);
2889		if (!(sc->flags & FULL_INIT_DONE))
2890			return (EIO);         /* need MC5 */
		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2893	}
2894	case CHELSIO_SET_TRACE_FILTER: {
2895		struct ch_trace *t = (struct ch_trace *)data;
2896		const struct trace_params *tp;
2897
2898		tp = (const struct trace_params *)&t->sip;
2899		if (t->config_tx)
2900			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2901					       t->trace_tx);
2902		if (t->config_rx)
2903			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2904					       t->trace_rx);
2905		break;
2906	}
2907	case CHELSIO_SET_PKTSCHED: {
2908		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2909		if (sc->open_device_map == 0)
2910			return (EAGAIN);
2911		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2912		    p->binding);
2913		break;
2914	}
2915	case CHELSIO_IFCONF_GETREGS: {
2916		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2917		int reglen = cxgb_get_regs_len();
2918		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2919		if (buf == NULL) {
2920			return (ENOMEM);
2921		}
2922		if (regs->len > reglen)
2923			regs->len = reglen;
2924		else if (regs->len < reglen)
2925			error = ENOBUFS;
2926
2927		if (!error) {
2928			cxgb_get_regs(sc, regs, buf);
2929			error = copyout(buf, regs->data, reglen);
2930		}
2931		free(buf, M_DEVBUF);
2932
2933		break;
2934	}
2935	case CHELSIO_SET_HW_SCHED: {
2936		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2937		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2938
2939		if ((sc->flags & FULL_INIT_DONE) == 0)
2940			return (EAGAIN);       /* need TP to be initialized */
2941		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2942		    !in_range(t->channel, 0, 1) ||
2943		    !in_range(t->kbps, 0, 10000000) ||
2944		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2945		    !in_range(t->flow_ipg, 0,
2946			      dack_ticks_to_usec(sc, 0x7ff)))
2947			return (EINVAL);
2948
2949		if (t->kbps >= 0) {
2950			error = t3_config_sched(sc, t->kbps, t->sched);
2951			if (error < 0)
2952				return (-error);
2953		}
2954		if (t->class_ipg >= 0)
2955			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2956		if (t->flow_ipg >= 0) {
2957			t->flow_ipg *= 1000;     /* us -> ns */
2958			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2959		}
2960		if (t->mode >= 0) {
2961			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2962
2963			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2964					 bit, t->mode ? bit : 0);
2965		}
2966		if (t->channel >= 0)
2967			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2968					 1 << t->sched, t->channel << t->sched);
2969		break;
2970	}
2971	case CHELSIO_GET_EEPROM: {
2972		int i;
2973		struct ch_eeprom *e = (struct ch_eeprom *)data;
2974		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2975
2976		if (buf == NULL) {
2977			return (ENOMEM);
2978		}
2979		e->magic = EEPROM_MAGIC;
2980		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2981			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2982
2983		if (!error)
2984			error = copyout(buf + e->offset, e->data, e->len);
2985
2986		free(buf, M_DEVBUF);
2987		break;
2988	}
2989	case CHELSIO_CLEAR_STATS: {
2990		if (!(sc->flags & FULL_INIT_DONE))
			return (EAGAIN);
2992
2993		PORT_LOCK(pi);
2994		t3_mac_update_stats(&pi->mac);
2995		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2996		PORT_UNLOCK(pi);
2997		break;
2998	}
2999	case CHELSIO_GET_UP_LA: {
3000		struct ch_up_la *la = (struct ch_up_la *)data;
3001		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3002		if (buf == NULL) {
3003			return (ENOMEM);
3004		}
3005		if (la->bufsize < LA_BUFSIZE)
3006			error = ENOBUFS;
3007
3008		if (!error)
3009			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3010					      &la->bufsize, buf);
3011		if (!error)
3012			error = copyout(buf, la->data, la->bufsize);
3013
3014		free(buf, M_DEVBUF);
3015		break;
3016	}
3017	case CHELSIO_GET_UP_IOQS: {
3018		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3019		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3020		uint32_t *v;
3021
3022		if (buf == NULL) {
3023			return (ENOMEM);
3024		}
3025		if (ioqs->bufsize < IOQS_BUFSIZE)
3026			error = ENOBUFS;
3027
3028		if (!error)
3029			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3030
3031		if (!error) {
3032			v = (uint32_t *)buf;
3033
3034			ioqs->bufsize -= 4 * sizeof(uint32_t);
3035			ioqs->ioq_rx_enable = *v++;
3036			ioqs->ioq_tx_enable = *v++;
3037			ioqs->ioq_rx_status = *v++;
3038			ioqs->ioq_tx_status = *v++;
3039
3040			error = copyout(v, ioqs->data, ioqs->bufsize);
3041		}
3042
3043		free(buf, M_DEVBUF);
3044		break;
3045	}
3046	default:
3047		return (EOPNOTSUPP);
3049	}
3050
3051	return (error);
3052}
3053
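/*
 * Helper for cxgb_get_regs(): snapshot the register range [start, end]
 * into the dump buffer at its natural offset, one 32-bit word at a time.
 */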
3054static __inline void
3055reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3056    unsigned int end)
3057{
3058	uint32_t *p = (uint32_t *)(buf + start);
3059
3060	for ( ; start <= end; start += sizeof(uint32_t))
3061		*p++ = t3_read_reg(ap, start);
3062}
3063
3064#define T3_REGMAP_SIZE (3 * 1024)
3065static int
3066cxgb_get_regs_len(void)
3067{
	return (T3_REGMAP_SIZE);
3069}
3070
3071static void
3072cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3073{
3074
3075	/*
3076	 * Version scheme:
3077	 * bits 0..9: chip version
3078	 * bits 10..15: chip revision
3079	 * bit 31: set for PCIe cards
3080	 */
3081	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3082
3083	/*
3084	 * We skip the MAC statistics registers because they are clear-on-read.
3085	 * Also reading multi-register stats would need to synchronize with the
3086	 * periodic mac stats accumulation.  Hard to justify the complexity.
3087	 */
3088	memset(buf, 0, cxgb_get_regs_len());
3089	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3090	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3091	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3092	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3093	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3094	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3095		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3096	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3097		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3098}
3099
3100
3101MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3102