cxgb_main.c revision 194661
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 194661 2009-06-22 21:42:57Z np $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/ktr.h>
44#include <sys/rman.h>
45#include <sys/ioccom.h>
46#include <sys/mbuf.h>
47#include <sys/linker.h>
48#include <sys/firmware.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/taskqueue.h>
56#include <sys/proc.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64#include <net/if_types.h>
65#include <net/if_vlan_var.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet/ip.h>
71#include <netinet/ip.h>
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74
75#include <dev/pci/pcireg.h>
76#include <dev/pci/pcivar.h>
77#include <dev/pci/pci_private.h>
78
79#include <cxgb_include.h>
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_interrupts(adapter_t *);
86static void cxgb_teardown_interrupts(adapter_t *);
87static int cxgb_begin_op(struct port_info *, const char *);
88static int cxgb_begin_detach(struct port_info *);
89static int cxgb_end_op(struct port_info *);
90static void cxgb_init(void *);
91static int cxgb_init_synchronized(struct port_info *);
92static int cxgb_uninit_synchronized(struct port_info *);
93static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
94static int cxgb_media_change(struct ifnet *);
95static int cxgb_ifm_type(int);
96static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
97static int setup_sge_qsets(adapter_t *);
98static void cxgb_async_intr(void *);
99static void cxgb_ext_intr_handler(void *, int);
100static void cxgb_tick_handler(void *, int);
101static void cxgb_tick(void *);
102static void setup_rss(adapter_t *sc);
103
104/* Attachment glue for the PCI controller end of the device.  Each port of
105 * the device is attached separately, as defined later.
106 */
107static int cxgb_controller_probe(device_t);
108static int cxgb_controller_attach(device_t);
109static int cxgb_controller_detach(device_t);
110static void cxgb_free(struct adapter *);
111static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
112    unsigned int end);
113static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
114static int cxgb_get_regs_len(void);
115static int offload_open(struct port_info *pi);
116static void touch_bars(device_t dev);
117static int offload_close(struct t3cdev *tdev);
118int t3_detect_link_fault(adapter_t *adapter, int port_id);
119
120static device_method_t cxgb_controller_methods[] = {
121	DEVMETHOD(device_probe,		cxgb_controller_probe),
122	DEVMETHOD(device_attach,	cxgb_controller_attach),
123	DEVMETHOD(device_detach,	cxgb_controller_detach),
124
125	/* bus interface */
126	DEVMETHOD(bus_print_child,	bus_generic_print_child),
127	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
128
129	{ 0, 0 }
130};
131
132static driver_t cxgb_controller_driver = {
133	"cxgbc",
134	cxgb_controller_methods,
135	sizeof(struct adapter)
136};
137
138static devclass_t	cxgb_controller_devclass;
139DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
140
141/*
142 * Attachment glue for the ports.  Attachment is done directly to the
143 * controller device.
144 */
145static int cxgb_port_probe(device_t);
146static int cxgb_port_attach(device_t);
147static int cxgb_port_detach(device_t);
148
149static device_method_t cxgb_port_methods[] = {
150	DEVMETHOD(device_probe,		cxgb_port_probe),
151	DEVMETHOD(device_attach,	cxgb_port_attach),
152	DEVMETHOD(device_detach,	cxgb_port_detach),
153	{ 0, 0 }
154};
155
156static driver_t cxgb_port_driver = {
157	"cxgb",
158	cxgb_port_methods,
159	0
160};
161
162static d_ioctl_t cxgb_extension_ioctl;
163static d_open_t cxgb_extension_open;
164static d_close_t cxgb_extension_close;
165
166static struct cdevsw cxgb_cdevsw = {
167       .d_version =    D_VERSION,
168       .d_flags =      0,
169       .d_open =       cxgb_extension_open,
170       .d_close =      cxgb_extension_close,
171       .d_ioctl =      cxgb_extension_ioctl,
172       .d_name =       "cxgb",
173};
174
175static devclass_t	cxgb_port_devclass;
176DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
177
178/*
179 * The driver uses the best interrupt scheme available on a platform in the
180 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
181 * of these schemes the driver may consider as follows:
182 *
183 * msi = 2: choose from among all three options
184 * msi = 1: only consider MSI and pin interrupts
185 * msi = 0: force pin interrupts
186 */
187static int msi_allowed = 2;
188
189TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
190SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
191SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
192    "MSI-X, MSI, INTx selector");
193
194/*
195 * The driver enables offload as a default.
196 * To disable it, use ofld_disable = 1.
197 */
198static int ofld_disable = 0;
199TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
200SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
201    "disable ULP offload");
202
203/*
204 * The driver uses an auto-queue algorithm by default.
205 * To disable it and force a single queue-set per port, use multiq = 0
206 */
207static int multiq = 1;
208TUNABLE_INT("hw.cxgb.multiq", &multiq);
209SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
210    "use min(ncpus/ports, 8) queue-sets per port");
211
212/*
213 * By default the driver will not update the firmware unless
214 * it was compiled against a newer version
215 *
216 */
217static int force_fw_update = 0;
218TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
219SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
220    "update firmware even if up to date");
221
222int cxgb_use_16k_clusters = 1;
223TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
224SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
225    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
226
227/*
228 * Tune the size of the output queue.
229 */
230int cxgb_snd_queue_len = IFQ_MAXLEN;
231TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
232SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
233    &cxgb_snd_queue_len, 0, "send queue size");
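
/*
 * All of the knobs above are loader tunables (CTLFLAG_RDTUN): they are read
 * from the kernel environment when the module is loaded and are visible
 * read-only under the hw.cxgb sysctl tree afterwards.  For example (the
 * values below are purely illustrative), in /boot/loader.conf:
 *
 *	hw.cxgb.msi_allowed="1"
 *	hw.cxgb.multiq="0"
 *	hw.cxgb.snd_queue_len="2048"
 */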
234
235
236enum {
237	MAX_TXQ_ENTRIES      = 16384,
238	MAX_CTRL_TXQ_ENTRIES = 1024,
239	MAX_RSPQ_ENTRIES     = 16384,
240	MAX_RX_BUFFERS       = 16384,
241	MAX_RX_JUMBO_BUFFERS = 16384,
242	MIN_TXQ_ENTRIES      = 4,
243	MIN_CTRL_TXQ_ENTRIES = 4,
244	MIN_RSPQ_ENTRIES     = 32,
245	MIN_FL_ENTRIES       = 32,
246	MIN_FL_JUMBO_ENTRIES = 32
247};
248
249struct filter_info {
250	u32 sip;
251	u32 sip_mask;
252	u32 dip;
253	u16 sport;
254	u16 dport;
255	u32 vlan:12;
256	u32 vlan_prio:3;
257	u32 mac_hit:1;
258	u32 mac_idx:4;
259	u32 mac_vld:1;
260	u32 pkt_type:2;
261	u32 report_filter_id:1;
262	u32 pass:1;
263	u32 rss:1;
264	u32 qset:3;
265	u32 locked:1;
266	u32 valid:1;
267};
268
269enum { FILTER_NO_VLAN_PRI = 7 };
270
271#define EEPROM_MAGIC 0x38E2F10C
272
273#define PORT_MASK ((1 << MAX_NPORTS) - 1)
274
275/* Table for probing the cards.  The desc field isn't actually used */
276struct cxgb_ident {
277	uint16_t	vendor;
278	uint16_t	device;
279	int		index;
280	char		*desc;
281} cxgb_identifiers[] = {
282	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
283	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
284	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
285	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
286	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
287	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
288	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
289	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
290	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
291	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
292	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
293	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "N310E"},
294	{0, 0, 0, NULL}
295};
296
297static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
298
299
300static __inline char
301t3rev2char(struct adapter *adapter)
302{
303	char rev = 'z';
304
305	switch(adapter->params.rev) {
306	case T3_REV_A:
307		rev = 'a';
308		break;
309	case T3_REV_B:
310	case T3_REV_B2:
311		rev = 'b';
312		break;
313	case T3_REV_C:
314		rev = 'c';
315		break;
316	}
317	return rev;
318}
319
320static struct cxgb_ident *
321cxgb_get_ident(device_t dev)
322{
323	struct cxgb_ident *id;
324
325	for (id = cxgb_identifiers; id->desc != NULL; id++) {
326		if ((id->vendor == pci_get_vendor(dev)) &&
327		    (id->device == pci_get_device(dev))) {
328			return (id);
329		}
330	}
331	return (NULL);
332}
333
334static const struct adapter_info *
335cxgb_get_adapter_info(device_t dev)
336{
337	struct cxgb_ident *id;
338	const struct adapter_info *ai;
339
340	id = cxgb_get_ident(dev);
341	if (id == NULL)
342		return (NULL);
343
344	ai = t3_get_adapter_info(id->index);
345
346	return (ai);
347}
348
349static int
350cxgb_controller_probe(device_t dev)
351{
352	const struct adapter_info *ai;
353	char *ports, buf[80];
354	int nports;
355	struct adapter *sc = device_get_softc(dev);
356
357	ai = cxgb_get_adapter_info(dev);
358	if (ai == NULL)
359		return (ENXIO);
360
361	nports = ai->nports0 + ai->nports1;
362	if (nports == 1)
363		ports = "port";
364	else
365		ports = "ports";
366
367	snprintf(buf, sizeof(buf), "%s %sNIC, rev: %d nports: %d %s",
368	    ai->desc, is_offload(sc) ? "R" : "",
369	    sc->params.rev, nports, ports);
370	device_set_desc_copy(dev, buf);
371	return (BUS_PROBE_DEFAULT);
372}
373
374#define FW_FNAME "cxgb_t3fw"
375#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
376#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
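
/*
 * These image names are resolved through firmware(9): firmware_get() looks
 * the image up among the registered firmware modules and, if it is not
 * registered yet, attempts to load a kernel module of the same name (e.g.
 * cxgb_t3fw.ko for FW_FNAME).  The %c in the TP image names is filled in
 * with the chip revision from t3rev2char().
 */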
377
378static int
379upgrade_fw(adapter_t *sc)
380{
381#ifdef FIRMWARE_LATEST
382	const struct firmware *fw;
383#else
384	struct firmware *fw;
385#endif
386	int status;
387
388	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
389		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
390		return (ENOENT);
391	} else
392		device_printf(sc->dev, "updating firmware on card\n");
393	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
394
395	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
396
397	firmware_put(fw, FIRMWARE_UNLOAD);
398
399	return (status);
400}
401
402/*
403 * The cxgb_controller_attach function is responsible for the initial
404 * bringup of the device.  Its responsibilities include:
405 *
406 *  1. Determine if the device supports MSI or MSI-X.
407 *  2. Allocate bus resources so that we can access the Base Address Register.
408 *  3. Create and initialize mutexes for the controller and its control
409 *     logic such as SGE and MDIO.
410 *  4. Call the hardware-specific setup routine for the adapter as a whole.
411 *  5. Allocate the BAR for doing MSI-X.
412 *  6. Set up the line interrupt if neither MSI-X nor MSI is in use.
413 *  7. Create the driver's taskq.
414 *  8. Start one task queue service thread.
415 *  9. Check if the firmware and SRAM are up-to-date.  They will be
416 *     auto-updated later (before FULL_INIT_DONE), if required.
417 * 10. Create a child device for each MAC (port).
418 * 11. Initialize T3 private state.
419 * 12. Trigger the LED.
420 * 13. Set up offload iff supported.
421 * 14. Reset/restart the tick callout.
422 * 15. Attach sysctls.
423 *
424 * NOTE: Any modification to or deviation from this sequence MUST be
425 * reflected in the list above.  Failure to do so will result in problems
426 * on various error conditions including link flapping.
427 */
428static int
429cxgb_controller_attach(device_t dev)
430{
431	device_t child;
432	const struct adapter_info *ai;
433	struct adapter *sc;
434	int i, error = 0;
435	uint32_t vers;
436	int port_qsets = 1;
437#ifdef MSI_SUPPORTED
438	int msi_needed, reg;
439#endif
440	char buf[80];
441
442	sc = device_get_softc(dev);
443	sc->dev = dev;
444	sc->msi_count = 0;
445	ai = cxgb_get_adapter_info(dev);
446
447	/*
448	 * XXX not really related but a recent addition
449	 */
450#ifdef MSI_SUPPORTED
451	/* find the PCIe link width and set max read request to 4KB */
452	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
453		uint16_t lnk, pectl;
454		lnk = pci_read_config(dev, reg + 0x12, 2);
455		sc->link_width = (lnk >> 4) & 0x3f;
456
457		pectl = pci_read_config(dev, reg + 0x8, 2);
458		pectl = (pectl & ~0x7000) | (5 << 12);
459		pci_write_config(dev, reg + 0x8, pectl, 2);
460	}
461
462	if (sc->link_width != 0 && sc->link_width <= 4 &&
463	    (ai->nports0 + ai->nports1) <= 2) {
464		device_printf(sc->dev,
465		    "PCIe x%d Link, expect reduced performance\n",
466		    sc->link_width);
467	}
468#endif
469	touch_bars(dev);
470	pci_enable_busmaster(dev);
471	/*
472	 * Allocate the registers and make them available to the driver.
473	 * The registers that we care about for NIC mode are in BAR 0
474	 */
475	sc->regs_rid = PCIR_BAR(0);
476	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
477	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
478		device_printf(dev, "Cannot allocate BAR region 0\n");
479		return (ENXIO);
480	}
481	sc->udbs_rid = PCIR_BAR(2);
482	sc->udbs_res = NULL;
483	if (is_offload(sc) &&
484	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
485		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
486		device_printf(dev, "Cannot allocate BAR region 1\n");
487		error = ENXIO;
488		goto out;
489	}
490
491	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
492	    device_get_unit(dev));
493	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
494
495	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
496	    device_get_unit(dev));
497	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
498	    device_get_unit(dev));
499	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
500	    device_get_unit(dev));
501
502	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
503	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
504	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
505
506	sc->bt = rman_get_bustag(sc->regs_res);
507	sc->bh = rman_get_bushandle(sc->regs_res);
508	sc->mmio_len = rman_get_size(sc->regs_res);
509
510	if (t3_prep_adapter(sc, ai, 1) < 0) {
511		printf("prep adapter failed\n");
512		error = ENODEV;
513		goto out;
514	}
515        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516	 * enough messages for the queue sets.  If that fails, try falling
517	 * back to MSI.  If that fails, then try falling back to the legacy
518	 * interrupt pin model.
519	 */
520#ifdef MSI_SUPPORTED
521
522	sc->msix_regs_rid = 0x20;
523	if ((msi_allowed >= 2) &&
524	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
525	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
526
527		if (multiq)
528			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
529		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
530
531		if (pci_msix_count(dev) == 0 ||
532		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
533		    sc->msi_count != msi_needed) {
534			device_printf(dev, "alloc msix failed - "
535				      "msi_count=%d, msi_needed=%d, err=%d; "
536				      "will try MSI\n", sc->msi_count,
537				      msi_needed, error);
538			sc->msi_count = 0;
539			port_qsets = 1;
540			pci_release_msi(dev);
541			bus_release_resource(dev, SYS_RES_MEMORY,
542			    sc->msix_regs_rid, sc->msix_regs_res);
543			sc->msix_regs_res = NULL;
544		} else {
545			sc->flags |= USING_MSIX;
546			sc->cxgb_intr = cxgb_async_intr;
547			device_printf(dev,
548				      "using MSI-X interrupts (%u vectors)\n",
549				      sc->msi_count);
550		}
551	}
552
553	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
554		sc->msi_count = 1;
555		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
556			device_printf(dev, "alloc msi failed - "
557				      "err=%d; will try INTx\n", error);
558			sc->msi_count = 0;
559			port_qsets = 1;
560			pci_release_msi(dev);
561		} else {
562			sc->flags |= USING_MSI;
563			sc->cxgb_intr = t3_intr_msi;
564			device_printf(dev, "using MSI interrupts\n");
565		}
566	}
567#endif
568	if (sc->msi_count == 0) {
569		device_printf(dev, "using line interrupts\n");
570		sc->cxgb_intr = t3b_intr;
571	}
572
573	/* Create a private taskqueue thread for handling driver events */
574#ifdef TASKQUEUE_CURRENT
575	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
576	    taskqueue_thread_enqueue, &sc->tq);
577#else
578	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
579	    taskqueue_thread_enqueue, &sc->tq);
580#endif
581	if (sc->tq == NULL) {
582		device_printf(dev, "failed to allocate controller task queue\n");
583		error = ENOMEM; goto out;
584	}
585
586	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
587	    device_get_nameunit(dev));
588	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
589	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
590
591
592	/* Create a periodic callout for checking adapter status */
593	callout_init(&sc->cxgb_tick_ch, TRUE);
594
595	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
596		/*
597		 * Warn user that a firmware update will be attempted in init.
598		 */
599		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
600		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
601		sc->flags &= ~FW_UPTODATE;
602	} else {
603		sc->flags |= FW_UPTODATE;
604	}
605
606	if (t3_check_tpsram_version(sc) < 0) {
607		/*
608		 * Warn user that an SRAM update will be attempted in init.
609		 */
610		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
611		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
612		sc->flags &= ~TPS_UPTODATE;
613	} else {
614		sc->flags |= TPS_UPTODATE;
615	}
616
617	/*
618	 * Create a child device for each MAC.  The ethernet attachment
619	 * will be done in these children.
620	 */
621	for (i = 0; i < (sc)->params.nports; i++) {
622		struct port_info *pi;
623
624		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
625			device_printf(dev, "failed to add child port\n");
626			error = EINVAL;
627			goto out;
628		}
629		pi = &sc->port[i];
630		pi->adapter = sc;
631		pi->nqsets = port_qsets;
632		pi->first_qset = i*port_qsets;
633		pi->port_id = i;
634		pi->tx_chan = i >= ai->nports0;
635		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
636		sc->rxpkt_map[pi->txpkt_intf] = i;
637		sc->port[i].tx_chan = i >= ai->nports0;
638		sc->portdev[i] = child;
639		device_set_softc(child, pi);
640	}
641	if ((error = bus_generic_attach(dev)) != 0)
642		goto out;
643
644	/* initialize sge private state */
645	t3_sge_init_adapter(sc);
646
647	t3_led_ready(sc);
648
649	cxgb_offload_init();
650	if (is_offload(sc)) {
651		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
652		cxgb_adapter_ofld(sc);
653        }
654	error = t3_get_fw_version(sc, &vers);
655	if (error)
656		goto out;
657
658	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
659	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
660	    G_FW_VERSION_MICRO(vers));
661
662	snprintf(buf, sizeof(buf), "%s\t E/C: %s S/N: %s",
663		 ai->desc,
664		 sc->params.vpd.ec, sc->params.vpd.sn);
665	device_set_desc_copy(dev, buf);
666
667	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
668		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
669		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
670
671	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
672	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
673	t3_add_attach_sysctls(sc);
674out:
675	if (error)
676		cxgb_free(sc);
677
678	return (error);
679}
680
681/*
682 * The cxgb_controller_detach routine is called when the device is
683 * unloaded from the system.
684 */
685
686static int
687cxgb_controller_detach(device_t dev)
688{
689	struct adapter *sc;
690
691	sc = device_get_softc(dev);
692
693	cxgb_free(sc);
694
695	return (0);
696}
697
698/*
699 * cxgb_free() is called by the cxgb_controller_detach() routine
700 * to tear down the structures that were built up in
701 * cxgb_controller_attach(), and should be the final piece of work
702 * done when fully unloading the driver.
703 *
704 * Its responsibilities include:
705 *  1. Shutting down the threads started by the cxgb_controller_attach()
706 *     routine.
707 *  2. Stopping the lower-level device and all callouts (cxgb_down()).
708 *  3. Detaching all of the port devices created during the
709 *     cxgb_controller_attach() routine.
710 *  4. Removing the device children created via cxgb_controller_attach().
711 *  5. Releasing PCI resources associated with the device.
712 *  6. Turning off the offload support, iff it was turned on.
713 *  7. Destroying the mutexes created in cxgb_controller_attach().
714 *
715 */
716static void
717cxgb_free(struct adapter *sc)
718{
719	int i;
720
721	ADAPTER_LOCK(sc);
722	sc->flags |= CXGB_SHUTDOWN;
723	ADAPTER_UNLOCK(sc);
724
725	/*
726	 * Make sure all child devices are gone.
727	 */
728	bus_generic_detach(sc->dev);
729	for (i = 0; i < (sc)->params.nports; i++) {
730		if (sc->portdev[i] &&
731		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
732			device_printf(sc->dev, "failed to delete child port\n");
733	}
734
735	/*
736	 * At this point, it is as if cxgb_port_detach has run on all ports, and
737	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
738	 * all open devices have been closed.
739	 */
740	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
741					   __func__, sc->open_device_map));
742	for (i = 0; i < sc->params.nports; i++) {
743		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
744						  __func__, i));
745	}
746
747	/*
748	 * Finish off the adapter's callouts.
749	 */
750	callout_drain(&sc->cxgb_tick_ch);
751	callout_drain(&sc->sge_timer_ch);
752
753	/*
754	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
755	 * sysctls are cleaned up by the kernel linker.
756	 */
757	if (sc->flags & FULL_INIT_DONE) {
758 		t3_free_sge_resources(sc);
759 		sc->flags &= ~FULL_INIT_DONE;
760 	}
761
762	/*
763	 * Release all interrupt resources.
764	 */
765	cxgb_teardown_interrupts(sc);
766#ifdef MSI_SUPPORTED
767	if (sc->flags & (USING_MSI | USING_MSIX)) {
768		device_printf(sc->dev, "releasing msi message(s)\n");
769		pci_release_msi(sc->dev);
770	} else {
771		device_printf(sc->dev, "no msi message to release\n");
772	}
773
774	if (sc->msix_regs_res != NULL) {
775		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
776		    sc->msix_regs_res);
777	}
778#endif
779
780	/*
781	 * Free the adapter's taskqueue.
782	 */
783	if (sc->tq != NULL) {
784		taskqueue_free(sc->tq);
785		sc->tq = NULL;
786	}
787
788	if (is_offload(sc)) {
789		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
790		cxgb_adapter_unofld(sc);
791	}
792
793#ifdef notyet
794	if (sc->flags & CXGB_OFLD_INIT)
795		cxgb_offload_deactivate(sc);
796#endif
797	free(sc->filters, M_DEVBUF);
798	t3_sge_free(sc);
799
800	cxgb_offload_exit();
801
802	if (sc->udbs_res != NULL)
803		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
804		    sc->udbs_res);
805
806	if (sc->regs_res != NULL)
807		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
808		    sc->regs_res);
809
810	MTX_DESTROY(&sc->mdio_lock);
811	MTX_DESTROY(&sc->sge.reg_lock);
812	MTX_DESTROY(&sc->elmer_lock);
813	ADAPTER_LOCK_DEINIT(sc);
814}
815
816/**
817 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
818 *	@sc: the controller softc
819 *
820 *	Determines how many sets of SGE queues to use and initializes them.
821 *	We support multiple queue sets per port if we have MSI-X, otherwise
822 *	just one queue set per port.
823 */
824static int
825setup_sge_qsets(adapter_t *sc)
826{
827	int i, j, err, irq_idx = 0, qset_idx = 0;
828	u_int ntxq = SGE_TXQ_PER_SET;
829
830	if ((err = t3_sge_alloc(sc)) != 0) {
831		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
832		return (err);
833	}
834
835	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
836		irq_idx = -1;
837
838	for (i = 0; i < (sc)->params.nports; i++) {
839		struct port_info *pi = &sc->port[i];
840
841		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
842			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
843			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
844			    &sc->params.sge.qset[qset_idx], ntxq, pi);
845			if (err) {
846				t3_free_sge_resources(sc);
847				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
848				    err);
849				return (err);
850			}
851		}
852	}
853
854	return (0);
855}
856
857static void
858cxgb_teardown_interrupts(adapter_t *sc)
859{
860	int i;
861
862	for (i = 0; i < SGE_QSETS; i++) {
863		if (sc->msix_intr_tag[i] == NULL) {
864
865			/* Should have been set up fully or not at all */
866			KASSERT(sc->msix_irq_res[i] == NULL &&
867				sc->msix_irq_rid[i] == 0,
868				("%s: half-done interrupt (%d).", __func__, i));
869
870			continue;
871		}
872
873		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
874				  sc->msix_intr_tag[i]);
875		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
876				     sc->msix_irq_res[i]);
877
878		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
879		sc->msix_irq_rid[i] = 0;
880	}
881
882	if (sc->intr_tag) {
883		KASSERT(sc->irq_res != NULL,
884			("%s: half-done interrupt.", __func__));
885
886		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
887		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
888				     sc->irq_res);
889
890		sc->irq_res = sc->intr_tag = NULL;
891		sc->irq_rid = 0;
892	}
893}
894
895static int
896cxgb_setup_interrupts(adapter_t *sc)
897{
898	struct resource *res;
899	void *tag;
900	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
901
902	sc->irq_rid = intr_flag ? 1 : 0;
903	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
904					     RF_SHAREABLE | RF_ACTIVE);
905	if (sc->irq_res == NULL) {
906		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
907			      intr_flag, sc->irq_rid);
908		err = EINVAL;
909		sc->irq_rid = 0;
910	} else {
911		err = bus_setup_intr(sc->dev, sc->irq_res,
912				     INTR_MPSAFE | INTR_TYPE_NET,
913#ifdef INTR_FILTERS
914				     NULL,
915#endif
916				     sc->cxgb_intr, sc, &sc->intr_tag);
917
918		if (err) {
919			device_printf(sc->dev,
920				      "Cannot set up interrupt (%x, %u, %d)\n",
921				      intr_flag, sc->irq_rid, err);
922			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
923					     sc->irq_res);
924			sc->irq_res = sc->intr_tag = NULL;
925			sc->irq_rid = 0;
926		}
927	}
928
929	/* That's all for INTx or MSI */
930	if (!(intr_flag & USING_MSIX) || err)
931		return (err);
932
933	for (i = 0; i < sc->msi_count - 1; i++) {
934		rid = i + 2;
935		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
936					     RF_SHAREABLE | RF_ACTIVE);
937		if (res == NULL) {
938			device_printf(sc->dev, "Cannot allocate interrupt "
939				      "for message %d\n", rid);
940			err = EINVAL;
941			break;
942		}
943
944		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
945#ifdef INTR_FILTERS
946				     NULL,
947#endif
948				     t3_intr_msix, &sc->sge.qs[i], &tag);
949		if (err) {
950			device_printf(sc->dev, "Cannot set up interrupt "
951				      "for message %d (%d)\n", rid, err);
952			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
953			break;
954		}
955
956		sc->msix_irq_rid[i] = rid;
957		sc->msix_irq_res[i] = res;
958		sc->msix_intr_tag[i] = tag;
959	}
960
961	if (err)
962		cxgb_teardown_interrupts(sc);
963
964	return (err);
965}
966
967
968static int
969cxgb_port_probe(device_t dev)
970{
971	struct port_info *p;
972	char buf[80];
973	const char *desc;
974
975	p = device_get_softc(dev);
976	desc = p->phy.desc;
977	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
978	device_set_desc_copy(dev, buf);
979	return (0);
980}
981
982
983static int
984cxgb_makedev(struct port_info *pi)
985{
986
987	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
988	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
989
990	if (pi->port_cdev == NULL)
991		return (ENOMEM);
992
993	pi->port_cdev->si_drv1 = (void *)pi;
994
995	return (0);
996}
997
998#ifndef LRO_SUPPORTED
999#ifdef IFCAP_LRO
1000#undef IFCAP_LRO
1001#endif
1002#define IFCAP_LRO 0x0
1003#endif
1004
1005#ifdef TSO_SUPPORTED
1006#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
1007/* Don't enable TSO6 yet */
1008#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
1009#else
1010#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
1011/* Don't enable TSO6 yet */
1012#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
1013#define IFCAP_TSO4 0x0
1014#define IFCAP_TSO6 0x0
1015#define CSUM_TSO   0x0
1016#endif
1017
1018
1019static int
1020cxgb_port_attach(device_t dev)
1021{
1022	struct port_info *p;
1023	struct ifnet *ifp;
1024	int err, media_flags;
1025	struct adapter *sc;
1026
1027
1028	p = device_get_softc(dev);
1029	sc = p->adapter;
1030	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1031	    device_get_unit(device_get_parent(dev)), p->port_id);
1032	PORT_LOCK_INIT(p, p->lockbuf);
1033
1034	/* Allocate an ifnet object and set it up */
1035	ifp = p->ifp = if_alloc(IFT_ETHER);
1036	if (ifp == NULL) {
1037		device_printf(dev, "Cannot allocate ifnet\n");
1038		return (ENOMEM);
1039	}
1040
1041	/*
1042	 * Note that there is currently no watchdog timer.
1043	 */
1044	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1045	ifp->if_init = cxgb_init;
1046	ifp->if_softc = p;
1047	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1048	ifp->if_ioctl = cxgb_ioctl;
1049	ifp->if_start = cxgb_start;
1050
1051
1052	ifp->if_timer = 0;	/* Disable ifnet watchdog */
1053	ifp->if_watchdog = NULL;
1054
1055	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1056	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1057	IFQ_SET_READY(&ifp->if_snd);
1058
1059	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
1060	ifp->if_capabilities |= CXGB_CAP;
1061	ifp->if_capenable |= CXGB_CAP_ENABLE;
1062	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
1063	/*
1064	 * disable TSO on 4-port - it isn't supported by the firmware yet
1065	 */
1066	if (p->adapter->params.nports > 2) {
1067		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
1068		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
1069		ifp->if_hwassist &= ~CSUM_TSO;
1070	}
1071
1072	ether_ifattach(ifp, p->hw_addr);
1073	ifp->if_transmit = cxgb_transmit;
1074	ifp->if_qflush = cxgb_qflush;
1075
1076	/*
1077	 * Only default to jumbo frames on 10GigE
1078	 */
1079	if (p->adapter->params.nports <= 2)
1080		ifp->if_mtu = ETHERMTU_JUMBO;
1081	if ((err = cxgb_makedev(p)) != 0) {
1082		printf("makedev failed %d\n", err);
1083		return (err);
1084	}
1085	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1086	    cxgb_media_status);
1087
1088	if (!strcmp(p->phy.desc,	"10GBASE-CX4")) {
1089		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
1090	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
1091		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
1092	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
1093		media_flags = cxgb_ifm_type(p->phy.modtype);
1094	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
1095		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
1096		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
1097			    0, NULL);
1098		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
1099			    0, NULL);
1100		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
1101			    0, NULL);
1102		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
1103			    0, NULL);
1104		media_flags = 0;
1105	} else if (!strcmp(p->phy.desc, "1000BASE-X")) {
1106		/*
1107		 * XXX: This is not very accurate.  Fix when common code
1108		 * returns more specific value - eg 1000BASE-SX, LX, etc.
1109		 *
1110		 * XXX: In the meantime, don't lie. Consider setting IFM_AUTO
1111		 * instead of SX.
1112		 */
1113		media_flags = IFM_ETHER | IFM_1000_SX | IFM_FDX;
1114	} else {
1115	        printf("unsupported media type %s\n", p->phy.desc);
1116		return (ENXIO);
1117	}
1118	if (media_flags) {
1119		/*
1120		 * Note the modtype on which we based our flags.  If modtype
1121		 * changes, we'll redo the ifmedia for this ifp.  modtype may
1122		 * change when transceivers are plugged in/out, and in other
1123		 * situations.
1124		 */
1125		ifmedia_add(&p->media, media_flags, p->phy.modtype, NULL);
1126		ifmedia_set(&p->media, media_flags);
1127	} else {
1128		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1129		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
1130	}
1131
1132	t3_sge_init_port(p);
1133
1134	return (err);
1135}
1136
1137/*
1138 * cxgb_port_detach() is called via the device_detach method when
1139 * cxgb_free() calls bus_generic_detach().  It is responsible for
1140 * removing the device from the view of the kernel, i.e. from all
1141 * interface lists, etc.  This routine is only called when the driver is
1142 * being unloaded, not when the link goes down.
1143 */
1144static int
1145cxgb_port_detach(device_t dev)
1146{
1147	struct port_info *p;
1148	struct adapter *sc;
1149	int i;
1150
1151	p = device_get_softc(dev);
1152	sc = p->adapter;
1153
1154	cxgb_begin_detach(p);
1155
1156	if (p->port_cdev != NULL)
1157		destroy_dev(p->port_cdev);
1158
1159	cxgb_uninit_synchronized(p);
1160	ether_ifdetach(p->ifp);
1161
1162	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1163		struct sge_qset *qs = &sc->sge.qs[i];
1164		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1165
1166		callout_drain(&txq->txq_watchdog);
1167		callout_drain(&txq->txq_timer);
1168	}
1169
1170	PORT_LOCK_DEINIT(p);
1171	if_free(p->ifp);
1172	p->ifp = NULL;
1173
1174	cxgb_end_op(p);
1175	return (0);
1176}
1177
1178void
1179t3_fatal_err(struct adapter *sc)
1180{
1181	u_int fw_status[4];
1182
1183	if (sc->flags & FULL_INIT_DONE) {
1184		t3_sge_stop(sc);
1185		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1186		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1187		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1188		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1189		t3_intr_disable(sc);
1190	}
1191	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1192	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1193		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1194		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1195}
1196
1197int
1198t3_os_find_pci_capability(adapter_t *sc, int cap)
1199{
1200	device_t dev;
1201	struct pci_devinfo *dinfo;
1202	pcicfgregs *cfg;
1203	uint32_t status;
1204	uint8_t ptr;
1205
1206	dev = sc->dev;
1207	dinfo = device_get_ivars(dev);
1208	cfg = &dinfo->cfg;
1209
1210	status = pci_read_config(dev, PCIR_STATUS, 2);
1211	if (!(status & PCIM_STATUS_CAPPRESENT))
1212		return (0);
1213
1214	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1215	case 0:
1216	case 1:
1217		ptr = PCIR_CAP_PTR;
1218		break;
1219	case 2:
1220		ptr = PCIR_CAP_PTR_2;
1221		break;
1222	default:
1223		return (0);
1224		break;
1225	}
1226	ptr = pci_read_config(dev, ptr, 1);
1227
1228	while (ptr != 0) {
1229		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1230			return (ptr);
1231		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1232	}
1233
1234	return (0);
1235}
1236
1237int
1238t3_os_pci_save_state(struct adapter *sc)
1239{
1240	device_t dev;
1241	struct pci_devinfo *dinfo;
1242
1243	dev = sc->dev;
1244	dinfo = device_get_ivars(dev);
1245
1246	pci_cfg_save(dev, dinfo, 0);
1247	return (0);
1248}
1249
1250int
1251t3_os_pci_restore_state(struct adapter *sc)
1252{
1253	device_t dev;
1254	struct pci_devinfo *dinfo;
1255
1256	dev = sc->dev;
1257	dinfo = device_get_ivars(dev);
1258
1259	pci_cfg_restore(dev, dinfo);
1260	return (0);
1261}
1262
1263/**
1264 *	t3_os_link_changed - handle link status changes
1265 *	@adapter: the adapter associated with the link change
1266 *	@port_id: the port index whose link status has changed
1267 *	@link_status: the new status of the link
1268 *	@speed: the new speed setting
1269 *	@duplex: the new duplex setting
1270 *	@fc: the new flow-control setting
1271 *
1272 *	This is the OS-dependent handler for link status changes.  The OS
1273 *	neutral handler takes care of most of the processing for these events,
1274 *	then calls this handler for any OS-specific processing.
1275 */
1276void
1277t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1278     int duplex, int fc)
1279{
1280	struct port_info *pi = &adapter->port[port_id];
1281	struct ifnet *ifp = pi->ifp;
1282
1283	/* no race with detach, so ifp should always be good */
1284	KASSERT(ifp, ("%s: if detached.", __func__));
1285
1286	if (link_status) {
1287		ifp->if_baudrate = IF_Mbps(speed);
1288		if_link_state_change(ifp, LINK_STATE_UP);
1289	} else
1290		if_link_state_change(ifp, LINK_STATE_DOWN);
1291}
1292
1293/**
1294 *	t3_os_phymod_changed - handle PHY module changes
1295 *	@adap: the adapter whose PHY module changed
1296 *	@port_id: the index of the port with the changed module
1297 *
1298 *	This is the OS-dependent handler for PHY module changes.  It is
1299 *	invoked when a PHY module is removed or inserted for any OS-specific
1300 *	processing.
1301 */
1302void t3_os_phymod_changed(struct adapter *adap, int port_id)
1303{
1304	static const char *mod_str[] = {
1305		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1306	};
1307
1308	struct port_info *pi = &adap->port[port_id];
1309
1310	if (pi->phy.modtype == phy_modtype_none)
1311		device_printf(adap->dev, "PHY module unplugged\n");
1312	else {
1313		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
1314		    ("invalid PHY module type %d", pi->phy.modtype));
1315		device_printf(adap->dev, "%s PHY module inserted\n",
1316		    mod_str[pi->phy.modtype]);
1317	}
1318}
1319
1320/*
1321 * Interrupt-context handler for external (PHY) interrupts.
1322 */
1323void
1324t3_os_ext_intr_handler(adapter_t *sc)
1325{
1326	if (cxgb_debug)
1327		printf("t3_os_ext_intr_handler\n");
1328	/*
1329	 * Schedule a task to handle external interrupts as they may be slow
1330	 * and we use a mutex to protect MDIO registers.  We disable PHY
1331	 * interrupts in the meantime and let the task reenable them when
1332	 * it's done.
1333	 */
1334	if (sc->slow_intr_mask) {
1335		ADAPTER_LOCK(sc);
1336		sc->slow_intr_mask &= ~F_T3DBG;
1337		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1338		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1339		ADAPTER_UNLOCK(sc);
1340	}
1341}
1342
1343void
1344t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1345{
1346
1347	/*
1348	 * The ifnet might not be allocated before this gets called,
1349	 * as this is called early on in attach by t3_prep_adapter, so
1350	 * save the address off in the port structure.
1351	 */
1352	if (cxgb_debug)
1353		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1354	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1355}
1356
1357/*
1358 * Programs the XGMAC based on the settings in the ifnet.  These settings
1359 * include MTU, MAC address, mcast addresses, etc.
1360 */
1361static void
1362cxgb_update_mac_settings(struct port_info *p)
1363{
1364	struct ifnet *ifp = p->ifp;
1365	struct t3_rx_mode rm;
1366	struct cmac *mac = &p->mac;
1367	int mtu, hwtagging;
1368
1369	PORT_LOCK_ASSERT_OWNED(p);
1370
1371	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1372
1373	mtu = ifp->if_mtu;
1374	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1375		mtu += ETHER_VLAN_ENCAP_LEN;
1376
1377	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1378
1379	t3_mac_set_mtu(mac, mtu);
1380	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1381	t3_mac_set_address(mac, 0, p->hw_addr);
1382	t3_init_rx_mode(&rm, p);
1383	t3_mac_set_rx_mode(mac, &rm);
1384}
1385
1386
1387static int
1388await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1389			      unsigned long n)
1390{
1391	int attempts = 5;
1392
1393	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1394		if (!--attempts)
1395			return (ETIMEDOUT);
1396		t3_os_sleep(10);
1397	}
1398	return 0;
1399}
1400
1401static int
1402init_tp_parity(struct adapter *adap)
1403{
1404	int i;
1405	struct mbuf *m;
1406	struct cpl_set_tcb_field *greq;
1407	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1408
1409	t3_tp_set_offload_mode(adap, 1);
1410
1411	for (i = 0; i < 16; i++) {
1412		struct cpl_smt_write_req *req;
1413
1414		m = m_gethdr(M_WAITOK, MT_DATA);
1415		req = mtod(m, struct cpl_smt_write_req *);
1416		m->m_len = m->m_pkthdr.len = sizeof(*req);
1417		memset(req, 0, sizeof(*req));
1418		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1419		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1420		req->iff = i;
1421		t3_mgmt_tx(adap, m);
1422	}
1423
1424	for (i = 0; i < 2048; i++) {
1425		struct cpl_l2t_write_req *req;
1426
1427		m = m_gethdr(M_WAITOK, MT_DATA);
1428		req = mtod(m, struct cpl_l2t_write_req *);
1429		m->m_len = m->m_pkthdr.len = sizeof(*req);
1430		memset(req, 0, sizeof(*req));
1431		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1432		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1433		req->params = htonl(V_L2T_W_IDX(i));
1434		t3_mgmt_tx(adap, m);
1435	}
1436
1437	for (i = 0; i < 2048; i++) {
1438		struct cpl_rte_write_req *req;
1439
1440		m = m_gethdr(M_WAITOK, MT_DATA);
1441		req = mtod(m, struct cpl_rte_write_req *);
1442		m->m_len = m->m_pkthdr.len = sizeof(*req);
1443		memset(req, 0, sizeof(*req));
1444		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1445		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1446		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1447		t3_mgmt_tx(adap, m);
1448	}
1449
1450	m = m_gethdr(M_WAITOK, MT_DATA);
1451	greq = mtod(m, struct cpl_set_tcb_field *);
1452	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1453	memset(greq, 0, sizeof(*greq));
1454	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1455	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1456	greq->mask = htobe64(1);
1457	t3_mgmt_tx(adap, m);
1458
1459	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1460	t3_tp_set_offload_mode(adap, 0);
1461	return (i);
1462}
1463
1464/**
1465 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1466 *	@adap: the adapter
1467 *
1468 *	Sets up RSS to distribute packets to multiple receive queues.  We
1469 *	configure the RSS CPU lookup table to distribute to the number of HW
1470 *	receive queues, and the response queue lookup table to narrow that
1471 *	down to the response queues actually configured for each port.
1472 *	We always configure the RSS mapping for two ports since the mapping
1473 *	table has plenty of entries.
1474 */
1475static void
1476setup_rss(adapter_t *adap)
1477{
1478	int i;
1479	u_int nq[2];
1480	uint8_t cpus[SGE_QSETS + 1];
1481	uint16_t rspq_map[RSS_TABLE_SIZE];
1482
1483	for (i = 0; i < SGE_QSETS; ++i)
1484		cpus[i] = i;
1485	cpus[SGE_QSETS] = 0xff;
1486
1487	nq[0] = nq[1] = 0;
1488	for_each_port(adap, i) {
1489		const struct port_info *pi = adap2pinfo(adap, i);
1490
1491		nq[pi->tx_chan] += pi->nqsets;
1492	}
1493	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1494		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1495		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1496	}
1497	/* Calculate the reverse RSS map table */
1498	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1499		if (adap->rrss_map[rspq_map[i]] == 0xff)
1500			adap->rrss_map[rspq_map[i]] = i;
1501
1502	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1503		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1504	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1505	              cpus, rspq_map);
1506
1507}
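
/*
 * Worked example (hypothetical configuration): on a two-port adapter with
 * four queue sets per port, nq[0] = nq[1] = 4, so the first half of
 * rspq_map cycles through qsets 0..3 and the second half through qsets
 * 4..7.  rrss_map then records, for each qset, the first RSS table index
 * that points to it.
 */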
1508
1509/*
1510 * Sends an mbuf to an offload queue driver; currently just a thin
1511 * wrapper around t3_offload_tx().
1512 */
1513static inline int
1514offload_tx(struct t3cdev *tdev, struct mbuf *m)
1515{
1516	int ret;
1517
1518	ret = t3_offload_tx(tdev, m);
1519	return (ret);
1520}
1521
1522static int
1523write_smt_entry(struct adapter *adapter, int idx)
1524{
1525	struct port_info *pi = &adapter->port[idx];
1526	struct cpl_smt_write_req *req;
1527	struct mbuf *m;
1528
1529	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1530		return (ENOMEM);
1531
1532	req = mtod(m, struct cpl_smt_write_req *);
1533	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1534
1535	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1536	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1537	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1538	req->iff = idx;
1539	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1540	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1541
1542	m_set_priority(m, 1);
1543
1544	offload_tx(&adapter->tdev, m);
1545
1546	return (0);
1547}
1548
1549static int
1550init_smt(struct adapter *adapter)
1551{
1552	int i;
1553
1554	for_each_port(adapter, i)
1555		write_smt_entry(adapter, i);
1556	return 0;
1557}
1558
1559static void
1560init_port_mtus(adapter_t *adapter)
1561{
1562	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1563
1564	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1565}
1566
1567static void
1568send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1569			      int hi, int port)
1570{
1571	struct mbuf *m;
1572	struct mngt_pktsched_wr *req;
1573
1574	m = m_gethdr(M_DONTWAIT, MT_DATA);
1575	if (m) {
1576		req = mtod(m, struct mngt_pktsched_wr *);
1577		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1578		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1579		req->sched = sched;
1580		req->idx = qidx;
1581		req->min = lo;
1582		req->max = hi;
1583		req->binding = port;
1584		m->m_len = m->m_pkthdr.len = sizeof(*req);
1585		t3_mgmt_tx(adap, m);
1586	}
1587}
1588
1589static void
1590bind_qsets(adapter_t *sc)
1591{
1592	int i, j;
1593
1594	for (i = 0; i < (sc)->params.nports; ++i) {
1595		const struct port_info *pi = adap2pinfo(sc, i);
1596
1597		for (j = 0; j < pi->nqsets; ++j) {
1598			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1599					  -1, pi->tx_chan);
1600
1601		}
1602	}
1603}
1604
1605static void
1606update_tpeeprom(struct adapter *adap)
1607{
1608#ifdef FIRMWARE_LATEST
1609	const struct firmware *tpeeprom;
1610#else
1611	struct firmware *tpeeprom;
1612#endif
1613
1614	uint32_t version;
1615	unsigned int major, minor;
1616	int ret, len;
1617	char rev, name[32];
1618
1619	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1620
1621	major = G_TP_VERSION_MAJOR(version);
1622	minor = G_TP_VERSION_MINOR(version);
1623	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1624		return;
1625
1626	rev = t3rev2char(adap);
1627	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1628
1629	tpeeprom = firmware_get(name);
1630	if (tpeeprom == NULL) {
1631		device_printf(adap->dev,
1632			      "could not load TP EEPROM: unable to load %s\n",
1633			      name);
1634		return;
1635	}
1636
1637	len = tpeeprom->datasize - 4;
1638
1639	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1640	if (ret)
1641		goto release_tpeeprom;
1642
1643	if (len != TP_SRAM_LEN) {
1644		device_printf(adap->dev,
1645			      "%s length is wrong len=%d expected=%d\n", name,
1646			      len, TP_SRAM_LEN);
1647		goto release_tpeeprom;
1648	}
1649
1650	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1651	    TP_SRAM_OFFSET);
1652
1653	if (!ret) {
1654		device_printf(adap->dev,
1655			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1656			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1657	} else
1658		device_printf(adap->dev,
1659			      "Protocol SRAM image update in EEPROM failed\n");
1660
1661release_tpeeprom:
1662	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1663
1664	return;
1665}
1666
1667static int
1668update_tpsram(struct adapter *adap)
1669{
1670#ifdef FIRMWARE_LATEST
1671	const struct firmware *tpsram;
1672#else
1673	struct firmware *tpsram;
1674#endif
1675	int ret;
1676	char rev, name[32];
1677
1678	rev = t3rev2char(adap);
1679	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1680
1681	update_tpeeprom(adap);
1682
1683	tpsram = firmware_get(name);
1684	if (tpsram == NULL){
1685		device_printf(adap->dev, "could not load TP SRAM\n");
1686		return (EINVAL);
1687	} else
1688		device_printf(adap->dev, "updating TP SRAM\n");
1689
1690	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1691	if (ret)
1692		goto release_tpsram;
1693
1694	ret = t3_set_proto_sram(adap, tpsram->data);
1695	if (ret)
1696		device_printf(adap->dev, "loading protocol SRAM failed\n");
1697
1698release_tpsram:
1699	firmware_put(tpsram, FIRMWARE_UNLOAD);
1700
1701	return ret;
1702}
1703
1704/**
1705 *	cxgb_up - enable the adapter
1706 *	@adap: adapter being enabled
1707 *
1708 *	Called when the first port is enabled, this function performs the
1709 *	actions necessary to make an adapter operational, such as completing
1710 *	the initialization of HW modules, and enabling interrupts.
1711 */
1712static int
1713cxgb_up(struct adapter *sc)
1714{
1715	int err = 0;
1716
1717	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1718	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1719					   __func__, sc->open_device_map));
1720
1721	if ((sc->flags & FULL_INIT_DONE) == 0) {
1722
1723		if ((sc->flags & FW_UPTODATE) == 0)
1724			if ((err = upgrade_fw(sc)))
1725				goto out;
1726
1727		if ((sc->flags & TPS_UPTODATE) == 0)
1728			if ((err = update_tpsram(sc)))
1729				goto out;
1730
1731		err = t3_init_hw(sc, 0);
1732		if (err)
1733			goto out;
1734
1735		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1736		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1737
1738		err = setup_sge_qsets(sc);
1739		if (err)
1740			goto out;
1741
1742		setup_rss(sc);
1743
1744		t3_intr_clear(sc);
1745		err = cxgb_setup_interrupts(sc);
1746		if (err)
1747			goto out;
1748
1749		t3_add_configured_sysctls(sc);
1750		sc->flags |= FULL_INIT_DONE;
1751	}
1752
1753	t3_intr_clear(sc);
1754	t3_sge_start(sc);
1755	t3_intr_enable(sc);
1756
1757	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1758	    is_offload(sc) && init_tp_parity(sc) == 0)
1759		sc->flags |= TP_PARITY_INIT;
1760
1761	if (sc->flags & TP_PARITY_INIT) {
1762		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1763		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1764	}
1765
1766	if (!(sc->flags & QUEUES_BOUND)) {
1767		bind_qsets(sc);
1768		sc->flags |= QUEUES_BOUND;
1769	}
1770
1771	t3_sge_reset_adapter(sc);
1772out:
1773	return (err);
1774}
1775
1776/*
1777 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1778 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1779 * during controller_detach, not here.
1780 */
1781static void
1782cxgb_down(struct adapter *sc)
1783{
1784	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1785
1786	t3_sge_stop(sc);
1787	t3_intr_disable(sc);
1788}
1789
1790static int
1791offload_open(struct port_info *pi)
1792{
1793	struct adapter *sc = pi->adapter;
1794	struct t3cdev *tdev = &sc->tdev;
1795
1796	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1797
1798	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1799
1800	t3_tp_set_offload_mode(sc, 1);
1801	tdev->lldev = pi->ifp;
1802	init_port_mtus(sc);
1803	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1804		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1805	init_smt(sc);
1806	cxgb_add_clients(tdev);
1807
1808	return (0);
1809}
1810
1811static int
1812offload_close(struct t3cdev *tdev)
1813{
1814	struct adapter *adapter = tdev2adap(tdev);
1815
1816	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1817		return (0);
1818
1819	/* Call back all registered clients */
1820	cxgb_remove_clients(tdev);
1821
1822	tdev->lldev = NULL;
1823	cxgb_set_dummy_ops(tdev);
1824	t3_tp_set_offload_mode(adapter, 0);
1825
1826	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1827
1828	return (0);
1829}
1830
1831/*
1832 * Begin a synchronized operation.  If this call succeeds, it is guaranteed that
1833 * no one will remove the port or its ifp from underneath the caller.  Caller is
1834 * also granted exclusive access to open_device_map.
1835 *
1836 * "Operation" here means init, uninit, detach, and ioctl service.
1837 *
1838 * May fail with:
1839 * EINTR (ctrl-c pressed during ifconfig for example).
1840 * ENXIO (port is about to detach - due to kldunload for example).
1841 */
1842int
1843cxgb_begin_op(struct port_info *p, const char *wmsg)
1844{
1845	int rc = 0;
1846	struct adapter *sc = p->adapter;
1847
1848	ADAPTER_LOCK(sc);
1849
1850	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1851		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, wmsg, 0)) {
1852			rc = EINTR;
1853			goto done;
1854		}
1855	}
1856
1857	if (IS_DOOMED(p))
1858		rc = ENXIO;
1859	else if (!IS_BUSY(sc))
1860		SET_BUSY(sc);
1861	else {
1862		KASSERT(0, ("%s: port %d, p->flags = %x , sc->flags = %x",
1863			    __func__, p->port_id, p->flags, sc->flags));
1864		rc = EDOOFUS;
1865	}
1866
1867done:
1868	ADAPTER_UNLOCK(sc);
1869	return (rc);
1870}
1871
1872/*
1873 * End a synchronized operation.  Read comment block above cxgb_begin_op.
1874 */
1875int
1876cxgb_end_op(struct port_info *p)
1877{
1878	struct adapter *sc = p->adapter;
1879
1880	ADAPTER_LOCK(sc);
1881	KASSERT(IS_BUSY(sc), ("%s: not busy.", __func__));
1882	CLR_BUSY(sc);
1883	wakeup_one(&sc->flags);
1884	ADAPTER_UNLOCK(sc);
1885
1886	return (0);
1887}
1888
1889/*
1890 * Prepare for port detachment.  Detach is a special kind of synchronized
1891 * operation.  Also read comment before cxgb_begin_op.
1892 */
1893static int
1894cxgb_begin_detach(struct port_info *p)
1895{
1896	struct adapter *sc = p->adapter;
1897
1898	/*
1899	 * Inform those waiting for this port that it is going to be destroyed
1900	 * and they should not continue further.  (They'll return with ENXIO).
1901	 */
1902	ADAPTER_LOCK(sc);
1903	SET_DOOMED(p);
1904	wakeup(&sc->flags);
1905	ADAPTER_UNLOCK(sc);
1906
1907	/*
1908	 * Wait for in-progress operations.
1909	 */
1910	ADAPTER_LOCK(sc);
1911	while (IS_BUSY(sc)) {
1912		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1913	}
1914	SET_BUSY(sc);
1915	ADAPTER_UNLOCK(sc);
1916
1917	return (0);
1918}
1919
1920/*
1921 * if_init for cxgb ports.
1922 */
1923static void
1924cxgb_init(void *arg)
1925{
1926	struct port_info *p = arg;
1927
1928	if (cxgb_begin_op(p, "cxgbinit"))
1929		return;
1930
1931	cxgb_init_synchronized(p);
1932	cxgb_end_op(p);
1933}
1934
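/*
 * Bring the port up.  If this is the first port to be opened the adapter is
 * brought up as well (cxgb_up, and the offload side if applicable).  Must be
 * called from within a begin-op/end-op section.
 */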
1935static int
1936cxgb_init_synchronized(struct port_info *p)
1937{
1938	struct adapter *sc = p->adapter;
1939	struct ifnet *ifp = p->ifp;
1940	struct cmac *mac = &p->mac;
1941	int i, rc;
1942
1943	if (sc->open_device_map == 0) {
1944		if ((rc = cxgb_up(sc)) != 0)
1945			return (rc);
1946
1947		if (is_offload(sc) && !ofld_disable && offload_open(p))
1948			log(LOG_WARNING,
1949			    "Could not initialize offload capabilities\n");
1950	}
1951
1952	PORT_LOCK(p);
1953	t3_port_intr_enable(sc, p->port_id);
1954	if (!mac->multiport)
1955		t3_mac_reset(mac);
1956	cxgb_update_mac_settings(p);
1957	t3_link_start(&p->phy, mac, &p->link_config);
1958	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1959	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1960	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1961	PORT_UNLOCK(p);
1962
1963	t3_link_changed(sc, p->port_id);
1964
1965	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1966		struct sge_qset *qs = &sc->sge.qs[i];
1967		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1968
1969		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1970				 txq->txq_watchdog.c_cpu);
1971	}
1972
1973	/* all ok */
1974	setbit(&sc->open_device_map, p->port_id);
1975
1976	return (0);
1977}
1978
1979/*
1980 * Called on "ifconfig down" and from port_detach.
1981 */
1982static int
1983cxgb_uninit_synchronized(struct port_info *pi)
1984{
1985	struct adapter *sc = pi->adapter;
1986	struct ifnet *ifp = pi->ifp;
1987
1988	/*
1989	 * Clear this port's bit from the open device map, and then drain all
1990	 * the tasks that can access/manipulate this port's port_info or ifp.
1991	 * We disable this port's interrupts here so that the slow/ext
1992	 * interrupt tasks won't be enqueued.  The tick task will continue to
1993	 * be enqueued every second, but runs after this drain will not see
1994	 * this port in the open device map.
1995	 *
1996	 * A well-behaved task must take open_device_map into account and ignore
1997	 * ports that are not open.
1998	 */
1999	clrbit(&sc->open_device_map, pi->port_id);
2000	t3_port_intr_disable(sc, pi->port_id);
2001	taskqueue_drain(sc->tq, &sc->slow_intr_task);
2002	taskqueue_drain(sc->tq, &sc->ext_intr_task);
2003	taskqueue_drain(sc->tq, &sc->tick_task);
2004
2005	PORT_LOCK(pi);
2006	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2007
2008	/* disable pause frames */
2009	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
2010
2011	/* Reset RX FIFO HWM */
2012	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
2013			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
2014
2015	DELAY(100);
2016
2017	/* Wait for TXFIFO empty */
2018	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
2019			F_TXFIFO_EMPTY, 1, 20, 5);
2020
2021	DELAY(100);
2022	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
2023
2024
2025	pi->phy.ops->power_down(&pi->phy, 1);
2026
2027	PORT_UNLOCK(pi);
2028
2029	pi->link_config.link_ok = 0;
2030	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0);
2031
2032	if ((sc->open_device_map & PORT_MASK) == 0)
2033		offload_close(&sc->tdev);
2034
2035	if (sc->open_device_map == 0)
2036		cxgb_down(pi->adapter);
2037
2038	return (0);
2039}
2040
2041#ifdef LRO_SUPPORTED
2042/*
2043 * Mark LRO enabled or disabled in all qsets for this port.
2044 */
2045static int
2046cxgb_set_lro(struct port_info *p, int enabled)
2047{
2048	int i;
2049	struct adapter *adp = p->adapter;
2050	struct sge_qset *q;
2051
2052	PORT_LOCK_ASSERT_OWNED(p);
2053	for (i = 0; i < p->nqsets; i++) {
2054		q = &adp->sge.qs[p->first_qset + i];
2055		q->lro.enabled = (enabled != 0);
2056	}
2057	return (0);
2058}
2059#endif
2060
2061static int
2062cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
2063{
2064	struct port_info *p = ifp->if_softc;
2065	struct ifreq *ifr = (struct ifreq *)data;
2066	int flags, error = 0, mtu, handle_unsynchronized = 0;
2067	uint32_t mask;
2068
2069	if ((error = cxgb_begin_op(p, "cxgbioct")) != 0)
2070		return (error);
2071
2072	/*
2073	 * Only commands that should be handled within begin-op/end-op are
2074	 * serviced in this switch statement.  See handle_unsynchronized.
2075	 */
2076	switch (command) {
2077	case SIOCSIFMTU:
2078		mtu = ifr->ifr_mtu;
2079		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2080			error = EINVAL;
2081		} else {
2082			ifp->if_mtu = mtu;
2083			PORT_LOCK(p);
2084			cxgb_update_mac_settings(p);
2085			PORT_UNLOCK(p);
2086		}
2087
2088		break;
2089	case SIOCSIFFLAGS:
2090		if (ifp->if_flags & IFF_UP) {
2091			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2092				flags = p->if_flags;
2093				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2094				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2095					PORT_LOCK(p);
2096					cxgb_update_mac_settings(p);
2097					PORT_UNLOCK(p);
2098				}
2099			} else
2100				error = cxgb_init_synchronized(p);
2101			p->if_flags = ifp->if_flags;
2102		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2103			error = cxgb_uninit_synchronized(p);
2104
2105		break;
2106	case SIOCADDMULTI:
2107	case SIOCDELMULTI:
2108		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2109			PORT_LOCK(p);
2110			cxgb_update_mac_settings(p);
2111			PORT_UNLOCK(p);
2112		}
2113
2114		break;
2115	case SIOCSIFCAP:
2116		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2117		if (mask & IFCAP_TXCSUM) {
2118			if (IFCAP_TXCSUM & ifp->if_capenable) {
2119				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2120				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2121				    | CSUM_IP | CSUM_TSO);
2122			} else {
2123				ifp->if_capenable |= IFCAP_TXCSUM;
2124				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2125				    | CSUM_IP);
2126			}
2127		}
2128		if (mask & IFCAP_RXCSUM) {
2129			ifp->if_capenable ^= IFCAP_RXCSUM;
2130		}
2131		if (mask & IFCAP_TSO4) {
2132			if (IFCAP_TSO4 & ifp->if_capenable) {
2133				ifp->if_capenable &= ~IFCAP_TSO4;
2134				ifp->if_hwassist &= ~CSUM_TSO;
2135			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2136				ifp->if_capenable |= IFCAP_TSO4;
2137				ifp->if_hwassist |= CSUM_TSO;
2138			} else
2139				error = EINVAL;
2140		}
2141#ifdef LRO_SUPPORTED
2142		if (mask & IFCAP_LRO) {
2143			ifp->if_capenable ^= IFCAP_LRO;
2144
2145			/* Safe to do this even if cxgb_up has not been called yet */
2146			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2147		}
2148#endif
2149		if (mask & IFCAP_VLAN_HWTAGGING) {
2150			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2151			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2152				PORT_LOCK(p);
2153				cxgb_update_mac_settings(p);
2154				PORT_UNLOCK(p);
2155			}
2156		}
2157		if (mask & IFCAP_VLAN_MTU) {
2158			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2159			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2160				PORT_LOCK(p);
2161				cxgb_update_mac_settings(p);
2162				PORT_UNLOCK(p);
2163			}
2164		}
2165		if (mask & IFCAP_VLAN_HWCSUM) {
2166			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2167		}
2168
2169#ifdef VLAN_CAPABILITIES
2170		VLAN_CAPABILITIES(ifp);
2171#endif
2172		break;
2173	default:
2174		handle_unsynchronized = 1;
2175		break;
2176	}
2177
2178	/*
2179	 * We don't want to call anything outside the driver while inside a
2180	 * begin-op/end-op block.  If it calls us back (e.g. ether_ioctl may
2181	 * call cxgb_init) we may deadlock if the state is already marked busy.
2182	 *
2183	 * XXX: this probably opens a small race window with kldunload...
2184	 */
2185	cxgb_end_op(p);
2186
2187	/* The IS_DOOMED check is racy; we're clutching at straws here */
2188	if (handle_unsynchronized && !IS_DOOMED(p)) {
2189		if (command == SIOCSIFMEDIA || command == SIOCGIFMEDIA)
2190			error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2191		else
2192			error = ether_ioctl(ifp, command, data);
2193	}
2194
2195	return (error);
2196}
2197
2198static int
2199cxgb_media_change(struct ifnet *ifp)
2200{
2201	if_printf(ifp, "media change not supported\n");
2202	return (ENXIO);
2203}
2204
2205/*
2206 * Translates phy->modtype to an ifmedia word (IFM_ETHER | subtype | duplex).
2207 */
2208static int
2209cxgb_ifm_type(int phymod)
2210{
2211	int rc = IFM_ETHER | IFM_FDX;
2212
2213	switch (phymod) {
2214	case phy_modtype_sr:
2215		rc |= IFM_10G_SR;
2216		break;
2217	case phy_modtype_lr:
2218		rc |= IFM_10G_LR;
2219		break;
2220	case phy_modtype_lrm:
2221#ifdef IFM_10G_LRM
2222		rc |= IFM_10G_LRM;
2223#endif
2224		break;
2225	case phy_modtype_twinax:
2226#ifdef IFM_10G_TWINAX
2227		rc |= IFM_10G_TWINAX;
2228#endif
2229		break;
2230	case phy_modtype_twinax_long:
2231#ifdef IFM_10G_TWINAX_LONG
2232		rc |= IFM_10G_TWINAX_LONG;
2233#endif
2234		break;
2235	case phy_modtype_none:
2236		rc = IFM_ETHER | IFM_NONE;
2237		break;
2238	case phy_modtype_unknown:
2239		break;
2240	}
2241
2242	return (rc);
2243}
2244
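/*
 * If the PHY reports a different module type than the one the current ifmedia
 * entry was built for (the transceiver may have been swapped), rebuild the
 * ifmedia entry before reporting link state, speed, and duplex.
 */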
2245static void
2246cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2247{
2248	struct port_info *p = ifp->if_softc;
2249	struct ifmedia_entry *cur = p->media.ifm_cur;
2250	int m;
2251
2252	if (cur->ifm_data != p->phy.modtype) {
2253
2254		PORT_LOCK(p);
2255		m = cxgb_ifm_type(p->phy.modtype);
2256		ifmedia_removeall(&p->media);
2257		ifmedia_add(&p->media, m, p->phy.modtype, NULL);
2258		ifmedia_set(&p->media, m);
2259		cur = p->media.ifm_cur; /* ifmedia_set modified ifm_cur */
2260		ifmr->ifm_current = m;
2261		PORT_UNLOCK(p);
2262	}
2263
2264	ifmr->ifm_status = IFM_AVALID;
2265	ifmr->ifm_active = IFM_ETHER;
2266
2267	if (!p->link_config.link_ok)
2268		return;
2269
2270	ifmr->ifm_status |= IFM_ACTIVE;
2271
2272	switch (p->link_config.speed) {
2273	case 10:
2274		ifmr->ifm_active |= IFM_10_T;
2275		break;
2276	case 100:
2277		ifmr->ifm_active |= IFM_100_TX;
2278		break;
2279	case 1000:
2280		ifmr->ifm_active |= IFM_1000_T;
2281		break;
2282	case 10000:
2283		ifmr->ifm_active |= IFM_SUBTYPE(cur->ifm_media);
2284		break;
2285	}
2286
2287	if (p->link_config.duplex)
2288		ifmr->ifm_active |= IFM_FDX;
2289	else
2290		ifmr->ifm_active |= IFM_HDX;
2291}
2292
2293static void
2294cxgb_async_intr(void *data)
2295{
2296	adapter_t *sc = data;
2297
2298	if (cxgb_debug)
2299		device_printf(sc->dev, "cxgb_async_intr\n");
2300	/*
2301	 * May need to sleep - defer to taskqueue
2302	 */
2303	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2304}
2305
2306static void
2307cxgb_ext_intr_handler(void *arg, int count)
2308{
2309	adapter_t *sc = (adapter_t *)arg;
2310
2311	if (cxgb_debug)
2312		printf("cxgb_ext_intr_handler\n");
2313
2314	t3_phy_intr_handler(sc);
2315
2316	/* Now reenable external interrupts */
2317	ADAPTER_LOCK(sc);
2318	if (sc->slow_intr_mask) {
2319		sc->slow_intr_mask |= F_T3DBG;
2320		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2321		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2322	}
2323	ADAPTER_UNLOCK(sc);
2324}
2325
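/*
 * Poll the link state of every open port whose PHY cannot signal link changes
 * via interrupt, or which has a pending link fault.
 */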
2326static void
2327check_link_status(adapter_t *sc)
2328{
2329	int i;
2330
2331	for (i = 0; i < sc->params.nports; ++i) {
2332		struct port_info *p = &sc->port[i];
2333
2334		if (!isset(&sc->open_device_map, p->port_id))
2335			continue;
2336
2337		if (p->link_fault || !(p->phy.caps & SUPPORTED_IRQ))
2338			t3_link_changed(sc, i);
2339	}
2340}
2341
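/*
 * Run the T3B2 MAC watchdog on every open port.  A return of 1 from
 * t3b2_mac_watchdog_task means the MAC was toggled; 2 means it had to be
 * reset, in which case the MAC and link are reprogrammed and restarted.
 */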
2342static void
2343check_t3b2_mac(struct adapter *sc)
2344{
2345	int i;
2346
2347	if (sc->flags & CXGB_SHUTDOWN)
2348		return;
2349
2350	for_each_port(sc, i) {
2351		struct port_info *p = &sc->port[i];
2352		int status;
2353#ifdef INVARIANTS
2354		struct ifnet *ifp = p->ifp;
2355#endif
2356
2357		if (!isset(&sc->open_device_map, p->port_id))
2358			continue;
2359
2360		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2361			("%s: state mismatch (drv_flags %x, device_map %x)",
2362			 __func__, ifp->if_drv_flags, sc->open_device_map));
2363
2364		PORT_LOCK(p);
2365		status = t3b2_mac_watchdog_task(&p->mac);
2366		if (status == 1)
2367			p->mac.stats.num_toggled++;
2368		else if (status == 2) {
2369			struct cmac *mac = &p->mac;
2370
2371			cxgb_update_mac_settings(p);
2372			t3_link_start(&p->phy, mac, &p->link_config);
2373			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2374			t3_port_intr_enable(sc, p->port_id);
2375			p->mac.stats.num_resets++;
2376		}
2377		PORT_UNLOCK(p);
2378	}
2379}
2380
2381static void
2382cxgb_tick(void *arg)
2383{
2384	adapter_t *sc = (adapter_t *)arg;
2385
2386	if (sc->flags & CXGB_SHUTDOWN)
2387		return;
2388
2389	taskqueue_enqueue(sc->tq, &sc->tick_task);
2390	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2391}
2392
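/*
 * Per-second housekeeping (scheduled from cxgb_tick): check link state, run
 * the T3B2 MAC watchdog where applicable, account for empty free lists, and
 * refresh the interface statistics from the MAC counters.
 */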
2393static void
2394cxgb_tick_handler(void *arg, int count)
2395{
2396	adapter_t *sc = (adapter_t *)arg;
2397	const struct adapter_params *p = &sc->params;
2398	int i;
2399	uint32_t cause, reset;
2400
2401	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2402		return;
2403
2404	check_link_status(sc);
2405	sc->check_task_cnt++;
2406
2407	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2408		check_t3b2_mac(sc);
2409
2410	cause = t3_read_reg(sc, A_SG_INT_CAUSE);
2411	reset = 0;
2412	if (cause & F_FLEMPTY) {
2413		struct sge_qset *qs = &sc->sge.qs[0];
2414
2415		i = 0;
2416		reset |= F_FLEMPTY;
2417
2418		cause = (t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) >>
2419			 S_FL0EMPTY) & 0xffff;
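		/*
		 * Each pair of bits in the status word maps to fl[0] and
		 * fl[1] of one qset; charge every empty free list to the
		 * right counter.
		 */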
2420		while (cause) {
2421			qs->fl[i].empty += (cause & 1);
2422			if (i)
2423				qs++;
2424			i ^= 1;
2425			cause >>= 1;
2426		}
2427	}
2428	t3_write_reg(sc, A_SG_INT_CAUSE, reset);
2429
2430	for (i = 0; i < sc->params.nports; i++) {
2431		struct port_info *pi = &sc->port[i];
2432		struct ifnet *ifp = pi->ifp;
2433		struct cmac *mac = &pi->mac;
2434		struct mac_stats *mstats = &mac->stats;
2435
2436		if (!isset(&sc->open_device_map, pi->port_id))
2437			continue;
2438
2439		PORT_LOCK(pi);
2440		t3_mac_update_stats(mac);
2441		PORT_UNLOCK(pi);
2442
2443		ifp->if_opackets =
2444		    mstats->tx_frames_64 +
2445		    mstats->tx_frames_65_127 +
2446		    mstats->tx_frames_128_255 +
2447		    mstats->tx_frames_256_511 +
2448		    mstats->tx_frames_512_1023 +
2449		    mstats->tx_frames_1024_1518 +
2450		    mstats->tx_frames_1519_max;
2451
2452		ifp->if_ipackets =
2453		    mstats->rx_frames_64 +
2454		    mstats->rx_frames_65_127 +
2455		    mstats->rx_frames_128_255 +
2456		    mstats->rx_frames_256_511 +
2457		    mstats->rx_frames_512_1023 +
2458		    mstats->rx_frames_1024_1518 +
2459		    mstats->rx_frames_1519_max;
2460
2461		ifp->if_obytes = mstats->tx_octets;
2462		ifp->if_ibytes = mstats->rx_octets;
2463		ifp->if_omcasts = mstats->tx_mcast_frames;
2464		ifp->if_imcasts = mstats->rx_mcast_frames;
2465
2466		ifp->if_collisions =
2467		    mstats->tx_total_collisions;
2468
2469		ifp->if_iqdrops = mstats->rx_cong_drops;
2470
2471		ifp->if_oerrors =
2472		    mstats->tx_excess_collisions +
2473		    mstats->tx_underrun +
2474		    mstats->tx_len_errs +
2475		    mstats->tx_mac_internal_errs +
2476		    mstats->tx_excess_deferral +
2477		    mstats->tx_fcs_errs;
2478		ifp->if_ierrors =
2479		    mstats->rx_jabber +
2480		    mstats->rx_data_errs +
2481		    mstats->rx_sequence_errs +
2482		    mstats->rx_runt +
2483		    mstats->rx_too_long +
2484		    mstats->rx_mac_internal_errs +
2485		    mstats->rx_short +
2486		    mstats->rx_fcs_errs;
2487
2488		if (mac->multiport)
2489			continue;
2490
2491		/* Count rx fifo overflows, once per second */
2492		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2493		reset = 0;
2494		if (cause & F_RXFIFO_OVERFLOW) {
2495			mac->stats.rx_fifo_ovfl++;
2496			reset |= F_RXFIFO_OVERFLOW;
2497		}
2498		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2499	}
2500}
2501
2502static void
2503touch_bars(device_t dev)
2504{
2505	/*
2506	 * Don't enable yet
2507	 */
2508#if !defined(__LP64__) && 0
2509	u32 v;
2510
2511	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2512	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2513	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2514	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2515	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2516	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2517#endif
2518}
2519
2520static int
2521set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2522{
2523	uint8_t *buf;
2524	int err = 0;
2525	u32 aligned_offset, aligned_len, *p;
2526	struct adapter *adapter = pi->adapter;
2527
2528
2529	aligned_offset = offset & ~3;
2530	aligned_len = (len + (offset & 3) + 3) & ~3;
2531
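	/*
	 * The EEPROM is accessed one 32-bit word at a time.  If the request
	 * is not word aligned, read the surrounding words first so that the
	 * bytes outside the requested range are written back unchanged.
	 */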
2532	if (aligned_offset != offset || aligned_len != len) {
2533		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2534		if (!buf)
2535			return (ENOMEM);
2536		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2537		if (!err && aligned_len > 4)
2538			err = t3_seeprom_read(adapter,
2539					      aligned_offset + aligned_len - 4,
2540					      (u32 *)&buf[aligned_len - 4]);
2541		if (err)
2542			goto out;
2543		memcpy(buf + (offset & 3), data, len);
2544	} else
2545		buf = (uint8_t *)(uintptr_t)data;
2546
2547	err = t3_seeprom_wp(adapter, 0);
2548	if (err)
2549		goto out;
2550
2551	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2552		err = t3_seeprom_write(adapter, aligned_offset, *p);
2553		aligned_offset += 4;
2554	}
2555
2556	if (!err)
2557		err = t3_seeprom_wp(adapter, 1);
2558out:
2559	if (buf != data)
2560		free(buf, M_DEVBUF);
2561	return (err);
2562}
2563
2564
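/*
 * Range check for optional ioctl parameters: a negative value means "not
 * specified" and is treated as being in range.
 */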
2565static int
2566in_range(int val, int lo, int hi)
2567{
2568	return (val < 0 || (val <= hi && val >= lo));
2569}
2570
2571static int
2572cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2573{
2574	return (0);
2575}
2576
2577static int
2578cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2579{
2580	return (0);
2581}
2582
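/*
 * ioctl handler for the adapter's control character device.  All commands
 * require driver privilege; most operate directly on adapter registers,
 * memory, or the SGE/TP configuration.
 */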
2583static int
2584cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2585    int fflag, struct thread *td)
2586{
2587	int mmd, error = 0;
2588	struct port_info *pi = dev->si_drv1;
2589	adapter_t *sc = pi->adapter;
2590
2591#ifdef PRIV_SUPPORTED
2592	if (priv_check(td, PRIV_DRIVER)) {
2593		if (cxgb_debug)
2594			printf("user does not have access to privileged ioctls\n");
2595		return (EPERM);
2596	}
2597#else
2598	if (suser(td)) {
2599		if (cxgb_debug)
2600			printf("user does not have access to privileged ioctls\n");
2601		return (EPERM);
2602	}
2603#endif
2604
2605	switch (cmd) {
2606	case CHELSIO_GET_MIIREG: {
2607		uint32_t val;
2608		struct cphy *phy = &pi->phy;
2609		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2610
2611		if (!phy->mdio_read)
2612			return (EOPNOTSUPP);
2613		if (is_10G(sc)) {
2614			mmd = mid->phy_id >> 8;
2615			if (!mmd)
2616				mmd = MDIO_DEV_PCS;
2617			else if (mmd > MDIO_DEV_VEND2)
2618				return (EINVAL);
2619
2620			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2621					     mid->reg_num, &val);
2622		} else
2623			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2624					     mid->reg_num & 0x1f, &val);
2625		if (error == 0)
2626			mid->val_out = val;
2627		break;
2628	}
2629	case CHELSIO_SET_MIIREG: {
2630		struct cphy *phy = &pi->phy;
2631		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2632
2633		if (!phy->mdio_write)
2634			return (EOPNOTSUPP);
2635		if (is_10G(sc)) {
2636			mmd = mid->phy_id >> 8;
2637			if (!mmd)
2638				mmd = MDIO_DEV_PCS;
2639			else if (mmd > MDIO_DEV_VEND2)
2640				return (EINVAL);
2641
2642			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2643					      mmd, mid->reg_num, mid->val_in);
2644		} else
2645			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2646					      mid->reg_num & 0x1f,
2647					      mid->val_in);
2648		break;
2649	}
2650	case CHELSIO_SETREG: {
2651		struct ch_reg *edata = (struct ch_reg *)data;
2652		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2653			return (EFAULT);
2654		t3_write_reg(sc, edata->addr, edata->val);
2655		break;
2656	}
2657	case CHELSIO_GETREG: {
2658		struct ch_reg *edata = (struct ch_reg *)data;
2659		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2660			return (EFAULT);
2661		edata->val = t3_read_reg(sc, edata->addr);
2662		break;
2663	}
2664	case CHELSIO_GET_SGE_CONTEXT: {
2665		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2666		mtx_lock_spin(&sc->sge.reg_lock);
2667		switch (ecntxt->cntxt_type) {
2668		case CNTXT_TYPE_EGRESS:
2669			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2670			    ecntxt->data);
2671			break;
2672		case CNTXT_TYPE_FL:
2673			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2674			    ecntxt->data);
2675			break;
2676		case CNTXT_TYPE_RSP:
2677			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2678			    ecntxt->data);
2679			break;
2680		case CNTXT_TYPE_CQ:
2681			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2682			    ecntxt->data);
2683			break;
2684		default:
2685			error = EINVAL;
2686			break;
2687		}
2688		mtx_unlock_spin(&sc->sge.reg_lock);
2689		break;
2690	}
2691	case CHELSIO_GET_SGE_DESC: {
2692		struct ch_desc *edesc = (struct ch_desc *)data;
2693		int ret;
2694		if (edesc->queue_num >= SGE_QSETS * 6)
2695			return (EINVAL);
2696		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2697		    edesc->queue_num % 6, edesc->idx, edesc->data);
2698		if (ret < 0)
2699			return (EINVAL);
2700		edesc->size = ret;
2701		break;
2702	}
2703	case CHELSIO_GET_QSET_PARAMS: {
2704		struct qset_params *q;
2705		struct ch_qset_params *t = (struct ch_qset_params *)data;
2706		int q1 = pi->first_qset;
2707		int nqsets = pi->nqsets;
2708		int i;
2709
2710		if (t->qset_idx >= nqsets)
2711			return (EINVAL);
2712
2713		i = q1 + t->qset_idx;
2714		q = &sc->params.sge.qset[i];
2715		t->rspq_size   = q->rspq_size;
2716		t->txq_size[0] = q->txq_size[0];
2717		t->txq_size[1] = q->txq_size[1];
2718		t->txq_size[2] = q->txq_size[2];
2719		t->fl_size[0]  = q->fl_size;
2720		t->fl_size[1]  = q->jumbo_size;
2721		t->polling     = q->polling;
2722		t->lro         = q->lro;
2723		t->intr_lat    = q->coalesce_usecs;
2724		t->cong_thres  = q->cong_thres;
2725		t->qnum        = i;
2726
2727		if (sc->flags & USING_MSIX)
2728			t->vector = rman_get_start(sc->msix_irq_res[i]);
2729		else
2730			t->vector = rman_get_start(sc->irq_res);
2731
2732		break;
2733	}
2734	case CHELSIO_GET_QSET_NUM: {
2735		struct ch_reg *edata = (struct ch_reg *)data;
2736		edata->val = pi->nqsets;
2737		break;
2738	}
2739	case CHELSIO_LOAD_FW: {
2740		uint8_t *fw_data;
2741		uint32_t vers;
2742		struct ch_mem_range *t = (struct ch_mem_range *)data;
2743
2744		/*
2745		 * You're allowed to load firmware only before FULL_INIT_DONE.
2746		 *
2747		 * FW_UPTODATE is also set so the rest of the initialization
2748		 * will not overwrite what was loaded here.  This gives you the
2749		 * flexibility to load any firmware (and maybe shoot yourself in
2750		 * the foot).
2751		 */
2752
2753		ADAPTER_LOCK(sc);
2754		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2755			ADAPTER_UNLOCK(sc);
2756			return (EBUSY);
2757		}
2758
2759		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2760		if (!fw_data)
2761			error = ENOMEM;
2762		else
2763			error = copyin(t->buf, fw_data, t->len);
2764
2765		if (!error)
2766			error = -t3_load_fw(sc, fw_data, t->len);
2767
2768		if (t3_get_fw_version(sc, &vers) == 0) {
2769			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2770			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2771			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2772		}
2773
2774		if (!error)
2775			sc->flags |= FW_UPTODATE;
2776
2777		free(fw_data, M_DEVBUF);
2778		ADAPTER_UNLOCK(sc);
2779		break;
2780	}
2781	case CHELSIO_LOAD_BOOT: {
2782		uint8_t *boot_data;
2783		struct ch_mem_range *t = (struct ch_mem_range *)data;
2784
2785		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2786		if (!boot_data)
2787			return (ENOMEM);
2788
2789		error = copyin(t->buf, boot_data, t->len);
2790		if (!error)
2791			error = -t3_load_boot(sc, boot_data, t->len);
2792
2793		free(boot_data, M_DEVBUF);
2794		break;
2795	}
2796	case CHELSIO_GET_PM: {
2797		struct ch_pm *m = (struct ch_pm *)data;
2798		struct tp_params *p = &sc->params.tp;
2799
2800		if (!is_offload(sc))
2801			return (EOPNOTSUPP);
2802
2803		m->tx_pg_sz = p->tx_pg_size;
2804		m->tx_num_pg = p->tx_num_pgs;
2805		m->rx_pg_sz  = p->rx_pg_size;
2806		m->rx_num_pg = p->rx_num_pgs;
2807		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2808
2809		break;
2810	}
2811	case CHELSIO_SET_PM: {
2812		struct ch_pm *m = (struct ch_pm *)data;
2813		struct tp_params *p = &sc->params.tp;
2814
2815		if (!is_offload(sc))
2816			return (EOPNOTSUPP);
2817		if (sc->flags & FULL_INIT_DONE)
2818			return (EBUSY);
2819
2820		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2821		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2822			return (EINVAL);	/* not power of 2 */
2823		if (!(m->rx_pg_sz & 0x14000))
2824			return (EINVAL);	/* not 16KB or 64KB */
2825		if (!(m->tx_pg_sz & 0x1554000))
2826			return (EINVAL);
2827		if (m->tx_num_pg == -1)
2828			m->tx_num_pg = p->tx_num_pgs;
2829		if (m->rx_num_pg == -1)
2830			m->rx_num_pg = p->rx_num_pgs;
2831		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2832			return (EINVAL);
2833		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2834		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2835			return (EINVAL);
2836
2837		p->rx_pg_size = m->rx_pg_sz;
2838		p->tx_pg_size = m->tx_pg_sz;
2839		p->rx_num_pgs = m->rx_num_pg;
2840		p->tx_num_pgs = m->tx_num_pg;
2841		break;
2842	}
2843	case CHELSIO_SETMTUTAB: {
2844		struct ch_mtus *m = (struct ch_mtus *)data;
2845		int i;
2846
2847		if (!is_offload(sc))
2848			return (EOPNOTSUPP);
2849		if (offload_running(sc))
2850			return (EBUSY);
2851		if (m->nmtus != NMTUS)
2852			return (EINVAL);
2853		if (m->mtus[0] < 81)         /* accommodate SACK */
2854			return (EINVAL);
2855
2856		/*
2857		 * MTUs must be in ascending order
2858		 */
2859		for (i = 1; i < NMTUS; ++i)
2860			if (m->mtus[i] < m->mtus[i - 1])
2861				return (EINVAL);
2862
2863		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2864		break;
2865	}
2866	case CHELSIO_GETMTUTAB: {
2867		struct ch_mtus *m = (struct ch_mtus *)data;
2868
2869		if (!is_offload(sc))
2870			return (EOPNOTSUPP);
2871
2872		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2873		m->nmtus = NMTUS;
2874		break;
2875	}
2876	case CHELSIO_GET_MEM: {
2877		struct ch_mem_range *t = (struct ch_mem_range *)data;
2878		struct mc7 *mem;
2879		uint8_t *useraddr;
2880		u64 buf[32];
2881
2882		/*
2883		 * Use these to avoid modifying len/addr in the return
2884		 * struct.
2885		 */
2886		uint32_t len = t->len, addr = t->addr;
2887
2888		if (!is_offload(sc))
2889			return (EOPNOTSUPP);
2890		if (!(sc->flags & FULL_INIT_DONE))
2891			return (EIO);         /* need the memory controllers */
2892		if ((addr & 0x7) || (len & 0x7))
2893			return (EINVAL);
2894		if (t->mem_id == MEM_CM)
2895			mem = &sc->cm;
2896		else if (t->mem_id == MEM_PMRX)
2897			mem = &sc->pmrx;
2898		else if (t->mem_id == MEM_PMTX)
2899			mem = &sc->pmtx;
2900		else
2901			return (EINVAL);
2902
2903		/*
2904		 * Version scheme:
2905		 * bits 0..9: chip version
2906		 * bits 10..15: chip revision
2907		 */
2908		t->version = 3 | (sc->params.rev << 10);
2909
2910		/*
2911		 * Read 256 bytes at a time as len can be large and we don't
2912		 * want to use huge intermediate buffers.
2913		 */
2914		useraddr = (uint8_t *)t->buf;
2915		while (len) {
2916			unsigned int chunk = min(len, sizeof(buf));
2917
2918			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2919			if (error)
2920				return (-error);
2921			if (copyout(buf, useraddr, chunk))
2922				return (EFAULT);
2923			useraddr += chunk;
2924			addr += chunk;
2925			len -= chunk;
2926		}
2927		break;
2928	}
2929	case CHELSIO_READ_TCAM_WORD: {
2930		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2931
2932		if (!is_offload(sc))
2933			return (EOPNOTSUPP);
2934		if (!(sc->flags & FULL_INIT_DONE))
2935			return (EIO);         /* need MC5 */
2936		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2937		break;
2938	}
2939	case CHELSIO_SET_TRACE_FILTER: {
2940		struct ch_trace *t = (struct ch_trace *)data;
2941		const struct trace_params *tp;
2942
2943		tp = (const struct trace_params *)&t->sip;
2944		if (t->config_tx)
2945			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2946					       t->trace_tx);
2947		if (t->config_rx)
2948			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2949					       t->trace_rx);
2950		break;
2951	}
2952	case CHELSIO_SET_PKTSCHED: {
2953		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2954		if (sc->open_device_map == 0)
2955			return (EAGAIN);
2956		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2957		    p->binding);
2958		break;
2959	}
2960	case CHELSIO_IFCONF_GETREGS: {
2961		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2962		int reglen = cxgb_get_regs_len();
2963		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2964		if (buf == NULL) {
2965			return (ENOMEM);
2966		}
2967		if (regs->len > reglen)
2968			regs->len = reglen;
2969		else if (regs->len < reglen)
2970			error = ENOBUFS;
2971
2972		if (!error) {
2973			cxgb_get_regs(sc, regs, buf);
2974			error = copyout(buf, regs->data, reglen);
2975		}
2976		free(buf, M_DEVBUF);
2977
2978		break;
2979	}
2980	case CHELSIO_SET_HW_SCHED: {
2981		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2982		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2983
2984		if ((sc->flags & FULL_INIT_DONE) == 0)
2985			return (EAGAIN);       /* need TP to be initialized */
2986		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2987		    !in_range(t->channel, 0, 1) ||
2988		    !in_range(t->kbps, 0, 10000000) ||
2989		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2990		    !in_range(t->flow_ipg, 0,
2991			      dack_ticks_to_usec(sc, 0x7ff)))
2992			return (EINVAL);
2993
2994		if (t->kbps >= 0) {
2995			error = t3_config_sched(sc, t->kbps, t->sched);
2996			if (error < 0)
2997				return (-error);
2998		}
2999		if (t->class_ipg >= 0)
3000			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3001		if (t->flow_ipg >= 0) {
3002			t->flow_ipg *= 1000;     /* us -> ns */
3003			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3004		}
3005		if (t->mode >= 0) {
3006			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3007
3008			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3009					 bit, t->mode ? bit : 0);
3010		}
3011		if (t->channel >= 0)
3012			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3013					 1 << t->sched, t->channel << t->sched);
3014		break;
3015	}
3016	case CHELSIO_GET_EEPROM: {
3017		int i;
3018		struct ch_eeprom *e = (struct ch_eeprom *)data;
3019		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3020
3021		if (buf == NULL) {
3022			return (ENOMEM);
3023		}
3024		e->magic = EEPROM_MAGIC;
3025		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3026			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3027
3028		if (!error)
3029			error = copyout(buf + e->offset, e->data, e->len);
3030
3031		free(buf, M_DEVBUF);
3032		break;
3033	}
3034	case CHELSIO_CLEAR_STATS: {
3035		if (!(sc->flags & FULL_INIT_DONE))
3036			return (EAGAIN);
3037
3038		PORT_LOCK(pi);
3039		t3_mac_update_stats(&pi->mac);
3040		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3041		PORT_UNLOCK(pi);
3042		break;
3043	}
3044	case CHELSIO_GET_UP_LA: {
3045		struct ch_up_la *la = (struct ch_up_la *)data;
3046		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3047		if (buf == NULL) {
3048			return (ENOMEM);
3049		}
3050		if (la->bufsize < LA_BUFSIZE)
3051			error = ENOBUFS;
3052
3053		if (!error)
3054			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3055					      &la->bufsize, buf);
3056		if (!error)
3057			error = copyout(buf, la->data, la->bufsize);
3058
3059		free(buf, M_DEVBUF);
3060		break;
3061	}
3062	case CHELSIO_GET_UP_IOQS: {
3063		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3064		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3065		uint32_t *v;
3066
3067		if (buf == NULL) {
3068			return (ENOMEM);
3069		}
3070		if (ioqs->bufsize < IOQS_BUFSIZE)
3071			error = ENOBUFS;
3072
3073		if (!error)
3074			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3075
3076		if (!error) {
3077			v = (uint32_t *)buf;
3078
3079			ioqs->bufsize -= 4 * sizeof(uint32_t);
3080			ioqs->ioq_rx_enable = *v++;
3081			ioqs->ioq_tx_enable = *v++;
3082			ioqs->ioq_rx_status = *v++;
3083			ioqs->ioq_tx_status = *v++;
3084
3085			error = copyout(v, ioqs->data, ioqs->bufsize);
3086		}
3087
3088		free(buf, M_DEVBUF);
3089		break;
3090	}
3091	default:
3092		return (EOPNOTSUPP);
3093		break;
3094	}
3095
3096	return (error);
3097}
3098
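/*
 * Copy the registers in [start, end] into buf, at the offset that corresponds
 * to each register's address.
 */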
3099static __inline void
3100reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3101    unsigned int end)
3102{
3103	uint32_t *p = (uint32_t *)(buf + start);
3104
3105	for ( ; start <= end; start += sizeof(uint32_t))
3106		*p++ = t3_read_reg(ap, start);
3107}
3108
3109#define T3_REGMAP_SIZE (3 * 1024)
3110static int
3111cxgb_get_regs_len(void)
3112{
3113	return (T3_REGMAP_SIZE);
3114}
3115
3116static void
3117cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3118{
3119
3120	/*
3121	 * Version scheme:
3122	 * bits 0..9: chip version
3123	 * bits 10..15: chip revision
3124	 * bit 31: set for PCIe cards
3125	 */
3126	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3127
3128	/*
3129	 * We skip the MAC statistics registers because they are clear-on-read.
3130	 * Also reading multi-register stats would need to synchronize with the
3131	 * periodic mac stats accumulation.  Hard to justify the complexity.
3132	 */
3133	memset(buf, 0, cxgb_get_regs_len());
3134	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3135	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3136	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3137	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3138	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3139	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3140		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3141	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3142		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3143}
3144
3145
3146MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3147