cxgb_main.c revision 167746
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 167746 2007-03-20 21:43:32Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/common/cxgb_common.h>
#include <dev/cxgb/cxgb_ioctl.h>
#include <dev/cxgb/common/cxgb_regs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

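/*
 * One MSI-X vector per SGE queue set, plus one for the slow path
 * (link changes and error conditions); see cxgb_setup_msix().
 */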
#define SGE_MSIX_COUNT (SGE_QSETS + 1)

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 0;
TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);

SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
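
/*
 * Example: to let the driver consider MSI-X, set the loader tunable
 *
 *	hw.cxgb.msi_allowed=2
 *
 * in /boot/loader.conf; the current value can be read back at runtime
 * with sysctl hw.cxgb.msi_allowed.
 */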

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/*
 * Table for probing the cards.  The desc field doubles as the
 * end-of-table sentinel checked by cxgb_get_ident().
 */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	if (ai->nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
cxgb_fw_download(adapter_t *sc, device_t dev)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d", FW_VERSION_MAJOR,
	    FW_VERSION_MINOR);
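
	/*
	 * The image name encodes the firmware version the driver was
	 * built against; e.g. a hypothetical major 3, minor 2 build
	 * would look up a firmware(9) image named "t3fw32".
	 */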
	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

static int
cxgb_controller_attach(device_t dev)
{
	driver_intr_t *cxgb_intr = NULL;
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, msi_count = 0, error = 0;
	uint32_t vers;

	sc = device_get_softc(dev);
	sc->dev = dev;

	pci_enable_busmaster(dev);

	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0.
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED
	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_count = SGE_MSIX_COUNT;
		if ((pci_alloc_msix(dev, &msi_count) != 0) ||
		    (msi_count != SGE_MSIX_COUNT)) {
			msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			cxgb_intr = t3_intr_msix;
		}

		printf("allocated %d msix intrs\n", msi_count);
	}

	if ((msi_allowed >= 1) && (msi_count == 0)) {
		msi_count = 1;
		if (pci_alloc_msi(dev, &msi_count)) {
			device_printf(dev, "alloc msi failed\n");
			msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (msi_count == 0) {
		sc->irq_rid = 0;
		cxgb_intr = t3b_intr;
	}

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);
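	/*
	 * CALLOUT_RETURNUNLOCKED: cxgb_tick() is responsible for dropping
	 * sc->lock itself (via ADAPTER_UNLOCK) before it returns.
	 */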

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		error = ENODEV;
		goto out;
	}
	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_init_hw(sc, 0) != 0) {
		device_printf(dev, "hw initialization failed\n");
		error = ENXIO;
		goto out;
	}
	t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
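	/*
	 * HPZ0 appears to encode the host page size relative to 4KB:
	 * PAGE_SHIFT - 12 is 0 for 4KB pages, 1 for 8KB, and so on.
	 */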

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; ++i) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
#ifdef MULTIQ
		sc->port[i].nqsets = mp_ncpus;
#else
		sc->port[i].nqsets = 1;
#endif
		sc->port[i].first_qset = i;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	if ((error = setup_sge_qsets(sc)) != 0)
		goto out;

	setup_rss(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			error = EINVAL;
			goto out;
		}
		device_printf(dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(dev, "Cannot set up interrupt\n");
			error = EINVAL;
			goto out;
		}
	} else {
		cxgb_setup_msix(sc, msi_count);
	}

	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_sw(sc);

	t3_led_ready(sc);

	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d", G_FW_VERSION_MAJOR(vers),
	    G_FW_VERSION_MINOR(vers));

	t3_add_sysctls(sc);

out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	callout_drain(&sc->cxgb_tick_ch);

	bus_generic_detach(sc->dev);

	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
		}
	}

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
	}

	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
	}
#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = 3;
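	/* three TX queues per set: Ethernet, plus offload and control */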

	if ((err = t3_sge_alloc(sc)) != 0) {
		printf("t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, 1,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				printf("t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}
	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}

	for (i = 0, k = 0; i < (sc)->params.nports; ++i) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; ++j, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

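			/*
			 * Interrupt rids are 1-based and rid 1 is the
			 * async/error vector set up above, so queue set k
			 * gets rid k + 2.
			 */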
			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}
	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{
	struct cdevsw *cxgb_cdevsw;

	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	cxgb_cdevsw->d_version = D_VERSION;
	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;

	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    pi->ifp->if_xname);

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int media_flags;
	int err;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	ifp->if_baudrate = 100000000;

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
	        media_flags = IFM_ETHER | IFM_10G_CX4;
	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
	        media_flags = IFM_ETHER | IFM_10G_SR;
	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
	        media_flags = IFM_ETHER | IFM_10G_LR;
	else {
	        printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}

	ifmedia_add(&p->media, media_flags, 0, NULL);
	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&p->media, media_flags);

	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
#ifdef TASKQUEUE_CURRENT
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	/* Create a taskqueue for handling TX without starvation */
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);
	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_stat: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status)
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	else
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated when this is called, since
	 * t3_prep_adapter invokes it early in attach, so save the
	 * address in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
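 *	For example, with nq0 = 2 and nq1 = 1 the first half of the map
 *	alternates between response queues 0 and 1, while every entry in
 *	the second half maps to queue 2, port 1's only response queue.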
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
			      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return;
	req = (struct mngt_pktsched_wr *)m->m_data;
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
	req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
	req->sched = sched;
	req->idx = qidx;
	req->min = lo;
	req->max = hi;
	req->binding = port;
	m->m_len = m->m_pkthdr.len = sizeof(*req);
	t3_mgmt_tx(adap, m);
}

static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int error;

	mtx_assert(&p->lock, MA_OWNED);

	ifp = p->ifp;
	if ((sc->flags & FW_UPTODATE) == 0) {
		device_printf(sc->dev, "updating firmware to version %d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR);
		if ((error = cxgb_fw_download(sc, sc->dev)) != 0) {
			device_printf(sc->dev, "firmware download failed err: %d, "
			    "interface will be unavailable\n", error);
			return;
		}
		sc->flags |= FW_UPTODATE;
	}

	cxgb_link_start(p);
	ADAPTER_LOCK(p->adapter);
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);
	t3_sge_start(sc);

	p->adapter->open_device_map |= (1 << p->port);
	ADAPTER_UNLOCK(p->adapter);
	t3_intr_enable(sc);
	t3_port_intr_enable(sc, p->port);
	if ((p->adapter->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX) {
		bind_qsets(sc);
		p->adapter->flags |= QUEUES_BOUND;
	}
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop_locked(struct port_info *p)
{
	struct ifnet *ifp;

	mtx_assert(&p->lock, MA_OWNED);
	mtx_assert(&p->adapter->lock, MA_NOTOWNED);

	callout_stop(&p->adapter->cxgb_tick_ch);
	ifp = p->ifp;

	ADAPTER_LOCK(p->adapter);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	p->adapter->open_device_map &= ~(1 << p->port);
	if (p->adapter->open_device_map == 0)
		t3_intr_disable(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
	t3_port_intr_disable(p->adapter, p->port);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	switch (command) {
	case SIOCSIFMTU:
		if ((ifr->ifr_mtu < ETHERMIN) ||
		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
			error = EINVAL;
		else if (ifp->if_mtu != ifr->ifr_mtu) {
			PORT_LOCK(p);
			ifp->if_mtu = ifr->ifr_mtu;
			t3_mac_set_mtu(&p->mac, ifp->if_mtu);
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				cxgb_init(p);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		PORT_LOCK(p);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);
			} else
				cxgb_init_locked(p);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop_locked(p);
			}
		}
		p->if_flags = ifp->if_flags;
		PORT_UNLOCK(p);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
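	/*
	 * Dequeue until txmax descriptors have been consumed or fewer
	 * than TX_MAX_DESC slots (presumably the worst case for a single
	 * packet) remain in the ring.
	 */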
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (cxgb_debug)
			printf("would set OFLAGS\n");
		if (err == ENOMEM) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL)
		err = ENOBUFS;

	return (err);
}

static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error = 0;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	while (error == 0) {
		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
			taskqueue_enqueue(pi->adapter->tq,
			    &pi->adapter->timer_reclaim_task);

		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
	}
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
		taskqueue_enqueue(pi->adapter->tq,
		    &pi->adapter->timer_reclaim_task);

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}

static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	if (cxgb_debug)
		printf("cxgb_async_intr\n");
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
}

static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
	}
}

static void
check_t3b2_mac(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i) {
		struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;
		int status;

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		status = 0;
		PORT_LOCK(p);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
			status = t3b2_mac_watchdog_task(&p->mac);
		if (status == 1)
			p->mac.stats.num_toggled++;
		else if (status == 2) {
			struct cmac *mac = &p->mac;

			t3_mac_set_mtu(mac, ifp->if_mtu);
			t3_mac_set_address(mac, 0, p->hw_addr);
			cxgb_set_rxmode(p);
			t3_link_start(&p->phy, mac, &p->link_config);
			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
			t3_port_intr_enable(adapter, p->port);
			p->mac.stats.num_resets++;
		}
		PORT_UNLOCK(p);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	/*
	 * The adapter lock can currently only be acquired after the
	 * port lock.
	 */
	ADAPTER_UNLOCK(sc);
	if (p->rev == T3_REV_B2)
		check_t3b2_mac(sc);
}

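/*
 * A negative value means "parameter not supplied / leave unchanged", so it
 * is always treated as in range; see the CHELSIO_SET_QSET_PARAMS handler.
 */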
static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_SET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);
		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
		    !in_range(t->cong_thres, 0, 255) ||
		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
			      MAX_CTRL_TXQ_ENTRIES) ||
		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
			      MAX_RX_JUMBO_BUFFERS) ||
		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
			return (EINVAL);
		if ((sc->flags & FULL_INIT_DONE) &&
		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
		     t->polling >= 0 || t->cong_thres >= 0))
			return (EBUSY);

		q = &sc->params.sge.qset[t->qset_idx];

		if (t->rspq_size >= 0)
			q->rspq_size = t->rspq_size;
		if (t->fl_size[0] >= 0)
			q->fl_size = t->fl_size[0];
		if (t->fl_size[1] >= 0)
			q->jumbo_size = t->fl_size[1];
		if (t->txq_size[0] >= 0)
			q->txq_size[0] = t->txq_size[0];
		if (t->txq_size[1] >= 0)
			q->txq_size[1] = t->txq_size[1];
		if (t->txq_size[2] >= 0)
			q->txq_size[2] = t->txq_size[2];
		if (t->cong_thres >= 0)
			q->cong_thres = t->cong_thres;
		if (t->intr_lat >= 0) {
			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];

			q->coalesce_nsecs = t->intr_lat*1000;
			t3_update_qset_coalesce(qs, q);
		}
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);

		q = &(sc)->params.sge.qset[t->qset_idx];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->intr_lat    = q->coalesce_nsecs / 1000;
		t->cong_thres  = q->cong_thres;
		break;
	}
	case CHELSIO_SET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		unsigned int port_idx = pi->port;

		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);
		if (edata->val < 1 ||
		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
			return (EINVAL);
		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
			return (EINVAL);
		sc->port[port_idx].nqsets = edata->val;
		/*
		 * XXX we're hardcoding ourselves to 2 ports, just
		 * like the Linux driver does
		 */
		sc->port[1].first_qset = sc->port[0].nqsets;
		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
#ifdef notyet
		/*
		 * XXX FreeBSD driver does not currently support any
		 * offload functionality
		 */
	case CHELSIO_LOAD_FW:
	case CHELSIO_DEVUP:
	case CHELSIO_SETMTUTAB:
	case CHELSIO_GET_PM:
	case CHELSIO_SET_PM:
	case CHELSIO_READ_TCAM_WORD:
		return (EOPNOTSUPP);
		break;
#endif
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((t->addr & 0x7) || (t->len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
		while (t->len) {
			unsigned int chunk = min(t->len, sizeof(buf));

			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			t->addr += chunk;
			t->len -= chunk;
		}
		break;
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ifconf_regs *regs = (struct ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);

		if (buf == NULL)
			return (ENOMEM);
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen) {
			error = E2BIG;
			goto done;
		}
		cxgb_get_regs(sc, regs, buf);
		error = copyout(buf, regs->data, reglen);
done:
		free(buf, M_DEVBUF);
		break;
	}
	default:
		return (EOPNOTSUPP);
		break;
	}

	return (error);
}

static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	/* start and end are byte offsets into the dump buffer */
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return (T3_REGMAP_SIZE);
}
#undef T3_REGMAP_SIZE

static void
cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, REGDUMP_SIZE);
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}
1838