cxgb_main.c revision 167514
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 167514 2007-03-14 02:37:44Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/common/cxgb_common.h>
#include <dev/cxgb/cxgb_ioctl.h>
#include <dev/cxgb/common/cxgb_regs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif
static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick(void *);
static void check_link_status(adapter_t *sc);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

#define SGE_MSIX_COUNT (SGE_QSETS + 1)
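/*
 * One MSI-X vector is used per queue set, plus one extra vector for the
 * slow path (link changes, error conditions, and other async events).
 */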

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;
TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);

SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
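
/*
 * Being a loader tunable (CTLFLAG_RDTUN), this can be set before the module
 * loads, e.g. in /boot/loader.conf (illustrative):
 *
 *	hw.cxgb.msi_allowed=2
 */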

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	if (ai->nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
cxgb_fw_download(adapter_t *sc, device_t dev)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d", CHELSIO_FW_MAJOR, CHELSIO_FW_MINOR);

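	/*
	 * firmware_get(9) looks up the image by name and, if it is not
	 * already registered, attempts to load a kernel module of the same
	 * name (e.g. a t3fw module built from the firmware header).
	 */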
	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

static int
cxgb_controller_attach(device_t dev)
{
	driver_intr_t *cxgb_intr = NULL;
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, msi_count = 0, error = 0;
	uint32_t vers;

	sc = device_get_softc(dev);
	sc->dev = dev;

	pci_enable_busmaster(dev);

	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED
	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_count = SGE_MSIX_COUNT;
		if ((pci_alloc_msix(dev, &msi_count) != 0) ||
		    (msi_count != SGE_MSIX_COUNT)) {
			msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			cxgb_intr = t3_intr_msix;
		}

		printf("allocated %d msix intrs\n", msi_count);
	}

	if ((msi_allowed >= 1) && (msi_count == 0)) {
		msi_count = 1;
		if (pci_alloc_msi(dev, &msi_count)) {
			device_printf(dev, "alloc msi failed\n");
			msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (msi_count == 0) {
		sc->irq_rid = 0;
		cxgb_intr = t3b_intr;
	}

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, 0);

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		error = ENODEV;
		goto out;
	}
	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d\n",
		    CHELSIO_FW_MAJOR, CHELSIO_FW_MINOR);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_init_hw(sc, 0) != 0) {
		device_printf(dev, "hw initialization failed\n");
		error = ENXIO;
		goto out;
	}
	t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; ++i) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
#ifdef MULTIQ
		sc->port[i].nqsets = mp_ncpus;
#else
		sc->port[i].nqsets = 1;
#endif
		sc->port[i].first_qset = i;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	if ((error = setup_sge_qsets(sc)) != 0)
		goto out;

	setup_rss(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			error = EINVAL;
			goto out;
		}
		device_printf(dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(dev, "Cannot set up interrupt\n");
			error = EINVAL;
			goto out;
		}
	} else {
		cxgb_setup_msix(sc, msi_count);
	}

	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_sw(sc);

	t3_led_ready(sc);

	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d", G_FW_VERSION_MAJOR(vers),
	    G_FW_VERSION_MINOR(vers));

	t3_add_sysctls(sc);

out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	callout_drain(&sc->cxgb_tick_ch);

	bus_generic_detach(sc->dev);

	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
		}
	}

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
	}

	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
	}
#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);

	return;
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = 3;

	if ((err = t3_sge_alloc(sc)) != 0) {
		printf("t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, 1,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				printf("t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}
	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}

	for (i = 0, k = 0; i < (sc)->params.nports; ++i) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; ++j, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}
	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{
	struct cdevsw *cxgb_cdevsw;

	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	cxgb_cdevsw->d_version = D_VERSION;
	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;

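	/*
	 * Expose the port as a character device named after the interface
	 * (e.g. /dev/cxgb0) so the extension ioctls below can reach it.
	 */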
	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    pi->ifp->if_xname);

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int media_flags;
	int err;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	ifp->if_baudrate = 100000000;

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
		media_flags = IFM_ETHER | IFM_10G_CX4;
	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
		media_flags = IFM_ETHER | IFM_10G_SR;
	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
		media_flags = IFM_ETHER | IFM_10G_LR;
	else {
		printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}

	ifmedia_add(&p->media, media_flags, 0, NULL);
	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&p->media, media_flags);

	/* Create a taskqueue for handling TX without starvation */
	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
#ifdef TASKQUEUE_CURRENT
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);
	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

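	/* Walk the capability linked list until we find cap or hit the end. */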
	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status)
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	else
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated before this gets called, as this
	 * is called early on in attach by t3_prep_adapter, so save the
	 * address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}
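
	/*
	 * The first half of the table steers to port 0's queues and the
	 * second half to port 1's; e.g. with nq0 = 2 and nq1 = 1 the map
	 * reads 0,1,0,1,... for the first half and all 2s for the second.
	 */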
	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
    int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

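	/*
	 * Build a firmware packet-scheduler management work request in a
	 * single mbuf and hand it to the management Tx path.
	 */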
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		device_printf(adap->dev, "out of mbufs for pktsched command\n");
		return;
	}
	req = (struct mngt_pktsched_wr *)m->m_data;
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
	req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
	req->sched = sched;
	req->idx = qidx;
	req->min = lo;
	req->max = hi;
	req->binding = port;
	m->m_len = m->m_pkthdr.len = sizeof(*req);
	t3_mgmt_tx(adap, m);
}

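/*
 * Bind each port's queue sets to that port in the firmware packet
 * scheduler, one management command per queue set.
 */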
static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int error;

	mtx_assert(&p->lock, MA_OWNED);

	ifp = p->ifp;
	if ((sc->flags & FW_UPTODATE) == 0) {
		device_printf(sc->dev, "updating firmware to version %d.%d\n",
		    CHELSIO_FW_MAJOR, CHELSIO_FW_MINOR);
		if ((error = cxgb_fw_download(sc, sc->dev)) != 0) {
			device_printf(sc->dev, "firmware download failed err: %d, "
			    "interface will be unavailable\n", error);
			return;
		}
		sc->flags |= FW_UPTODATE;
	}

	cxgb_link_start(p);
	ADAPTER_LOCK(p->adapter);
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);
	t3_sge_start(sc);

	p->adapter->open_device_map |= (1 << p->port);
	ADAPTER_UNLOCK(p->adapter);
	t3_intr_enable(sc);
	t3_port_intr_enable(sc, p->port);
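	/* Bind qsets to the firmware scheduler once, and only when using MSI-X. */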
	if ((p->adapter->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
		bind_qsets(sc);
	p->adapter->flags |= QUEUES_BOUND;
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop(struct port_info *p)
{
	struct ifnet *ifp;

	callout_drain(&p->adapter->cxgb_tick_ch);
	ifp = p->ifp;

	PORT_LOCK(p);
	ADAPTER_LOCK(p->adapter);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	p->adapter->open_device_map &= ~(1 << p->port);
	if (p->adapter->open_device_map == 0)
		t3_intr_disable(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
	t3_port_intr_disable(p->adapter, p->port);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
	PORT_UNLOCK(p);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	switch (command) {
	case SIOCSIFMTU:
		if ((ifr->ifr_mtu < ETHERMIN) ||
		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
			error = EINVAL;
		else if (ifp->if_mtu != ifr->ifr_mtu) {
			PORT_LOCK(p);
			ifp->if_mtu = ifr->ifr_mtu;
			t3_mac_set_mtu(&p->mac, ifp->if_mtu);
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				cxgb_init(p);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		PORT_LOCK(p);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);
			} else
				cxgb_init_locked(p);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop(p);
			}
		}
		p->if_flags = ifp->if_flags;
		PORT_UNLOCK(p);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (cxgb_debug)
			printf("would set OFLAGS\n");
		if (err == ENOMEM) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL)
		err = ENOBUFS;

	return (err);
}

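/*
 * Taskqueue handler for the transmit path: keep pulling packets from the
 * interface send queue until it drains or the TX ring fills, kicking the
 * reclaim task whenever enough descriptors are ready to be recycled.
 */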
static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error = 0;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	while (error == 0) {
		if (desc_reclaimable(txq) > TX_START_MAX_DESC)
			taskqueue_enqueue(pi->adapter->tq, &pi->adapter->timer_reclaim_task);

		error = cxgb_start_tx(ifp, TX_MAX_DESC + 1);
	}
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	if (desc_reclaimable(txq) > TX_START_MAX_DESC)
		taskqueue_enqueue(pi->adapter->tq, &pi->adapter->timer_reclaim_task);

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}

static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	if (cxgb_debug)
		printf("cxgb_async_intr\n");
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);

	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);
}

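/* Poll link state on PHYs that cannot signal link changes by interrupt. */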
static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
	}
}

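/* A negative value means "parameter not specified" and always passes. */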
static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_SET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);
		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
		    !in_range(t->cong_thres, 0, 255) ||
		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
			      MAX_CTRL_TXQ_ENTRIES) ||
		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
			      MAX_RX_JUMBO_BUFFERS) ||
		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
			return (EINVAL);
		if ((sc->flags & FULL_INIT_DONE) &&
		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
		     t->polling >= 0 || t->cong_thres >= 0))
			return (EBUSY);

		q = &sc->params.sge.qset[t->qset_idx];

		if (t->rspq_size >= 0)
			q->rspq_size = t->rspq_size;
		if (t->fl_size[0] >= 0)
			q->fl_size = t->fl_size[0];
		if (t->fl_size[1] >= 0)
			q->jumbo_size = t->fl_size[1];
		if (t->txq_size[0] >= 0)
			q->txq_size[0] = t->txq_size[0];
		if (t->txq_size[1] >= 0)
			q->txq_size[1] = t->txq_size[1];
		if (t->txq_size[2] >= 0)
			q->txq_size[2] = t->txq_size[2];
		if (t->cong_thres >= 0)
			q->cong_thres = t->cong_thres;
		if (t->intr_lat >= 0) {
			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];

			q->coalesce_nsecs = t->intr_lat*1000;
			t3_update_qset_coalesce(qs, q);
		}
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);

		q = &(sc)->params.sge.qset[t->qset_idx];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->intr_lat    = q->coalesce_nsecs / 1000;
		t->cong_thres  = q->cong_thres;
		break;
	}
	case CHELSIO_SET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		unsigned int port_idx = pi->port;

		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);
		if (edata->val < 1 ||
		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
			return (EINVAL);
		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
			return (EINVAL);
		sc->port[port_idx].nqsets = edata->val;
		/*
		 * XXX we're hardcoding ourselves to 2 ports, just like the
		 * Linux driver.
		 */
		sc->port[1].first_qset = sc->port[0].nqsets;
		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
#ifdef notyet
		/*
		 * XXX FreeBSD driver does not currently support any
		 * offload functionality
		 */
	case CHELSIO_LOAD_FW:
	case CHELSIO_DEVUP:
	case CHELSIO_SETMTUTAB:
	case CHELSIO_GET_PM:
	case CHELSIO_SET_PM:
	case CHELSIO_READ_TCAM_WORD:
		return (EOPNOTSUPP);
#endif
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((t->addr & 0x7) || (t->len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
		while (t->len) {
			unsigned int chunk = min(t->len, sizeof(buf));

			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			t->addr += chunk;
			t->len -= chunk;
		}
		break;
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ifconf_regs *regs = (struct ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);

		if (buf == NULL)
			return (ENOMEM);
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen) {
			error = E2BIG;
			goto done;
		}
		cxgb_get_regs(sc, regs, buf);
		error = copyout(buf, regs->data, reglen);
done:
		free(buf, M_DEVBUF);
		break;
	}
	default:
		return (EOPNOTSUPP);
	}

	return (error);
}

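/*
 * Copy a contiguous block of registers into the dump buffer; start and end
 * are byte offsets into both the register address space and the buffer.
 */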
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return (T3_REGMAP_SIZE);
}
#undef T3_REGMAP_SIZE

static void
cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, REGDUMP_SIZE);
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}
1793