cxgb_main.c revision 167525
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 167525 2007-03-14 06:34:10Z kmacy $");
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/bus.h>
41#include <sys/module.h>
42#include <sys/pciio.h>
43#include <sys/conf.h>
44#include <machine/bus.h>
45#include <machine/resource.h>
46#include <sys/bus_dma.h>
47#include <sys/rman.h>
48#include <sys/ioccom.h>
49#include <sys/mbuf.h>
50#include <sys/linker.h>
51#include <sys/firmware.h>
52#include <sys/socket.h>
53#include <sys/sockio.h>
54#include <sys/smp.h>
55#include <sys/sysctl.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58
59
60
61#include <net/bpf.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_arp.h>
65#include <net/if_dl.h>
66#include <net/if_media.h>
67#include <net/if_types.h>
68
69#include <netinet/in_systm.h>
70#include <netinet/in.h>
71#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/common/cxgb_common.h>
#include <dev/cxgb/cxgb_ioctl.h>
#include <dev/cxgb/common/cxgb_regs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif
92
93static int cxgb_setup_msix(adapter_t *, int);
94static void cxgb_init(void *);
95static void cxgb_init_locked(struct port_info *);
96static void cxgb_stop(struct port_info *);
97static void cxgb_set_rxmode(struct port_info *);
98static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
99static void cxgb_start(struct ifnet *);
100static void cxgb_start_proc(void *, int ncount);
101static int cxgb_media_change(struct ifnet *);
102static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
103static int setup_sge_qsets(adapter_t *);
104static void cxgb_async_intr(void *);
105static void cxgb_ext_intr_handler(void *, int);
106static void cxgb_tick(void *);
107static void check_link_status(adapter_t *sc);
108static void setup_rss(adapter_t *sc);
109
110/* Attachment glue for the PCI controller end of the device.  Each port of
111 * the device is attached separately, as defined later.
112 */
113static int cxgb_controller_probe(device_t);
114static int cxgb_controller_attach(device_t);
115static int cxgb_controller_detach(device_t);
116static void cxgb_free(struct adapter *);
117static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
118    unsigned int end);
119static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
120static int cxgb_get_regs_len(void);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

#define SGE_MSIX_COUNT (SGE_QSETS + 1)

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 0;
TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);

SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
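
/*
 * Example (a sketch): because the sysctl is CTLFLAG_RDTUN, the selector can
 * only be changed from the loader, e.g. by adding
 *
 *	hw.cxgb.msi_allowed="2"
 *
 * to /boot/loader.conf before boot; at runtime the value is read-only.
 */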

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	if (ai->nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
cxgb_fw_download(adapter_t *sc, device_t dev)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d", CHELSIO_FW_MAJOR, CHELSIO_FW_MINOR);

	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}
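
/*
 * The image name built above follows the "t3fw<major><minor>" convention,
 * e.g. "t3fw31" if CHELSIO_FW_MAJOR were 3 and CHELSIO_FW_MINOR were 1
 * (illustrative values only).  firmware_get(9) resolves that name against
 * registered firmware images, loading a kernel module of the same name if
 * one is not already present.
 */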

static int
cxgb_controller_attach(device_t dev)
{
	driver_intr_t *cxgb_intr = NULL;
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, msi_count = 0, error = 0;
	uint32_t vers;

	sc = device_get_softc(dev);
	sc->dev = dev;

	pci_enable_busmaster(dev);

	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0.
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED
	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_count = SGE_MSIX_COUNT;
		if ((pci_alloc_msix(dev, &msi_count) != 0) ||
		    (msi_count != SGE_MSIX_COUNT)) {
			msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			cxgb_intr = t3_intr_msix;
			device_printf(dev, "allocated %d msix interrupts\n",
			    msi_count);
		}
	}

	if ((msi_allowed >= 1) && (msi_count == 0)) {
		msi_count = 1;
		if (pci_alloc_msi(dev, &msi_count)) {
			device_printf(dev, "alloc msi failed\n");
			msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (msi_count == 0) {
		sc->irq_rid = 0;
		cxgb_intr = t3b_intr;
	}

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, 0);

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		error = ENODEV;
		goto out;
	}
	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d\n",
		    CHELSIO_FW_MAJOR, CHELSIO_FW_MINOR);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_init_hw(sc, 0) != 0) {
		device_printf(dev, "hw initialization failed\n");
		error = ENXIO;
		goto out;
	}
	t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; ++i) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
#ifdef MULTIQ
		sc->port[i].nqsets = mp_ncpus;
#else
		sc->port[i].nqsets = 1;
#endif
		sc->port[i].first_qset = i;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	if ((error = setup_sge_qsets(sc)) != 0)
		goto out;

	setup_rss(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			error = EINVAL;
			goto out;
		}
		device_printf(dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(dev, "Cannot set up interrupt\n");
			error = EINVAL;
			goto out;
		}
	} else {
		cxgb_setup_msix(sc, msi_count);
	}

	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_sw(sc);

	t3_led_ready(sc);

	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d", G_FW_VERSION_MAJOR(vers),
	    G_FW_VERSION_MINOR(vers));

	t3_add_sysctls(sc);

out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	callout_drain(&sc->cxgb_tick_ch);

	bus_generic_detach(sc->dev);

	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
		}
	}

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
	}

	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
	}
#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);

	return;
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = 3;

	if ((err = t3_sge_alloc(sc)) != 0) {
		printf("t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, 1,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				printf("t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}
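
/*
 * A worked example (hypothetical configuration): with MSI-X enabled on a
 * two-port adapter and nqsets = 2 per port, qset_idx runs 0..3 and each
 * queue set is assigned MSI-X vector qset_idx + 1, vector 0 being reserved
 * for the slow path (see cxgb_setup_msix() below).  Without MSI-X, every
 * queue set shares the single interrupt selected by irq_idx.
 */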

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}
	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}

	for (i = 0, k = 0; i < (sc)->params.nports; ++i) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; ++j, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			/*
			 * msix_irq_res[] and msix_intr_tag[] are indexed by
			 * the global queue-set index k, not the per-port j.
			 */
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}
	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{
	struct cdevsw *cxgb_cdevsw;

	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	cxgb_cdevsw->d_version = D_VERSION;
	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;

	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    pi->ifp->if_xname);

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}
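
/*
 * The character device created above appears as /dev/<if_xname> (e.g.
 * /dev/cxgb0 for the first port) and is the entry point for the CHELSIO_*
 * ioctls handled by cxgb_extension_ioctl() below.
 */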

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif
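
/*
 * When TSO_SUPPORTED is not defined, IFCAP_TSO4 and CSUM_TSO are stubbed to
 * zero above so that the capability arithmetic in cxgb_ioctl() below still
 * compiles while advertising no TSO support.
 */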

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int media_flags;
	int err;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	ifp->if_baudrate = 100000000;

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
		media_flags = IFM_ETHER | IFM_10G_CX4;
	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
		media_flags = IFM_ETHER | IFM_10G_SR;
	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
		media_flags = IFM_ETHER | IFM_10G_LR;
	else {
		printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}

	ifmedia_add(&p->media, media_flags, 0, NULL);
	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&p->media, media_flags);

	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
	/* Create a taskqueue for handling TX without starvation */
#ifdef TASKQUEUE_CURRENT
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);
	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}
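
/*
 * Example (a sketch): the common code can use this to locate a standard
 * capability by ID, e.g.
 *
 *	pcie_cap = t3_os_find_pci_capability(sc, PCIY_EXPRESS);
 *
 * which yields the config-space offset of the PCI Express capability, or 0
 * if the device does not advertise one.
 */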

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_stat: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status)
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	else
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated before this gets called,
	 * as this is called early on in attach by t3_prep_adapter,
	 * so save the address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}
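
/*
 * Worked example (hypothetical single-queue configuration): with
 * nq0 = nq1 = 1 the loop above points the first half of the
 * RSS_TABLE_SIZE-entry table at response queue 0 (port 0) and the second
 * half at queue 1 (port 1), so the RSS hash effectively selects a
 * port-local response queue.
 */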

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
			      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return;
	req = (struct mngt_pktsched_wr *)m->m_data;
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
	req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
	req->sched = sched;
	req->idx = qidx;
	req->min = lo;
	req->max = hi;
	req->binding = port;
	m->m_len = m->m_pkthdr.len = sizeof(*req);
	t3_mgmt_tx(adap, m);
}

static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	for (i = 0; i < (sc)->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}
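
/*
 * Passing lo = hi = -1 above appears to leave the scheduler's minimum and
 * maximum rate parameters at their defaults, so the command only binds
 * queue pi->first_qset + j to port i (a reading of the -1 sentinel, not
 * something this file spells out).
 */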

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int error;

	mtx_assert(&p->lock, MA_OWNED);

	ifp = p->ifp;
	if ((sc->flags & FW_UPTODATE) == 0) {
		device_printf(sc->dev, "updating firmware to version %d.%d\n",
		    CHELSIO_FW_MAJOR, CHELSIO_FW_MINOR);
		if ((error = cxgb_fw_download(sc, sc->dev)) != 0) {
			device_printf(sc->dev, "firmware download failed err: %d, "
			    "interface will be unavailable\n", error);
			return;
		}
		sc->flags |= FW_UPTODATE;
	}

	cxgb_link_start(p);
	ADAPTER_LOCK(p->adapter);
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);
	t3_sge_start(sc);

	p->adapter->open_device_map |= (1 << p->port);
	ADAPTER_UNLOCK(p->adapter);
	t3_intr_enable(sc);
	t3_port_intr_enable(sc, p->port);
	if ((p->adapter->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
		bind_qsets(sc);
	p->adapter->flags |= QUEUES_BOUND;
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop(struct port_info *p)
{
	struct ifnet *ifp;

	callout_drain(&p->adapter->cxgb_tick_ch);
	ifp = p->ifp;

	PORT_LOCK(p);
	ADAPTER_LOCK(p->adapter);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	p->adapter->open_device_map &= ~(1 << p->port);
	if (p->adapter->open_device_map == 0)
		t3_intr_disable(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
	t3_port_intr_disable(p->adapter, p->port);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
	PORT_UNLOCK(p);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	switch (command) {
	case SIOCSIFMTU:
		if ((ifr->ifr_mtu < ETHERMIN) ||
		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
			error = EINVAL;
		else if (ifp->if_mtu != ifr->ifr_mtu) {
			PORT_LOCK(p);
			ifp->if_mtu = ifr->ifr_mtu;
			t3_mac_set_mtu(&p->mac, ifp->if_mtu);
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				cxgb_init(p);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		PORT_LOCK(p);
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);

			} else
				cxgb_init_locked(p);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop(p);
			}
		}
		p->if_flags = ifp->if_flags;
		PORT_UNLOCK(p);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " to be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (cxgb_debug)
			printf("would set OFLAGS\n");
		if (err == ENOMEM) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL)
		err = ENOBUFS;

	return (err);
}
static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error = 0;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	while (error == 0)
		error = cxgb_start_tx(ifp, TX_MAX_DESC + 1);
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}

static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	if (cxgb_debug)
		printf("cxgb_async_intr\n");
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);

	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);
}

static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < (sc)->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
	}
}

/* A negative value means "leave this parameter unchanged". */
static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_SET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);
		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
		    !in_range(t->cong_thres, 0, 255) ||
		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
			      MAX_CTRL_TXQ_ENTRIES) ||
		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
			      MAX_RX_JUMBO_BUFFERS) ||
		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
			return (EINVAL);
		if ((sc->flags & FULL_INIT_DONE) &&
		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
		     t->polling >= 0 || t->cong_thres >= 0))
			return (EBUSY);

		q = &sc->params.sge.qset[t->qset_idx];

		if (t->rspq_size >= 0)
			q->rspq_size = t->rspq_size;
		if (t->fl_size[0] >= 0)
			q->fl_size = t->fl_size[0];
		if (t->fl_size[1] >= 0)
			q->jumbo_size = t->fl_size[1];
		if (t->txq_size[0] >= 0)
			q->txq_size[0] = t->txq_size[0];
		if (t->txq_size[1] >= 0)
			q->txq_size[1] = t->txq_size[1];
		if (t->txq_size[2] >= 0)
			q->txq_size[2] = t->txq_size[2];
		if (t->cong_thres >= 0)
			q->cong_thres = t->cong_thres;
		if (t->intr_lat >= 0) {
			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];

			q->coalesce_nsecs = t->intr_lat*1000;
			t3_update_qset_coalesce(qs, q);
		}
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);

		q = &(sc)->params.sge.qset[t->qset_idx];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->intr_lat    = q->coalesce_nsecs / 1000;
		t->cong_thres  = q->cong_thres;
		break;
	}
	case CHELSIO_SET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		unsigned int port_idx = pi->port;

		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);
		if (edata->val < 1 ||
		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
			return (EINVAL);
		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
			return (EINVAL);
		sc->port[port_idx].nqsets = edata->val;
		/*
		 * XXX we're hardcoding ourselves to 2 ports
		 * just like the LEENUX
		 */
		sc->port[1].first_qset = sc->port[0].nqsets;
		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
#ifdef notyet
		/*
		 * XXX FreeBSD driver does not currently support any
		 * offload functionality
		 */
	case CHELSIO_LOAD_FW:
	case CHELSIO_DEVUP:
	case CHELSIO_SETMTUTAB:
	case CHELSIO_GET_PM:
	case CHELSIO_SET_PM:
	case CHELSIO_READ_TCAM_WORD:
		return (EOPNOTSUPP);
#endif
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((t->addr & 0x7) || (t->len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
		while (t->len) {
			unsigned int chunk = min(t->len, sizeof(buf));

			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			t->addr += chunk;
			t->len -= chunk;
		}
		break;
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ifconf_regs *regs = (struct ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);

		if (buf == NULL)
			return (ENOMEM);
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen) {
			error = E2BIG;
			goto done;
		}
		cxgb_get_regs(sc, regs, buf);
		error = copyout(buf, regs->data, reglen);
done:
		free(buf, M_DEVBUF);
		break;
	}
	default:
		return (EOPNOTSUPP);
	}

	return (error);
}

static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	/* start and end are byte offsets into the register space */
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}
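
/*
 * Usage appears in cxgb_get_regs() below: each call copies one contiguous
 * range of 32-bit registers into the dump buffer at the registers' own
 * offsets, so any ranges left unread remain zeroed gaps in the output.
 */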

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return (T3_REGMAP_SIZE);
}
#undef T3_REGMAP_SIZE

static void
cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, REGDUMP_SIZE);
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}