cxgb_main.c revision 167769
/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 167769 2007-03-21 16:40:37Z kmacy $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <dev/cxgb/cxgb_osdep.h>
#include <dev/cxgb/common/cxgb_common.h>
#include <dev/cxgb/cxgb_ioctl.h>
#include <dev/cxgb/common/cxgb_regs.h>
#include <dev/cxgb/common/cxgb_t3_cpl.h>
#include <dev/cxgb/common/cxgb_firmware_exports.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_msix(adapter_t *, int);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static void cxgb_start(struct ifnet *);
static void cxgb_start_proc(void *, int ncount);
static int cxgb_media_change(struct ifnet *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/*
 * Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
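
/*
 * With the two DRIVER_MODULE() declarations above, newbus attaches the
 * controller ("cxgbc") to the PCI bus and then hangs one port device
 * ("cxgb") per MAC off the controller, so a hypothetical two-port board
 * would probe roughly as:
 *
 *	cxgbc0: <T302E RNIC, 2 ports> mem ... at device 0.0 on pci2
 *	cxgb0: <Port 0 10GBASE-CX4> on cxgbc0
 *	cxgb1: <Port 1 10GBASE-CX4> on cxgbc0
 *
 * (The description strings are built in cxgb_controller_probe() and
 * cxgb_port_probe() below; the exact output depends on the board.)
 */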

#define SGE_MSIX_COUNT (SGE_QSETS + 1)

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;
TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);

SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
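
/*
 * Being CTLFLAG_RDTUN, msi_allowed is read at boot and would normally be
 * set from /boot/loader.conf; e.g., to rule out MSI-X and let the driver
 * choose between MSI and INTx:
 *
 *	hw.cxgb.msi_allowed="1"
 */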

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32
};

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{0, 0, 0, NULL}
};

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	if (ai->nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s RNIC, %d %s", ai->desc, ai->nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

static int
cxgb_fw_download(adapter_t *sc, device_t dev)
{
	char buf[32];
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	snprintf(&buf[0], sizeof(buf), "t3fw%d%d", FW_VERSION_MAJOR,
	    FW_VERSION_MINOR);

	fw = firmware_get(buf);

	if (fw == NULL) {
		device_printf(dev, "Could not find firmware image %s\n", buf);
		return (ENOENT);
	}

	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}
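
/*
 * For instance, if FW_VERSION_MAJOR were 3 and FW_VERSION_MINOR were 2,
 * cxgb_fw_download() above would ask firmware(9) for an image named
 * "t3fw32", which must have been registered by a previously loaded
 * firmware module of that name.
 */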

static int
cxgb_controller_attach(device_t dev)
{
	driver_intr_t *cxgb_intr = NULL;
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, msi_needed, msi_count = 0, error = 0;
	uint32_t vers;
	int port_qsets = 1;

	sc = device_get_softc(dev);
	sc->dev = dev;

	pci_enable_busmaster(dev);

	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR\n");
		return (ENXIO);
	}

	mtx_init(&sc->sge.reg_lock, "SGE reg lock", NULL, MTX_DEF);
	mtx_init(&sc->lock, "cxgb controller lock", NULL, MTX_DEF);
	mtx_init(&sc->mdio_lock, "cxgb mdio", NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	ai = cxgb_get_adapter_info(dev);
	if (t3_prep_adapter(sc, ai, 1) < 0) {
		error = ENODEV;
		goto out;
	}

	/*
	 * Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED
	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		msi_needed = msi_count = SGE_MSIX_COUNT;

		if ((pci_alloc_msix(dev, &msi_count) != 0) ||
		    (msi_count != msi_needed)) {
			device_printf(dev, "msix allocation failed"
			    " - will try msi\n");
			msi_count = 0;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			cxgb_intr = t3_intr_msix;
		}
	}

	if ((msi_allowed >= 1) && (msi_count == 0)) {
		msi_count = 1;
		if (pci_alloc_msi(dev, &msi_count)) {
			device_printf(dev, "alloc msi failed - will try INTx\n");
			msi_count = 0;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->irq_rid = 1;
			cxgb_intr = t3_intr_msi;
		}
	}
#endif
	if (msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->irq_rid = 0;
		cxgb_intr = t3b_intr;
	}

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);

	/* Create a periodic callout for checking adapter status */
	callout_init_mtx(&sc->cxgb_tick_ch, &sc->lock, CALLOUT_RETURNUNLOCKED);

	if (t3_check_fw_version(sc) != 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_init_hw(sc, 0) != 0) {
		device_printf(dev, "hw initialization failed\n");
		error = ENXIO;
		goto out;
	}
	t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));

	if (sc->flags & USING_MSIX)
		port_qsets = min((SGE_QSETS / sc->params.nports), mp_ncpus);

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < sc->params.nports; i++) {
		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		sc->portdev[i] = child;
		sc->port[i].adapter = sc;
		sc->port[i].nqsets = port_qsets;
		sc->port[i].first_qset = i * port_qsets;
		sc->port[i].port = i;
		device_set_softc(child, &sc->port[i]);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	if ((error = setup_sge_qsets(sc)) != 0)
		goto out;

	setup_rss(sc);

	/* If it's MSI or INTx, allocate a single interrupt for everything */
	if ((sc->flags & USING_MSIX) == 0) {
		if ((sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
			device_printf(dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
			error = EINVAL;
			goto out;
		}
		device_printf(dev, "allocated irq_res=%p\n", sc->irq_res);

		if (bus_setup_intr(dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
			cxgb_intr, sc, &sc->intr_tag)) {
			device_printf(dev, "Cannot set up interrupt\n");
			error = EINVAL;
			goto out;
		}
	} else {
		cxgb_setup_msix(sc, msi_count);
	}

	sc->params.stats_update_period = 1;

	/* initialize sge private state */
	t3_sge_init_sw(sc);

	t3_led_ready(sc);

	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d", G_FW_VERSION_MAJOR(vers),
	    G_FW_VERSION_MINOR(vers));

	t3_add_sysctls(sc);

out:
	if (error)
		cxgb_free(sc);

	return (error);
}

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

static void
cxgb_free(struct adapter *sc)
{
	int i;

	callout_drain(&sc->cxgb_tick_ch);

	t3_sge_deinit_sw(sc);

	if (sc->tq != NULL) {
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		taskqueue_free(sc->tq);
	}

	for (i = 0; i < sc->params.nports; ++i) {
		if (sc->portdev[i] != NULL)
			device_delete_child(sc->dev, sc->portdev[i]);
	}

	bus_generic_detach(sc->dev);

	t3_free_sge_resources(sc);
	t3_sge_free(sc);

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
			    sc->msix_intr_tag[i]);
		}
		if (sc->msix_irq_res[i] != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
		}
	}

	if (sc->intr_tag != NULL) {
		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
	}

	if (sc->irq_res != NULL) {
		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
		    sc->irq_rid, sc->irq_res);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
		    sc->irq_res);
	}
#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	}
#endif
	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	mtx_destroy(&sc->mdio_lock);
	mtx_destroy(&sc->sge.reg_lock);
	mtx_destroy(&sc->lock);

	return;
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx, qset_idx;
	u_int ntxq = 3;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;
	else
		irq_idx = 0;

	for (qset_idx = 0, i = 0; i < sc->params.nports; ++i) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
			err = t3_sge_alloc_qset(sc, qset_idx, sc->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
				return (err);
			}
		}
	}

	return (0);
}

static int
cxgb_setup_msix(adapter_t *sc, int msix_count)
{
	int i, j, k, nqsets, rid;

	/* The first message indicates link changes and error conditions */
	sc->irq_rid = 1;
	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
		return (EINVAL);
	}

	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
		cxgb_async_intr, sc, &sc->intr_tag)) {
		device_printf(sc->dev, "Cannot set up interrupt\n");
		return (EINVAL);
	}
	for (i = 0, k = 0; i < sc->params.nports; ++i) {
		nqsets = sc->port[i].nqsets;
		for (j = 0; j < nqsets; ++j, k++) {
			struct sge_qset *qs = &sc->sge.qs[k];

			rid = k + 2;
			if (cxgb_debug)
				printf("rid=%d ", rid);
			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
			    sc->dev, SYS_RES_IRQ, &rid,
			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
				device_printf(sc->dev, "Cannot allocate "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
			sc->msix_irq_rid[k] = rid;
			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
			    INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
			NULL,
#endif
				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
				device_printf(sc->dev, "Cannot set up "
				    "interrupt for message %d\n", rid);
				return (EINVAL);
			}
		}
	}

	return (0);
}

static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "Port %d %s", p->port, p->port_type->desc);
	device_set_desc_copy(dev, buf);
	return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{
	struct cdevsw *cxgb_cdevsw;

	if ((cxgb_cdevsw = malloc(sizeof(struct cdevsw), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	cxgb_cdevsw->d_version = D_VERSION;
	cxgb_cdevsw->d_name = strdup(pi->ifp->if_xname, M_DEVBUF);
	cxgb_cdevsw->d_ioctl = cxgb_extension_ioctl;

	pi->port_cdev = make_dev(cxgb_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    pi->ifp->if_xname);

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}
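
/*
 * The character device created above is named after the ifnet, so port 0
 * appears as /dev/cxgb0 and accepts the CHELSIO_* ioctls implemented in
 * cxgb_extension_ioctl() below.  A management tool might use it roughly
 * like this (illustrative userland sketch, not part of the driver):
 *
 *	int fd = open("/dev/cxgb0", O_RDWR);
 *	struct ch_reg r = { .addr = 0x6f0 };	// hypothetical register offset
 *	if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("reg 0x%x = 0x%x\n", r.addr, r.val);
 */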

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define CSUM_TSO   0x0
#endif
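
/*
 * The capabilities in CXGB_CAP can be toggled at runtime through SIOCSIFCAP
 * (see the IFCAP_* handling in cxgb_ioctl() below); e.g., on a hypothetical
 * cxgb0:
 *
 *	ifconfig cxgb0 -txcsum	# also drops TSO4, as the handler requires
 *	ifconfig cxgb0 tso	# rejected unless txcsum is enabled
 */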

static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int media_flags;
	int err;
	char buf[64];

	p = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "cxgb port %d", p->port);
	mtx_init(&p->lock, buf, 0, MTX_DEF);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;
	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	ifp->if_baudrate = 100000000;

	ether_ifattach(ifp, p->hw_addr);
#ifdef DEFAULT_JUMBO
	ifp->if_mtu = 9000;
#endif
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->port_type->desc, "10GBASE-CX4"))
		media_flags = IFM_ETHER | IFM_10G_CX4;
	else if (!strcmp(p->port_type->desc, "10GBASE-SR"))
		media_flags = IFM_ETHER | IFM_10G_SR;
	else if (!strcmp(p->port_type->desc, "10GBASE-XR"))
		media_flags = IFM_ETHER | IFM_10G_LR;
	else {
		printf("unsupported media type %s\n", p->port_type->desc);
		return (ENXIO);
	}

	ifmedia_add(&p->media, media_flags, 0, NULL);
	ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&p->media, media_flags);

	snprintf(buf, sizeof(buf), "cxgb_port_taskq%d", p->port);
	/* Create a taskqueue for handling TX without starvation */
#ifdef TASKQUEUE_CURRENT
	p->tq = taskqueue_create(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#else
	p->tq = taskqueue_create_fast(buf, M_NOWAIT,
	    taskqueue_thread_enqueue, &p->tq);
#endif

	if (p->tq == NULL) {
		device_printf(dev, "failed to allocate port task queue\n");
		return (ENOMEM);
	}
	taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp);

	return (0);
}

static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;

	p = device_get_softc(dev);
	mtx_destroy(&p->lock);
	if (p->tq != NULL) {
		taskqueue_drain(p->tq, &p->start_task);
		taskqueue_free(p->tq);
		p->tq = NULL;
	}

	ether_ifdetach(p->ifp);
	if_free(p->ifp);

	destroy_dev(p->port_cdev);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}
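
/*
 * The common code uses the helper above to locate standard PCI capability
 * blocks by ID; e.g., finding the PCI Express capability (sketch using the
 * stock PCIY_EXPRESS constant from <dev/pci/pcireg.h>):
 *
 *	int ptr = t3_os_find_pci_capability(sc, PCIY_EXPRESS);
 *	if (ptr != 0)
 *		... ptr is the config-space offset of the capability ...
 */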

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];

	if ((pi->ifp->if_flags & IFF_UP) == 0)
		return;

	if (link_status)
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	else
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated before this gets called,
	 * as this is called early on in attach by t3_prep_adapter,
	 * so just save the address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	cxgb_link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	ifp = p->ifp;

	t3_init_rx_mode(&rm, p);
	t3_mac_reset(mac);
	t3_mac_set_mtu(mac, ifp->if_mtu);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
	int i;
	u_int nq0 = adap->port[0].nqsets;
	u_int nq1 = max((u_int)adap->port[1].nqsets, 1U);
	uint8_t cpus[SGE_QSETS + 1];
	uint16_t rspq_map[RSS_TABLE_SIZE];

	for (i = 0; i < SGE_QSETS; ++i)
		cpus[i] = i;
	cpus[SGE_QSETS] = 0xff;

	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
		rspq_map[i] = i % nq0;
		rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0;
	}

	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
	    F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN |
	    V_RRCPLCPUSIZE(6), cpus, rspq_map);
}
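
/*
 * A worked example of the mapping built in setup_rss(): with two ports of
 * two queue sets each (nq0 = nq1 = 2), the first half of rspq_map reads
 * 0, 1, 0, 1, ... and the second half reads 2, 3, 2, 3, ..., so each port's
 * RSS hash results land only on that port's own response queues.
 */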

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
			      int hi, int port)
{
	struct mbuf *m;
	struct mngt_pktsched_wr *req;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)		/* M_NOWAIT allocation may fail */
		return;

	req = (struct mngt_pktsched_wr *)m->m_data;
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
	req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
	req->sched = sched;
	req->idx = qidx;
	req->min = lo;
	req->max = hi;
	req->binding = port;
	m->m_len = m->m_pkthdr.len = sizeof(*req);
	t3_mgmt_tx(adap, m);
}

static void
bind_qsets(adapter_t *sc)
{
	int i, j;

	for (i = 0; i < sc->params.nports; ++i) {
		const struct port_info *pi = adap2pinfo(sc, i);

		for (j = 0; j < pi->nqsets; ++j)
			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
					  -1, i);
	}
}

static void
cxgb_init(void *arg)
{
	struct port_info *p = arg;

	PORT_LOCK(p);
	cxgb_init_locked(p);
	PORT_UNLOCK(p);
}

static void
cxgb_init_locked(struct port_info *p)
{
	struct ifnet *ifp;
	adapter_t *sc = p->adapter;
	int error;

	mtx_assert(&p->lock, MA_OWNED);

	ifp = p->ifp;
	if ((sc->flags & FW_UPTODATE) == 0) {
		device_printf(sc->dev, "updating firmware to version %d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR);
		if ((error = cxgb_fw_download(sc, sc->dev)) != 0) {
			device_printf(sc->dev, "firmware download failed err: %d - "
			    "interface will be unavailable\n", error);
			return;
		}
		sc->flags |= FW_UPTODATE;
	}

	cxgb_link_start(p);
	ADAPTER_LOCK(p->adapter);
	if (p->adapter->open_device_map == 0)
		t3_intr_clear(sc);
	t3_sge_start(sc);

	p->adapter->open_device_map |= (1 << p->port);
	ADAPTER_UNLOCK(p->adapter);
	t3_intr_enable(sc);
	t3_port_intr_enable(sc, p->port);

	if ((p->adapter->flags & (USING_MSIX | QUEUES_BOUND)) == USING_MSIX)
		bind_qsets(sc);
	p->adapter->flags |= QUEUES_BOUND;

	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}

static void
cxgb_set_rxmode(struct port_info *p)
{
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;

	mtx_assert(&p->lock, MA_OWNED);

	t3_init_rx_mode(&rm, p);
	t3_mac_set_rx_mode(mac, &rm);
}

static void
cxgb_stop_locked(struct port_info *p)
{
	struct ifnet *ifp;

	mtx_assert(&p->lock, MA_OWNED);
	mtx_assert(&p->adapter->lock, MA_NOTOWNED);

	ifp = p->ifp;

	ADAPTER_LOCK(p->adapter);
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	p->adapter->open_device_map &= ~(1 << p->port);
	if (p->adapter->open_device_map == 0)
		t3_intr_disable(p->adapter);
	ADAPTER_UNLOCK(p->adapter);
	t3_port_intr_disable(p->adapter, p->port);
	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
}

static int
cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
{
	struct port_info *p = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int flags, error = 0;
	uint32_t mask;

	switch (command) {
	case SIOCSIFMTU:
		if ((ifr->ifr_mtu < ETHERMIN) ||
		    (ifr->ifr_mtu > ETHER_MAX_LEN_JUMBO))
			error = EINVAL;
		else if (ifp->if_mtu != ifr->ifr_mtu) {
			PORT_LOCK(p);
			ifp->if_mtu = ifr->ifr_mtu;
			t3_mac_set_mtu(&p->mac, ifp->if_mtu);
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				cxgb_init(p);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			PORT_LOCK(p);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = p->if_flags;
				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
					cxgb_set_rxmode(p);
			} else
				cxgb_init_locked(p);
			p->if_flags = ifp->if_flags;
			PORT_UNLOCK(p);
		} else {
			callout_drain(&p->adapter->cxgb_tick_ch);
			PORT_LOCK(p);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				cxgb_stop_locked(p);
			} else {
				adapter_t *sc = p->adapter;
				callout_reset(&sc->cxgb_tick_ch,
				    sc->params.stats_update_period * hz,
				    cxgb_tick, sc);
			}
			PORT_UNLOCK(p);
		}
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
		break;
	case SIOCSIFCAP:
		PORT_LOCK(p);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
				    | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				if (cxgb_debug)
					printf("cxgb requires tx checksum offload"
					    " be enabled to use TSO\n");
				error = EINVAL;
			}
		}
		PORT_UNLOCK(p);
		break;
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static int
cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
{
	struct sge_qset *qs;
	struct sge_txq *txq;
	struct port_info *p = ifp->if_softc;
	struct mbuf *m = NULL;
	int err, in_use_init;

	if (!p->link_config.link_ok)
		return (ENXIO);

	if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		return (ENOBUFS);

	qs = &p->adapter->sge.qs[p->first_qset];
	txq = &qs->txq[TXQ_ETH];
	err = 0;

	mtx_lock(&txq->lock);
	in_use_init = txq->in_use;
	while ((txq->in_use - in_use_init < txmax) &&
	    (txq->size > txq->in_use + TX_MAX_DESC)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;
		if ((err = t3_encap(p, &m)) != 0)
			break;
		BPF_MTAP(ifp, m);
	}
	mtx_unlock(&txq->lock);

	if (__predict_false(err)) {
		if (cxgb_debug)
			printf("would set OFLAGS\n");
		if (err == ENOMEM) {
			IFQ_LOCK(&ifp->if_snd);
			IFQ_DRV_PREPEND(&ifp->if_snd, m);
			IFQ_UNLOCK(&ifp->if_snd);
		}
	}
	if (err == 0 && m == NULL)
		err = ENOBUFS;

	return (err);
}

static void
cxgb_start_proc(void *arg, int ncount)
{
	struct ifnet *ifp = arg;
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int error = 0;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	while (error == 0) {
		if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
			taskqueue_enqueue(pi->adapter->tq,
			    &pi->adapter->timer_reclaim_task);

		error = cxgb_start_tx(ifp, TX_START_MAX_DESC);
	}
}

static void
cxgb_start(struct ifnet *ifp)
{
	struct port_info *pi = ifp->if_softc;
	struct sge_qset *qs;
	struct sge_txq *txq;
	int err;

	qs = &pi->adapter->sge.qs[pi->first_qset];
	txq = &qs->txq[TXQ_ETH];

	if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC)
		taskqueue_enqueue(pi->adapter->tq,
		    &pi->adapter->timer_reclaim_task);

	err = cxgb_start_tx(ifp, TX_START_MAX_DESC);

	if (err == 0)
		taskqueue_enqueue(pi->tq, &pi->start_task);
}

static int
cxgb_media_change(struct ifnet *ifp)
{
	if_printf(ifp, "media change not supported\n");
	return (ENXIO);
}

static void
cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct port_info *p = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!p->link_config.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	if (p->link_config.duplex)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;
}

static void
cxgb_async_intr(void *data)
{
	adapter_t *sc = data;

	if (cxgb_debug)
		device_printf(sc->dev, "cxgb_async_intr\n");

	t3_slow_intr_handler(sc);
}

static void
cxgb_ext_intr_handler(void *arg, int count)
{
	adapter_t *sc = (adapter_t *)arg;

	if (cxgb_debug)
		printf("cxgb_ext_intr_handler\n");

	t3_phy_intr_handler(sc);

	/* Now reenable external interrupts */
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask |= F_T3DBG;
		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
	}
}

static void
check_link_status(adapter_t *sc)
{
	int i;

	for (i = 0; i < sc->params.nports; ++i) {
		struct port_info *p = &sc->port[i];

		if (!(p->port_type->caps & SUPPORTED_IRQ))
			t3_link_changed(sc, i);
	}
}

static void
check_t3b2_mac(struct adapter *adapter)
{
	int i;

	for_each_port(adapter, i) {
		struct port_info *p = &adapter->port[i];
		struct ifnet *ifp = p->ifp;
		int status;

		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			continue;

		status = 0;
		PORT_LOCK(p);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
			status = t3b2_mac_watchdog_task(&p->mac);
		if (status == 1)
			p->mac.stats.num_toggled++;
		else if (status == 2) {
			struct cmac *mac = &p->mac;

			t3_mac_set_mtu(mac, ifp->if_mtu);
			t3_mac_set_address(mac, 0, p->hw_addr);
			cxgb_set_rxmode(p);
			t3_link_start(&p->phy, mac, &p->link_config);
			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
			t3_port_intr_enable(adapter, p->port);
			p->mac.stats.num_resets++;
		}
		PORT_UNLOCK(p);
	}
}

static void
cxgb_tick(void *arg)
{
	adapter_t *sc = (adapter_t *)arg;
	const struct adapter_params *p = &sc->params;

	if (p->linkpoll_period)
		check_link_status(sc);
	callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
	    cxgb_tick, sc);

	/*
	 * The callout runs with the adapter lock held
	 * (CALLOUT_RETURNUNLOCKED); drop it here because the adapter lock
	 * can currently only be acquired after the port lock, and
	 * check_t3b2_mac() takes port locks.
	 */
	ADAPTER_UNLOCK(sc);
	if (p->rev == T3_REV_B2)
		check_t3b2_mac(sc);
}

static int
in_range(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}
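
/*
 * Note that in_range() deliberately passes any negative value: the
 * CHELSIO_SET_QSET_PARAMS handler below treats a field left at -1 as "not
 * specified", e.g. in_range(-1, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES) is true
 * and the corresponding queue parameter is simply left unchanged.
 */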

static int
cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
    int fflag, struct thread *td)
{
	int mmd, error = 0;
	struct port_info *pi = dev->si_drv1;
	adapter_t *sc = pi->adapter;

#ifdef PRIV_SUPPORTED
	if (priv_check(td, PRIV_DRIVER)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#else
	if (suser(td)) {
		if (cxgb_debug)
			printf("user does not have access to privileged ioctls\n");
		return (EPERM);
	}
#endif

	switch (cmd) {
	case SIOCGMIIREG: {
		uint32_t val;
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_read)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
					     mid->reg_num, &val);
		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
					     mid->reg_num & 0x1f, &val);
		if (error == 0)
			mid->val_out = val;
		break;
	}
	case SIOCSMIIREG: {
		struct cphy *phy = &pi->phy;
		struct mii_data *mid = (struct mii_data *)data;

		if (!phy->mdio_write)
			return (EOPNOTSUPP);
		if (is_10G(sc)) {
			mmd = mid->phy_id >> 8;
			if (!mmd)
				mmd = MDIO_DEV_PCS;
			else if (mmd > MDIO_DEV_XGXS)
				return (EINVAL);

			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
					      mmd, mid->reg_num, mid->val_in);
		} else
			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
					      mid->reg_num & 0x1f,
					      mid->val_in);
		break;
	}
	case CHELSIO_SETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		t3_write_reg(sc, edata->addr, edata->val);
		break;
	}
	case CHELSIO_GETREG: {
		struct ch_reg *edata = (struct ch_reg *)data;
		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
			return (EFAULT);
		edata->val = t3_read_reg(sc, edata->addr);
		break;
	}
	case CHELSIO_GET_SGE_CONTEXT: {
		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
		mtx_lock(&sc->sge.reg_lock);
		switch (ecntxt->cntxt_type) {
		case CNTXT_TYPE_EGRESS:
			error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_FL:
			error = t3_sge_read_fl(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_RSP:
			error = t3_sge_read_rspq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		case CNTXT_TYPE_CQ:
			error = t3_sge_read_cq(sc, ecntxt->cntxt_id,
			    ecntxt->data);
			break;
		default:
			error = EINVAL;
			break;
		}
		mtx_unlock(&sc->sge.reg_lock);
		break;
	}
	case CHELSIO_GET_SGE_DESC: {
		struct ch_desc *edesc = (struct ch_desc *)data;
		int ret;
		if (edesc->queue_num >= SGE_QSETS * 6)
			return (EINVAL);
		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
		    edesc->queue_num % 6, edesc->idx, edesc->data);
		if (ret < 0)
			return (EINVAL);
		edesc->size = ret;
		break;
	}
	case CHELSIO_SET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);
		if (!in_range(t->intr_lat, 0, M_NEWTIMER) ||
		    !in_range(t->cong_thres, 0, 255) ||
		    !in_range(t->txq_size[0], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[1], MIN_TXQ_ENTRIES,
			      MAX_TXQ_ENTRIES) ||
		    !in_range(t->txq_size[2], MIN_CTRL_TXQ_ENTRIES,
			      MAX_CTRL_TXQ_ENTRIES) ||
		    !in_range(t->fl_size[0], MIN_FL_ENTRIES, MAX_RX_BUFFERS) ||
		    !in_range(t->fl_size[1], MIN_FL_ENTRIES,
			      MAX_RX_JUMBO_BUFFERS) ||
		    !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES))
			return (EINVAL);
		if ((sc->flags & FULL_INIT_DONE) &&
		    (t->rspq_size >= 0 || t->fl_size[0] >= 0 ||
		     t->fl_size[1] >= 0 || t->txq_size[0] >= 0 ||
		     t->txq_size[1] >= 0 || t->txq_size[2] >= 0 ||
		     t->polling >= 0 || t->cong_thres >= 0))
			return (EBUSY);

		q = &sc->params.sge.qset[t->qset_idx];

		if (t->rspq_size >= 0)
			q->rspq_size = t->rspq_size;
		if (t->fl_size[0] >= 0)
			q->fl_size = t->fl_size[0];
		if (t->fl_size[1] >= 0)
			q->jumbo_size = t->fl_size[1];
		if (t->txq_size[0] >= 0)
			q->txq_size[0] = t->txq_size[0];
		if (t->txq_size[1] >= 0)
			q->txq_size[1] = t->txq_size[1];
		if (t->txq_size[2] >= 0)
			q->txq_size[2] = t->txq_size[2];
		if (t->cong_thres >= 0)
			q->cong_thres = t->cong_thres;
		if (t->intr_lat >= 0) {
			struct sge_qset *qs = &sc->sge.qs[t->qset_idx];

			q->coalesce_nsecs = t->intr_lat * 1000;
			t3_update_qset_coalesce(qs, q);
		}
		break;
	}
	case CHELSIO_GET_QSET_PARAMS: {
		struct qset_params *q;
		struct ch_qset_params *t = (struct ch_qset_params *)data;

		if (t->qset_idx >= SGE_QSETS)
			return (EINVAL);

		q = &sc->params.sge.qset[t->qset_idx];
		t->rspq_size   = q->rspq_size;
		t->txq_size[0] = q->txq_size[0];
		t->txq_size[1] = q->txq_size[1];
		t->txq_size[2] = q->txq_size[2];
		t->fl_size[0]  = q->fl_size;
		t->fl_size[1]  = q->jumbo_size;
		t->polling     = q->polling;
		t->intr_lat    = q->coalesce_nsecs / 1000;
		t->cong_thres  = q->cong_thres;
		break;
	}
	case CHELSIO_SET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		unsigned int port_idx = pi->port;

		if (sc->flags & FULL_INIT_DONE)
			return (EBUSY);
		if (edata->val < 1 ||
		    (edata->val > 1 && !(sc->flags & USING_MSIX)))
			return (EINVAL);
		if (edata->val + sc->port[!port_idx].nqsets > SGE_QSETS)
			return (EINVAL);
		sc->port[port_idx].nqsets = edata->val;
		/*
		 * XXX we're hardcoding ourselves to 2 ports, just
		 * like the Linux driver does
		 */
		sc->port[1].first_qset = sc->port[0].nqsets;
		break;
	}
	case CHELSIO_GET_QSET_NUM: {
		struct ch_reg *edata = (struct ch_reg *)data;
		edata->val = pi->nqsets;
		break;
	}
#ifdef notyet
		/*
		 * XXX FreeBSD driver does not currently support any
		 * offload functionality
		 */
	case CHELSIO_LOAD_FW:
	case CHELSIO_DEVUP:
	case CHELSIO_SETMTUTAB:
	case CHELSIO_GET_PM:
	case CHELSIO_SET_PM:
	case CHELSIO_READ_TCAM_WORD:
		return (EOPNOTSUPP);
#endif
	case CHELSIO_GET_MEM: {
		struct ch_mem_range *t = (struct ch_mem_range *)data;
		struct mc7 *mem;
		uint8_t *useraddr;
		u64 buf[32];

		if (!is_offload(sc))
			return (EOPNOTSUPP);
		if (!(sc->flags & FULL_INIT_DONE))
			return (EIO);         /* need the memory controllers */
		if ((t->addr & 0x7) || (t->len & 0x7))
			return (EINVAL);
		if (t->mem_id == MEM_CM)
			mem = &sc->cm;
		else if (t->mem_id == MEM_PMRX)
			mem = &sc->pmrx;
		else if (t->mem_id == MEM_PMTX)
			mem = &sc->pmtx;
		else
			return (EINVAL);

		/*
		 * Version scheme:
		 * bits 0..9: chip version
		 * bits 10..15: chip revision
		 */
		t->version = 3 | (sc->params.rev << 10);
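
		/*
		 * For example, a rev-2 part would report
		 * 3 | (2 << 10) == 0x803 here.
		 */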

		/*
		 * Read 256 bytes at a time as len can be large and we don't
		 * want to use huge intermediate buffers.
		 */
		useraddr = (uint8_t *)(t + 1);   /* advance to start of buffer */
		while (t->len) {
			unsigned int chunk = min(t->len, sizeof(buf));

			error = t3_mc7_bd_read(mem, t->addr / 8, chunk / 8, buf);
			if (error)
				return (-error);
			if (copyout(buf, useraddr, chunk))
				return (EFAULT);
			useraddr += chunk;
			t->addr += chunk;
			t->len -= chunk;
		}
		break;
	}
	case CHELSIO_SET_TRACE_FILTER: {
		struct ch_trace *t = (struct ch_trace *)data;
		const struct trace_params *tp;

		tp = (const struct trace_params *)&t->sip;
		if (t->config_tx)
			t3_config_trace_filter(sc, tp, 0, t->invert_match,
					       t->trace_tx);
		if (t->config_rx)
			t3_config_trace_filter(sc, tp, 1, t->invert_match,
					       t->trace_rx);
		break;
	}
	case CHELSIO_SET_PKTSCHED: {
		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
		if (sc->open_device_map == 0)
			return (EAGAIN);
		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
		    p->binding);
		break;
	}
	case CHELSIO_IFCONF_GETREGS: {
		struct ifconf_regs *regs = (struct ifconf_regs *)data;
		int reglen = cxgb_get_regs_len();
		uint8_t *buf = malloc(REGDUMP_SIZE, M_DEVBUF, M_NOWAIT);

		if (buf == NULL)
			return (ENOMEM);
		if (regs->len > reglen)
			regs->len = reglen;
		else if (regs->len < reglen) {
			error = E2BIG;
			goto done;
		}
		cxgb_get_regs(sc, regs, buf);
		error = copyout(buf, regs->data, reglen);

done:
		free(buf, M_DEVBUF);
		break;
	}
	default:
		return (EOPNOTSUPP);
	}

	return (error);
}

static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *p = (uint32_t *)(buf + start);

	for ( ; start <= end; start += sizeof(uint32_t))
		*p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
	return (T3_REGMAP_SIZE);
}
#undef T3_REGMAP_SIZE

static void
cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf)
{

	/*
	 * Version scheme:
	 * bits 0..9: chip version
	 * bits 10..15: chip revision
	 * bit 31: set for PCIe cards
	 */
	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);

	/*
	 * We skip the MAC statistics registers because they are clear-on-read.
	 * Also reading multi-register stats would need to synchronize with the
	 * periodic mac stats accumulation.  Hard to justify the complexity.
	 */
	memset(buf, 0, REGDUMP_SIZE);
	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
		       XGM_REG(A_XGM_SERDES_STAT3, 1));
	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}