cxgb_main.c revision 219902
1/**************************************************************************
2
3Copyright (c) 2007-2009, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 219902 2011-03-23 13:10:15Z jhb $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/bus.h>
37#include <sys/module.h>
38#include <sys/pciio.h>
39#include <sys/conf.h>
40#include <machine/bus.h>
41#include <machine/resource.h>
42#include <sys/bus_dma.h>
43#include <sys/ktr.h>
44#include <sys/rman.h>
45#include <sys/ioccom.h>
46#include <sys/mbuf.h>
47#include <sys/linker.h>
48#include <sys/firmware.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/smp.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/queue.h>
55#include <sys/taskqueue.h>
56#include <sys/proc.h>
57
58#include <net/bpf.h>
59#include <net/ethernet.h>
60#include <net/if.h>
61#include <net/if_arp.h>
62#include <net/if_dl.h>
63#include <net/if_media.h>
64#include <net/if_types.h>
65#include <net/if_vlan_var.h>
66
67#include <netinet/in_systm.h>
68#include <netinet/in.h>
69#include <netinet/if_ether.h>
70#include <netinet/ip.h>
72#include <netinet/tcp.h>
73#include <netinet/udp.h>
74
75#include <dev/pci/pcireg.h>
76#include <dev/pci/pcivar.h>
77#include <dev/pci/pci_private.h>
78
79#include <cxgb_include.h>
80
81#ifdef PRIV_SUPPORTED
82#include <sys/priv.h>
83#endif
84
85static int cxgb_setup_interrupts(adapter_t *);
86static void cxgb_teardown_interrupts(adapter_t *);
87static void cxgb_init(void *);
88static int cxgb_init_locked(struct port_info *);
89static int cxgb_uninit_locked(struct port_info *);
90static int cxgb_uninit_synchronized(struct port_info *);
91static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92static int cxgb_media_change(struct ifnet *);
93static int cxgb_ifm_type(int);
94static void cxgb_build_medialist(struct port_info *);
95static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96static int setup_sge_qsets(adapter_t *);
97static void cxgb_async_intr(void *);
98static void cxgb_tick_handler(void *, int);
99static void cxgb_tick(void *);
100static void link_check_callout(void *);
101static void check_link_status(void *, int);
102static void setup_rss(adapter_t *sc);
103static int alloc_filters(struct adapter *);
104static int setup_hw_filters(struct adapter *);
105static int set_filter(struct adapter *, int, const struct filter_info *);
106static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107    unsigned int, u64, u64);
108static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109    unsigned int, u64, u64);
110
111/* Attachment glue for the PCI controller end of the device.  Each port of
112 * the device is attached separately, as defined later.
113 */
114static int cxgb_controller_probe(device_t);
115static int cxgb_controller_attach(device_t);
116static int cxgb_controller_detach(device_t);
117static void cxgb_free(struct adapter *);
118static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119    unsigned int end);
120static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121static int cxgb_get_regs_len(void);
122static int offload_open(struct port_info *pi);
123static void touch_bars(device_t dev);
124static int offload_close(struct t3cdev *tdev);
125static void cxgb_update_mac_settings(struct port_info *p);
126
127static device_method_t cxgb_controller_methods[] = {
128	DEVMETHOD(device_probe,		cxgb_controller_probe),
129	DEVMETHOD(device_attach,	cxgb_controller_attach),
130	DEVMETHOD(device_detach,	cxgb_controller_detach),
131
132	/* bus interface */
133	DEVMETHOD(bus_print_child,	bus_generic_print_child),
134	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
135
136	{ 0, 0 }
137};
138
139static driver_t cxgb_controller_driver = {
140	"cxgbc",
141	cxgb_controller_methods,
142	sizeof(struct adapter)
143};
144
145static devclass_t	cxgb_controller_devclass;
146DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
147
148/*
149 * Attachment glue for the ports.  Attachment is done directly to the
150 * controller device.
151 */
152static int cxgb_port_probe(device_t);
153static int cxgb_port_attach(device_t);
154static int cxgb_port_detach(device_t);
155
156static device_method_t cxgb_port_methods[] = {
157	DEVMETHOD(device_probe,		cxgb_port_probe),
158	DEVMETHOD(device_attach,	cxgb_port_attach),
159	DEVMETHOD(device_detach,	cxgb_port_detach),
160	{ 0, 0 }
161};
162
163static driver_t cxgb_port_driver = {
164	"cxgb",
165	cxgb_port_methods,
166	0
167};
168
169static d_ioctl_t cxgb_extension_ioctl;
170static d_open_t cxgb_extension_open;
171static d_close_t cxgb_extension_close;
172
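/*
 * Character device switch for the per-port control nodes created by
 * cxgb_makedev() below.  The ioctl entry point implements the driver's
 * management commands (register dumps, EEPROM access, filter setup,
 * etc.), which are typically issued by a userland tool such as
 * Chelsio's cxgbtool; the exact consumer is outside this file.
 */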
173static struct cdevsw cxgb_cdevsw = {
174       .d_version =    D_VERSION,
175       .d_flags =      0,
176       .d_open =       cxgb_extension_open,
177       .d_close =      cxgb_extension_close,
178       .d_ioctl =      cxgb_extension_ioctl,
179       .d_name =       "cxgb",
180};
181
182static devclass_t	cxgb_port_devclass;
183DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
184
185/*
186 * The driver uses the best interrupt scheme available on a platform in the
187 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
188 * of these schemes the driver may consider as follows:
189 *
190 * msi = 2: choose from among all three options
191 * msi = 1: only consider MSI and pin interrupts
192 * msi = 0: force pin interrupts
193 */
194static int msi_allowed = 2;
195
196TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
197SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
198SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
199    "MSI-X, MSI, INTx selector");
200
201/*
202 * The driver enables offload by default.
203 * To disable it, use ofld_disable = 1.
204 */
205static int ofld_disable = 0;
206TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
207SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
208    "disable ULP offload");
209
210/*
211 * The driver uses an auto-queue algorithm by default.
212 * To disable it and force a single queue-set per port, use multiq = 0
213 */
214static int multiq = 1;
215TUNABLE_INT("hw.cxgb.multiq", &multiq);
216SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
217    "use min(ncpus/ports, 8) queue-sets per port");
218
219/*
220 * By default the driver will not update the firmware unless
221 * it was compiled against a newer version.
222 *
223 */
224static int force_fw_update = 0;
225TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
226SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
227    "update firmware even if up to date");
228
229int cxgb_use_16k_clusters = -1;
230TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
231SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
232    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
233
234/*
235 * Tune the size of the output queue.
236 */
237int cxgb_snd_queue_len = IFQ_MAXLEN;
238TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
239SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
240    &cxgb_snd_queue_len, 0, "send queue size ");
241
242static int nfilters = -1;
243TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
244SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
245    &nfilters, 0, "max number of entries in the filter table");
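/*
 * All of the knobs above are CTLFLAG_RDTUN sysctls, so they take their
 * values from kernel environment tunables at module load time.  A
 * hypothetical /boot/loader.conf fragment (values purely illustrative)
 * might look like:
 *
 *   hw.cxgb.msi_allowed="1"     # consider MSI and INTx only
 *   hw.cxgb.multiq="0"          # one queue set per port
 *   hw.cxgb.nfilters="0"        # leave the hardware filter table unused
 */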
246
247enum {
248	MAX_TXQ_ENTRIES      = 16384,
249	MAX_CTRL_TXQ_ENTRIES = 1024,
250	MAX_RSPQ_ENTRIES     = 16384,
251	MAX_RX_BUFFERS       = 16384,
252	MAX_RX_JUMBO_BUFFERS = 16384,
253	MIN_TXQ_ENTRIES      = 4,
254	MIN_CTRL_TXQ_ENTRIES = 4,
255	MIN_RSPQ_ENTRIES     = 32,
256	MIN_FL_ENTRIES       = 32,
257	MIN_FL_JUMBO_ENTRIES = 32
258};
259
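/*
 * Software shadow of one hardware filter entry.  The bit-field members
 * are believed to mirror the tuple that set_filter() pushes to the
 * hardware (the filter region managed by the MC5) using
 * CPL_SET_TCB_FIELD messages; see the shared T3 code for the
 * authoritative layout.
 */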
260struct filter_info {
261	u32 sip;
262	u32 sip_mask;
263	u32 dip;
264	u16 sport;
265	u16 dport;
266	u32 vlan:12;
267	u32 vlan_prio:3;
268	u32 mac_hit:1;
269	u32 mac_idx:4;
270	u32 mac_vld:1;
271	u32 pkt_type:2;
272	u32 report_filter_id:1;
273	u32 pass:1;
274	u32 rss:1;
275	u32 qset:3;
276	u32 locked:1;
277	u32 valid:1;
278};
279
280enum { FILTER_NO_VLAN_PRI = 7 };
281
282#define EEPROM_MAGIC 0x38E2F10C
283
284#define PORT_MASK ((1 << MAX_NPORTS) - 1)
285
286/* Table for probing the cards.  The desc field isn't actually used */
287struct cxgb_ident {
288	uint16_t	vendor;
289	uint16_t	device;
290	int		index;
291	char		*desc;
292} cxgb_identifiers[] = {
293	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
294	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
295	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
296	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
297	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
298	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
299	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
300	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
301	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
302	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
303	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
304	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
305	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
306	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
307	{0, 0, 0, NULL}
308};
309
310static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311
312
313static __inline char
314t3rev2char(struct adapter *adapter)
315{
316	char rev = 'z';
317
318	switch(adapter->params.rev) {
319	case T3_REV_A:
320		rev = 'a';
321		break;
322	case T3_REV_B:
323	case T3_REV_B2:
324		rev = 'b';
325		break;
326	case T3_REV_C:
327		rev = 'c';
328		break;
329	}
330	return rev;
331}
332
333static struct cxgb_ident *
334cxgb_get_ident(device_t dev)
335{
336	struct cxgb_ident *id;
337
338	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339		if ((id->vendor == pci_get_vendor(dev)) &&
340		    (id->device == pci_get_device(dev))) {
341			return (id);
342		}
343	}
344	return (NULL);
345}
346
347static const struct adapter_info *
348cxgb_get_adapter_info(device_t dev)
349{
350	struct cxgb_ident *id;
351	const struct adapter_info *ai;
352
353	id = cxgb_get_ident(dev);
354	if (id == NULL)
355		return (NULL);
356
357	ai = t3_get_adapter_info(id->index);
358
359	return (ai);
360}
361
362static int
363cxgb_controller_probe(device_t dev)
364{
365	const struct adapter_info *ai;
366	char *ports, buf[80];
367	int nports;
368
369	ai = cxgb_get_adapter_info(dev);
370	if (ai == NULL)
371		return (ENXIO);
372
373	nports = ai->nports0 + ai->nports1;
374	if (nports == 1)
375		ports = "port";
376	else
377		ports = "ports";
378
379	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380	device_set_desc_copy(dev, buf);
381	return (BUS_PROBE_DEFAULT);
382}
383
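/*
 * Names of the firmware(9) images used below.  The %c in the TP
 * EEPROM/SRAM names is filled in with the chip revision from
 * t3rev2char(), e.g. "cxgb_t3c_protocol_sram" on a T3C part.  The
 * images are assumed to be made available by a companion firmware
 * module (typically cxgb_t3fw); packaging may vary between releases.
 */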
384#define FW_FNAME "cxgb_t3fw"
385#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
387
388static int
389upgrade_fw(adapter_t *sc)
390{
391	const struct firmware *fw;
392	int status;
393	u32 vers;
394
395	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397		return (ENOENT);
398	} else
399		device_printf(sc->dev, "installing firmware on card\n");
400	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401
402	if (status != 0) {
403		device_printf(sc->dev, "failed to install firmware: %d\n",
404		    status);
405	} else {
406		t3_get_fw_version(sc, &vers);
407		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409		    G_FW_VERSION_MICRO(vers));
410	}
411
412	firmware_put(fw, FIRMWARE_UNLOAD);
413
414	return (status);
415}
416
417/*
418 * The cxgb_controller_attach function is responsible for the initial
419 * bringup of the device.  Its responsibilities include:
420 *
421 *  1. Determine if the device supports MSI or MSI-X.
422 *  2. Allocate bus resources so that we can access the Base Address Register.
423 *  3. Create and initialize mutexes for the controller and its control
424 *     logic such as SGE and MDIO.
425 *  4. Call the hardware-specific setup routine for the adapter as a whole.
426 *  5. Allocate the BAR for doing MSI-X.
427 *  6. Set up the line interrupt iff MSI-X is not supported.
428 *  7. Create the driver's taskq.
429 *  8. Start one task queue service thread.
430 *  9. Check if the firmware and SRAM are up-to-date.  They will be
431 *     auto-updated later (before FULL_INIT_DONE), if required.
432 * 10. Create a child device for each MAC (port).
433 * 11. Initialize T3 private state.
434 * 12. Trigger the LED.
435 * 13. Set up offload iff supported.
436 * 14. Reset/restart the tick callout.
437 * 15. Attach sysctls.
438 *
439 * NOTE: Any modification to or deviation from this sequence MUST be reflected
440 * in the list above.  Failure to do so will result in problems on various
441 * error conditions including link flapping.
442 */
443static int
444cxgb_controller_attach(device_t dev)
445{
446	device_t child;
447	const struct adapter_info *ai;
448	struct adapter *sc;
449	int i, error = 0;
450	uint32_t vers;
451	int port_qsets = 1;
452	int msi_needed, reg;
453	char buf[80];
454
455	sc = device_get_softc(dev);
456	sc->dev = dev;
457	sc->msi_count = 0;
458	ai = cxgb_get_adapter_info(dev);
459
460	/* find the PCIe link width and set max read request to 4KB */
461	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
462		uint16_t lnk;
463
464		lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
465		sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
466		if (sc->link_width < 8 &&
467		    (ai->caps & SUPPORTED_10000baseT_Full)) {
468			device_printf(sc->dev,
469			    "PCIe x%d Link, expect reduced performance\n",
470			    sc->link_width);
471		}
472
473		pci_set_max_read_req(dev, 4096);
474	}
475
476	touch_bars(dev);
477	pci_enable_busmaster(dev);
478	/*
479	 * Allocate the registers and make them available to the driver.
480	 * The registers that we care about for NIC mode are in BAR 0
481	 */
482	sc->regs_rid = PCIR_BAR(0);
483	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
484	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
485		device_printf(dev, "Cannot allocate BAR region 0\n");
486		return (ENXIO);
487	}
488	sc->udbs_rid = PCIR_BAR(2);
489	sc->udbs_res = NULL;
490	if (is_offload(sc) &&
491	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
492		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
493		device_printf(dev, "Cannot allocate BAR region 1\n");
494		error = ENXIO;
495		goto out;
496	}
497
498	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
499	    device_get_unit(dev));
500	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
501
502	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
503	    device_get_unit(dev));
504	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
505	    device_get_unit(dev));
506	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
507	    device_get_unit(dev));
508
509	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
510	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
511	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
512
513	sc->bt = rman_get_bustag(sc->regs_res);
514	sc->bh = rman_get_bushandle(sc->regs_res);
515	sc->mmio_len = rman_get_size(sc->regs_res);
516
517	for (i = 0; i < MAX_NPORTS; i++)
518		sc->port[i].adapter = sc;
519
520	if (t3_prep_adapter(sc, ai, 1) < 0) {
521		printf("prep adapter failed\n");
522		error = ENODEV;
523		goto out;
524	}
525	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
526	 * enough messages for the queue sets.  If that fails, try falling
527	 * back to MSI.  If that fails, then try falling back to the legacy
528	 * interrupt pin model.
529	 */
530	sc->msix_regs_rid = 0x20;
531	if ((msi_allowed >= 2) &&
532	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
533	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
534
535		if (multiq)
536			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
537		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
538
539		if (pci_msix_count(dev) == 0 ||
540		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
541		    sc->msi_count != msi_needed) {
542			device_printf(dev, "alloc msix failed - "
543				      "msi_count=%d, msi_needed=%d, err=%d; "
544				      "will try MSI\n", sc->msi_count,
545				      msi_needed, error);
546			sc->msi_count = 0;
547			port_qsets = 1;
548			pci_release_msi(dev);
549			bus_release_resource(dev, SYS_RES_MEMORY,
550			    sc->msix_regs_rid, sc->msix_regs_res);
551			sc->msix_regs_res = NULL;
552		} else {
553			sc->flags |= USING_MSIX;
554			sc->cxgb_intr = cxgb_async_intr;
555			device_printf(dev,
556				      "using MSI-X interrupts (%u vectors)\n",
557				      sc->msi_count);
558		}
559	}
560
561	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
562		sc->msi_count = 1;
563		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
564			device_printf(dev, "alloc msi failed - "
565				      "err=%d; will try INTx\n", error);
566			sc->msi_count = 0;
567			port_qsets = 1;
568			pci_release_msi(dev);
569		} else {
570			sc->flags |= USING_MSI;
571			sc->cxgb_intr = t3_intr_msi;
572			device_printf(dev, "using MSI interrupts\n");
573		}
574	}
575	if (sc->msi_count == 0) {
576		device_printf(dev, "using line interrupts\n");
577		sc->cxgb_intr = t3b_intr;
578	}
579
580	/* Create a private taskqueue thread for handling driver events */
581	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
582	    taskqueue_thread_enqueue, &sc->tq);
583	if (sc->tq == NULL) {
584		device_printf(dev, "failed to allocate controller task queue\n");
585		goto out;
586	}
587
588	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
589	    device_get_nameunit(dev));
590	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591
592
593	/* Create a periodic callout for checking adapter status */
594	callout_init(&sc->cxgb_tick_ch, TRUE);
595
596	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597		/*
598		 * Warn user that a firmware update will be attempted in init.
599		 */
600		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602		sc->flags &= ~FW_UPTODATE;
603	} else {
604		sc->flags |= FW_UPTODATE;
605	}
606
607	if (t3_check_tpsram_version(sc) < 0) {
608		/*
609		 * Warn user that a firmware update will be attempted in init.
610		 */
611		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613		sc->flags &= ~TPS_UPTODATE;
614	} else {
615		sc->flags |= TPS_UPTODATE;
616	}
617
618	/*
619	 * Create a child device for each MAC.  The ethernet attachment
620	 * will be done in these children.
621	 */
622	for (i = 0; i < (sc)->params.nports; i++) {
623		struct port_info *pi;
624
625		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626			device_printf(dev, "failed to add child port\n");
627			error = EINVAL;
628			goto out;
629		}
630		pi = &sc->port[i];
631		pi->adapter = sc;
632		pi->nqsets = port_qsets;
633		pi->first_qset = i*port_qsets;
634		pi->port_id = i;
635		pi->tx_chan = i >= ai->nports0;
636		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637		sc->rxpkt_map[pi->txpkt_intf] = i;
638		sc->port[i].tx_chan = i >= ai->nports0;
639		sc->portdev[i] = child;
640		device_set_softc(child, pi);
641	}
642	if ((error = bus_generic_attach(dev)) != 0)
643		goto out;
644
645	/* initialize sge private state */
646	t3_sge_init_adapter(sc);
647
648	t3_led_ready(sc);
649
650	cxgb_offload_init();
651	if (is_offload(sc)) {
652		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
653		cxgb_adapter_ofld(sc);
654	}
655	error = t3_get_fw_version(sc, &vers);
656	if (error)
657		goto out;
658
659	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661	    G_FW_VERSION_MICRO(vers));
662
663	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664		 ai->desc, is_offload(sc) ? "R" : "",
665		 sc->params.vpd.ec, sc->params.vpd.sn);
666	device_set_desc_copy(dev, buf);
667
668	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671
672	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
674	t3_add_attach_sysctls(sc);
675out:
676	if (error)
677		cxgb_free(sc);
678
679	return (error);
680}
681
682/*
683 * The cxgb_controller_detach routine is called when the device is
684 * unloaded from the system.
685 */
686
687static int
688cxgb_controller_detach(device_t dev)
689{
690	struct adapter *sc;
691
692	sc = device_get_softc(dev);
693
694	cxgb_free(sc);
695
696	return (0);
697}
698
699/*
700 * cxgb_free() is called by the cxgb_controller_detach() routine
701 * to tear down the structures that were built up in
702 * cxgb_controller_attach(), and should be the final piece of work
703 * done when fully unloading the driver.
704 *
705 *
706 *  1. Shutting down the threads started by the cxgb_controller_attach()
707 *     routine.
708 *  2. Stopping the lower level device and all callouts (cxgb_down()).
709 *  3. Detaching all of the port devices created during the
710 *     cxgb_controller_attach() routine.
711 *  4. Removing the device children created via cxgb_controller_attach().
712 *  5. Releasing PCI resources associated with the device.
713 *  6. Turning off the offload support, iff it was turned on.
714 *  7. Destroying the mutexes created in cxgb_controller_attach().
715 *
716 */
717static void
718cxgb_free(struct adapter *sc)
719{
720	int i;
721
722	ADAPTER_LOCK(sc);
723	sc->flags |= CXGB_SHUTDOWN;
724	ADAPTER_UNLOCK(sc);
725
726	/*
727	 * Make sure all child devices are gone.
728	 */
729	bus_generic_detach(sc->dev);
730	for (i = 0; i < (sc)->params.nports; i++) {
731		if (sc->portdev[i] &&
732		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
733			device_printf(sc->dev, "failed to delete child port\n");
734	}
735
736	/*
737	 * At this point, it is as if cxgb_port_detach has run on all ports, and
738	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
739	 * all open devices have been closed.
740	 */
741	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
742					   __func__, sc->open_device_map));
743	for (i = 0; i < sc->params.nports; i++) {
744		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
745						  __func__, i));
746	}
747
748	/*
749	 * Finish off the adapter's callouts.
750	 */
751	callout_drain(&sc->cxgb_tick_ch);
752	callout_drain(&sc->sge_timer_ch);
753
754	/*
755	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
756	 * sysctls are cleaned up by the kernel linker.
757	 */
758	if (sc->flags & FULL_INIT_DONE) {
759 		t3_free_sge_resources(sc);
760 		sc->flags &= ~FULL_INIT_DONE;
761 	}
762
763	/*
764	 * Release all interrupt resources.
765	 */
766	cxgb_teardown_interrupts(sc);
767	if (sc->flags & (USING_MSI | USING_MSIX)) {
768		device_printf(sc->dev, "releasing msi message(s)\n");
769		pci_release_msi(sc->dev);
770	} else {
771		device_printf(sc->dev, "no msi message to release\n");
772	}
773
774	if (sc->msix_regs_res != NULL) {
775		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
776		    sc->msix_regs_res);
777	}
778
779	/*
780	 * Free the adapter's taskqueue.
781	 */
782	if (sc->tq != NULL) {
783		taskqueue_free(sc->tq);
784		sc->tq = NULL;
785	}
786
787	if (is_offload(sc)) {
788		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
789		cxgb_adapter_unofld(sc);
790	}
791
792#ifdef notyet
793	if (sc->flags & CXGB_OFLD_INIT)
794		cxgb_offload_deactivate(sc);
795#endif
796	free(sc->filters, M_DEVBUF);
797	t3_sge_free(sc);
798
799	cxgb_offload_exit();
800
801	if (sc->udbs_res != NULL)
802		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
803		    sc->udbs_res);
804
805	if (sc->regs_res != NULL)
806		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
807		    sc->regs_res);
808
809	MTX_DESTROY(&sc->mdio_lock);
810	MTX_DESTROY(&sc->sge.reg_lock);
811	MTX_DESTROY(&sc->elmer_lock);
812	ADAPTER_LOCK_DEINIT(sc);
813}
814
815/**
816 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
817 *	@sc: the controller softc
818 *
819 *	Determines how many sets of SGE queues to use and initializes them.
820 *	We support multiple queue sets per port if we have MSI-X, otherwise
821 *	just one queue set per port.
822 */
823static int
824setup_sge_qsets(adapter_t *sc)
825{
826	int i, j, err, irq_idx = 0, qset_idx = 0;
827	u_int ntxq = SGE_TXQ_PER_SET;
828
829	if ((err = t3_sge_alloc(sc)) != 0) {
830		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
831		return (err);
832	}
833
834	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
835		irq_idx = -1;
836
837	for (i = 0; i < (sc)->params.nports; i++) {
838		struct port_info *pi = &sc->port[i];
839
840		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
841			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
842			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
843			    &sc->params.sge.qset[qset_idx], ntxq, pi);
844			if (err) {
845				t3_free_sge_resources(sc);
846				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
847				    err);
848				return (err);
849			}
850		}
851	}
852
853	return (0);
854}
855
856static void
857cxgb_teardown_interrupts(adapter_t *sc)
858{
859	int i;
860
861	for (i = 0; i < SGE_QSETS; i++) {
862		if (sc->msix_intr_tag[i] == NULL) {
863
864			/* Should have been setup fully or not at all */
865			KASSERT(sc->msix_irq_res[i] == NULL &&
866				sc->msix_irq_rid[i] == 0,
867				("%s: half-done interrupt (%d).", __func__, i));
868
869			continue;
870		}
871
872		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
873				  sc->msix_intr_tag[i]);
874		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
875				     sc->msix_irq_res[i]);
876
877		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
878		sc->msix_irq_rid[i] = 0;
879	}
880
881	if (sc->intr_tag) {
882		KASSERT(sc->irq_res != NULL,
883			("%s: half-done interrupt.", __func__));
884
885		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
886		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
887				     sc->irq_res);
888
889		sc->irq_res = sc->intr_tag = NULL;
890		sc->irq_rid = 0;
891	}
892}
893
894static int
895cxgb_setup_interrupts(adapter_t *sc)
896{
897	struct resource *res;
898	void *tag;
899	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
900
901	sc->irq_rid = intr_flag ? 1 : 0;
902	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
903					     RF_SHAREABLE | RF_ACTIVE);
904	if (sc->irq_res == NULL) {
905		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
906			      intr_flag, sc->irq_rid);
907		err = EINVAL;
908		sc->irq_rid = 0;
909	} else {
910		err = bus_setup_intr(sc->dev, sc->irq_res,
911		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
912		    sc->cxgb_intr, sc, &sc->intr_tag);
913
914		if (err) {
915			device_printf(sc->dev,
916				      "Cannot set up interrupt (%x, %u, %d)\n",
917				      intr_flag, sc->irq_rid, err);
918			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
919					     sc->irq_res);
920			sc->irq_res = sc->intr_tag = NULL;
921			sc->irq_rid = 0;
922		}
923	}
924
925	/* That's all for INTx or MSI */
926	if (!(intr_flag & USING_MSIX) || err)
927		return (err);
928
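	/*
	 * Only MSI-X reaches this point: the vector set up above (rid 1)
	 * handles slow-path/async events via sc->cxgb_intr, and the
	 * remaining vectors (rids 2 .. msi_count) are bound one-to-one to
	 * the SGE queue sets using t3_intr_msix below.
	 */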
929	for (i = 0; i < sc->msi_count - 1; i++) {
930		rid = i + 2;
931		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
932					     RF_SHAREABLE | RF_ACTIVE);
933		if (res == NULL) {
934			device_printf(sc->dev, "Cannot allocate interrupt "
935				      "for message %d\n", rid);
936			err = EINVAL;
937			break;
938		}
939
940		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
941				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
942		if (err) {
943			device_printf(sc->dev, "Cannot set up interrupt "
944				      "for message %d (%d)\n", rid, err);
945			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
946			break;
947		}
948
949		sc->msix_irq_rid[i] = rid;
950		sc->msix_irq_res[i] = res;
951		sc->msix_intr_tag[i] = tag;
952	}
953
954	if (err)
955		cxgb_teardown_interrupts(sc);
956
957	return (err);
958}
959
960
961static int
962cxgb_port_probe(device_t dev)
963{
964	struct port_info *p;
965	char buf[80];
966	const char *desc;
967
968	p = device_get_softc(dev);
969	desc = p->phy.desc;
970	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
971	device_set_desc_copy(dev, buf);
972	return (0);
973}
974
975
976static int
977cxgb_makedev(struct port_info *pi)
978{
979
980	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
981	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
982
983	if (pi->port_cdev == NULL)
984		return (ENOMEM);
985
986	pi->port_cdev->si_drv1 = (void *)pi;
987
988	return (0);
989}
990
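/*
 * Interface capabilities advertised and enabled by default.  Note that
 * CXGB_CAP_ENABLE masks off IFCAP_TSO6, so IPv6 TSO is advertised but
 * left disabled until the administrator enables it explicitly (e.g.
 * with ifconfig, assuming the installed ifconfig exposes a tso6 knob).
 */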
991#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
992    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
993    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
994#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
995
996static int
997cxgb_port_attach(device_t dev)
998{
999	struct port_info *p;
1000	struct ifnet *ifp;
1001	int err;
1002	struct adapter *sc;
1003
1004	p = device_get_softc(dev);
1005	sc = p->adapter;
1006	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1007	    device_get_unit(device_get_parent(dev)), p->port_id);
1008	PORT_LOCK_INIT(p, p->lockbuf);
1009
1010	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1011	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1012
1013	/* Allocate an ifnet object and set it up */
1014	ifp = p->ifp = if_alloc(IFT_ETHER);
1015	if (ifp == NULL) {
1016		device_printf(dev, "Cannot allocate ifnet\n");
1017		return (ENOMEM);
1018	}
1019
1020	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1021	ifp->if_init = cxgb_init;
1022	ifp->if_softc = p;
1023	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1024	ifp->if_ioctl = cxgb_ioctl;
1025	ifp->if_start = cxgb_start;
1026
1027	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1028	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1029	IFQ_SET_READY(&ifp->if_snd);
1030
1031	ifp->if_capabilities = CXGB_CAP;
1032	ifp->if_capenable = CXGB_CAP_ENABLE;
1033	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1034
1035	/*
1036	 * Disable TSO on 4-port - it isn't supported by the firmware.
1037	 */
1038	if (sc->params.nports > 2) {
1039		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1041		ifp->if_hwassist &= ~CSUM_TSO;
1042	}
1043
1044	ether_ifattach(ifp, p->hw_addr);
1045	ifp->if_transmit = cxgb_transmit;
1046	ifp->if_qflush = cxgb_qflush;
1047
1048#ifdef DEFAULT_JUMBO
1049	if (sc->params.nports <= 2)
1050		ifp->if_mtu = ETHERMTU_JUMBO;
1051#endif
1052	if ((err = cxgb_makedev(p)) != 0) {
1053		printf("makedev failed %d\n", err);
1054		return (err);
1055	}
1056
1057	/* Create a list of media supported by this port */
1058	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1059	    cxgb_media_status);
1060	cxgb_build_medialist(p);
1061
1062	t3_sge_init_port(p);
1063
1064	return (err);
1065}
1066
1067/*
1068 * cxgb_port_detach() is called via the device_detach method when
1069 * cxgb_free() calls bus_generic_detach().  It is responsible for
1070 * removing the device from the view of the kernel, i.e. from all
1071 * interface lists etc.  This routine is only called when the driver is
1072 * being unloaded, not when the link goes down.
1073 */
1074static int
1075cxgb_port_detach(device_t dev)
1076{
1077	struct port_info *p;
1078	struct adapter *sc;
1079	int i;
1080
1081	p = device_get_softc(dev);
1082	sc = p->adapter;
1083
1084	/* Tell cxgb_ioctl and if_init that the port is going away */
1085	ADAPTER_LOCK(sc);
1086	SET_DOOMED(p);
1087	wakeup(&sc->flags);
1088	while (IS_BUSY(sc))
1089		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1090	SET_BUSY(sc);
1091	ADAPTER_UNLOCK(sc);
1092
1093	if (p->port_cdev != NULL)
1094		destroy_dev(p->port_cdev);
1095
1096	cxgb_uninit_synchronized(p);
1097	ether_ifdetach(p->ifp);
1098
1099	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1100		struct sge_qset *qs = &sc->sge.qs[i];
1101		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1102
1103		callout_drain(&txq->txq_watchdog);
1104		callout_drain(&txq->txq_timer);
1105	}
1106
1107	PORT_LOCK_DEINIT(p);
1108	if_free(p->ifp);
1109	p->ifp = NULL;
1110
1111	ADAPTER_LOCK(sc);
1112	CLR_BUSY(sc);
1113	wakeup_one(&sc->flags);
1114	ADAPTER_UNLOCK(sc);
1115	return (0);
1116}
1117
1118void
1119t3_fatal_err(struct adapter *sc)
1120{
1121	u_int fw_status[4];
1122
1123	if (sc->flags & FULL_INIT_DONE) {
1124		t3_sge_stop(sc);
1125		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1126		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1127		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1128		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1129		t3_intr_disable(sc);
1130	}
1131	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1132	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1133		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1134		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1135}
1136
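/*
 * t3_os_find_pci_capability() and the PCI save/restore helpers below are
 * OS-dependent hooks called from the shared T3 common code.  This one
 * walks the PCI capability list by hand, much like the generic
 * pci_find_cap(9) used during attach.
 */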
1137int
1138t3_os_find_pci_capability(adapter_t *sc, int cap)
1139{
1140	device_t dev;
1141	struct pci_devinfo *dinfo;
1142	pcicfgregs *cfg;
1143	uint32_t status;
1144	uint8_t ptr;
1145
1146	dev = sc->dev;
1147	dinfo = device_get_ivars(dev);
1148	cfg = &dinfo->cfg;
1149
1150	status = pci_read_config(dev, PCIR_STATUS, 2);
1151	if (!(status & PCIM_STATUS_CAPPRESENT))
1152		return (0);
1153
1154	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1155	case 0:
1156	case 1:
1157		ptr = PCIR_CAP_PTR;
1158		break;
1159	case 2:
1160		ptr = PCIR_CAP_PTR_2;
1161		break;
1162	default:
1163		return (0);
1164		break;
1165	}
1166	ptr = pci_read_config(dev, ptr, 1);
1167
1168	while (ptr != 0) {
1169		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1170			return (ptr);
1171		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1172	}
1173
1174	return (0);
1175}
1176
1177int
1178t3_os_pci_save_state(struct adapter *sc)
1179{
1180	device_t dev;
1181	struct pci_devinfo *dinfo;
1182
1183	dev = sc->dev;
1184	dinfo = device_get_ivars(dev);
1185
1186	pci_cfg_save(dev, dinfo, 0);
1187	return (0);
1188}
1189
1190int
1191t3_os_pci_restore_state(struct adapter *sc)
1192{
1193	device_t dev;
1194	struct pci_devinfo *dinfo;
1195
1196	dev = sc->dev;
1197	dinfo = device_get_ivars(dev);
1198
1199	pci_cfg_restore(dev, dinfo);
1200	return (0);
1201}
1202
1203/**
1204 *	t3_os_link_changed - handle link status changes
1205 *	@sc: the adapter associated with the link change
1206 *	@port_id: the port index whose link status has changed
1207 *	@link_status: the new status of the link
1208 *	@speed: the new speed setting
1209 *	@duplex: the new duplex setting
1210 *	@fc: the new flow-control setting
1211 *
1212 *	This is the OS-dependent handler for link status changes.  The OS
1213 *	neutral handler takes care of most of the processing for these events,
1214 *	then calls this handler for any OS-specific processing.
1215 */
1216void
1217t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1218     int duplex, int fc, int mac_was_reset)
1219{
1220	struct port_info *pi = &adapter->port[port_id];
1221	struct ifnet *ifp = pi->ifp;
1222
1223	/* no race with detach, so ifp should always be good */
1224	KASSERT(ifp, ("%s: if detached.", __func__));
1225
1226	/* Reapply mac settings if they were lost due to a reset */
1227	if (mac_was_reset) {
1228		PORT_LOCK(pi);
1229		cxgb_update_mac_settings(pi);
1230		PORT_UNLOCK(pi);
1231	}
1232
1233	if (link_status) {
1234		ifp->if_baudrate = IF_Mbps(speed);
1235		if_link_state_change(ifp, LINK_STATE_UP);
1236	} else
1237		if_link_state_change(ifp, LINK_STATE_DOWN);
1238}
1239
1240/**
1241 *	t3_os_phymod_changed - handle PHY module changes
1242 *	@adap: the adapter whose PHY reported the module change
1243 *	@port_id: the index of the port whose PHY module changed
1244 *
1245 *	This is the OS-dependent handler for PHY module changes.  It is
1246 *	invoked when a PHY module is removed or inserted for any OS-specific
1247 *	processing.
1248 */
1249void t3_os_phymod_changed(struct adapter *adap, int port_id)
1250{
1251	static const char *mod_str[] = {
1252		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1253	};
1254	struct port_info *pi = &adap->port[port_id];
1255	int mod = pi->phy.modtype;
1256
1257	if (mod != pi->media.ifm_cur->ifm_data)
1258		cxgb_build_medialist(pi);
1259
1260	if (mod == phy_modtype_none)
1261		if_printf(pi->ifp, "PHY module unplugged\n");
1262	else {
1263		KASSERT(mod < ARRAY_SIZE(mod_str),
1264			("invalid PHY module type %d", mod));
1265		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1266	}
1267}
1268
1269void
1270t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1271{
1272
1273	/*
1274	 * The ifnet might not be allocated before this gets called,
1275	 * as this is called early on in attach by t3_prep_adapter, so
1276	 * just save the address off in the port structure.
1277	 */
1278	if (cxgb_debug)
1279		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1280	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1281}
1282
1283/*
1284 * Programs the XGMAC based on the settings in the ifnet.  These settings
1285 * include MTU, MAC address, mcast addresses, etc.
1286 */
1287static void
1288cxgb_update_mac_settings(struct port_info *p)
1289{
1290	struct ifnet *ifp = p->ifp;
1291	struct t3_rx_mode rm;
1292	struct cmac *mac = &p->mac;
1293	int mtu, hwtagging;
1294
1295	PORT_LOCK_ASSERT_OWNED(p);
1296
1297	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1298
1299	mtu = ifp->if_mtu;
1300	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1301		mtu += ETHER_VLAN_ENCAP_LEN;
1302
1303	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1304
1305	t3_mac_set_mtu(mac, mtu);
1306	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1307	t3_mac_set_address(mac, 0, p->hw_addr);
1308	t3_init_rx_mode(&rm, p);
1309	t3_mac_set_rx_mode(mac, &rm);
1310}
1311
1312
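/*
 * Wait for the response queue's offload_pkts count to advance by n over
 * init_cnt, i.e. for n management-command replies to arrive.  This polls
 * up to 5 times with a 10 ms sleep in between (assuming t3_os_sleep()
 * takes milliseconds), so the effective timeout is roughly 50 ms.
 */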
1313static int
1314await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1315			      unsigned long n)
1316{
1317	int attempts = 5;
1318
1319	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1320		if (!--attempts)
1321			return (ETIMEDOUT);
1322		t3_os_sleep(10);
1323	}
1324	return 0;
1325}
1326
1327static int
1328init_tp_parity(struct adapter *adap)
1329{
1330	int i;
1331	struct mbuf *m;
1332	struct cpl_set_tcb_field *greq;
1333	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1334
1335	t3_tp_set_offload_mode(adap, 1);
1336
1337	for (i = 0; i < 16; i++) {
1338		struct cpl_smt_write_req *req;
1339
1340		m = m_gethdr(M_WAITOK, MT_DATA);
1341		req = mtod(m, struct cpl_smt_write_req *);
1342		m->m_len = m->m_pkthdr.len = sizeof(*req);
1343		memset(req, 0, sizeof(*req));
1344		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1345		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1346		req->iff = i;
1347		t3_mgmt_tx(adap, m);
1348	}
1349
1350	for (i = 0; i < 2048; i++) {
1351		struct cpl_l2t_write_req *req;
1352
1353		m = m_gethdr(M_WAITOK, MT_DATA);
1354		req = mtod(m, struct cpl_l2t_write_req *);
1355		m->m_len = m->m_pkthdr.len = sizeof(*req);
1356		memset(req, 0, sizeof(*req));
1357		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1358		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1359		req->params = htonl(V_L2T_W_IDX(i));
1360		t3_mgmt_tx(adap, m);
1361	}
1362
1363	for (i = 0; i < 2048; i++) {
1364		struct cpl_rte_write_req *req;
1365
1366		m = m_gethdr(M_WAITOK, MT_DATA);
1367		req = mtod(m, struct cpl_rte_write_req *);
1368		m->m_len = m->m_pkthdr.len = sizeof(*req);
1369		memset(req, 0, sizeof(*req));
1370		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1371		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1372		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1373		t3_mgmt_tx(adap, m);
1374	}
1375
1376	m = m_gethdr(M_WAITOK, MT_DATA);
1377	greq = mtod(m, struct cpl_set_tcb_field *);
1378	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1379	memset(greq, 0, sizeof(*greq));
1380	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1381	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1382	greq->mask = htobe64(1);
1383	t3_mgmt_tx(adap, m);
1384
1385	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1386	t3_tp_set_offload_mode(adap, 0);
1387	return (i);
1388}
1389
1390/**
1391 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1392 *	@adap: the adapter
1393 *
1394 *	Sets up RSS to distribute packets to multiple receive queues.  We
1395 *	configure the RSS CPU lookup table to distribute to the number of HW
1396 *	receive queues, and the response queue lookup table to narrow that
1397 *	down to the response queues actually configured for each port.
1398 *	We always configure the RSS mapping for two ports since the mapping
1399 *	table has plenty of entries.
1400 */
1401static void
1402setup_rss(adapter_t *adap)
1403{
1404	int i;
1405	u_int nq[2];
1406	uint8_t cpus[SGE_QSETS + 1];
1407	uint16_t rspq_map[RSS_TABLE_SIZE];
1408
1409	for (i = 0; i < SGE_QSETS; ++i)
1410		cpus[i] = i;
1411	cpus[SGE_QSETS] = 0xff;
1412
1413	nq[0] = nq[1] = 0;
1414	for_each_port(adap, i) {
1415		const struct port_info *pi = adap2pinfo(adap, i);
1416
1417		nq[pi->tx_chan] += pi->nqsets;
1418	}
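	/*
	 * Fill the response-queue lookup table: the first half of the table
	 * cycles through channel 0's queue sets, the second half through
	 * channel 1's (offset by nq[0]).  For example, with two ports of
	 * four queue sets each, the two halves map to qsets 0-3 and 4-7
	 * respectively.
	 */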
1419	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1420		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1421		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1422	}
1423
1424	/* Calculate the reverse RSS map table */
1425	for (i = 0; i < SGE_QSETS; ++i)
1426		adap->rrss_map[i] = 0xff;
1427	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1428		if (adap->rrss_map[rspq_map[i]] == 0xff)
1429			adap->rrss_map[rspq_map[i]] = i;
1430
1431	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1432		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1433	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1434	              cpus, rspq_map);
1435
1436}
1437
1438/*
1439 * Sends an mbuf to an offload queue driver; this is just a thin
1440 * wrapper around t3_offload_tx().
1441 */
1442static inline int
1443offload_tx(struct t3cdev *tdev, struct mbuf *m)
1444{
1445	int ret;
1446
1447	ret = t3_offload_tx(tdev, m);
1448	return (ret);
1449}
1450
1451static int
1452write_smt_entry(struct adapter *adapter, int idx)
1453{
1454	struct port_info *pi = &adapter->port[idx];
1455	struct cpl_smt_write_req *req;
1456	struct mbuf *m;
1457
1458	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1459		return (ENOMEM);
1460
1461	req = mtod(m, struct cpl_smt_write_req *);
1462	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1463
1464	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1465	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1466	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1467	req->iff = idx;
1468	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1469	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1470
1471	m_set_priority(m, 1);
1472
1473	offload_tx(&adapter->tdev, m);
1474
1475	return (0);
1476}
1477
1478static int
1479init_smt(struct adapter *adapter)
1480{
1481	int i;
1482
1483	for_each_port(adapter, i)
1484		write_smt_entry(adapter, i);
1485	return 0;
1486}
1487
1488static void
1489init_port_mtus(adapter_t *adapter)
1490{
1491	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1492
1493	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1494}
1495
1496static void
1497send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1498			      int hi, int port)
1499{
1500	struct mbuf *m;
1501	struct mngt_pktsched_wr *req;
1502
1503	m = m_gethdr(M_DONTWAIT, MT_DATA);
1504	if (m) {
1505		req = mtod(m, struct mngt_pktsched_wr *);
1506		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1507		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1508		req->sched = sched;
1509		req->idx = qidx;
1510		req->min = lo;
1511		req->max = hi;
1512		req->binding = port;
1513		m->m_len = m->m_pkthdr.len = sizeof(*req);
1514		t3_mgmt_tx(adap, m);
1515	}
1516}
1517
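/*
 * Bind each queue set to its port's Tx channel by sending one
 * FW_MNGTOPCODE_PKTSCHED_SET management request per queue set
 * (sched 1, min/max of -1, binding set to the port's Tx channel).
 */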
1518static void
1519bind_qsets(adapter_t *sc)
1520{
1521	int i, j;
1522
1523	for (i = 0; i < (sc)->params.nports; ++i) {
1524		const struct port_info *pi = adap2pinfo(sc, i);
1525
1526		for (j = 0; j < pi->nqsets; ++j) {
1527			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1528					  -1, pi->tx_chan);
1529
1530		}
1531	}
1532}
1533
1534static void
1535update_tpeeprom(struct adapter *adap)
1536{
1537	const struct firmware *tpeeprom;
1538
1539	uint32_t version;
1540	unsigned int major, minor;
1541	int ret, len;
1542	char rev, name[32];
1543
1544	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1545
1546	major = G_TP_VERSION_MAJOR(version);
1547	minor = G_TP_VERSION_MINOR(version);
1548	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1549		return;
1550
1551	rev = t3rev2char(adap);
1552	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1553
1554	tpeeprom = firmware_get(name);
1555	if (tpeeprom == NULL) {
1556		device_printf(adap->dev,
1557			      "could not load TP EEPROM: unable to load %s\n",
1558			      name);
1559		return;
1560	}
1561
1562	len = tpeeprom->datasize - 4;
1563
1564	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1565	if (ret)
1566		goto release_tpeeprom;
1567
1568	if (len != TP_SRAM_LEN) {
1569		device_printf(adap->dev,
1570			      "%s length is wrong len=%d expected=%d\n", name,
1571			      len, TP_SRAM_LEN);
1572		return;
1573	}
1574
1575	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1576	    TP_SRAM_OFFSET);
1577
1578	if (!ret) {
1579		device_printf(adap->dev,
1580			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1581			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1582	} else
1583		device_printf(adap->dev,
1584			      "Protocol SRAM image update in EEPROM failed\n");
1585
1586release_tpeeprom:
1587	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1588
1589	return;
1590}
1591
1592static int
1593update_tpsram(struct adapter *adap)
1594{
1595	const struct firmware *tpsram;
1596	int ret;
1597	char rev, name[32];
1598
1599	rev = t3rev2char(adap);
1600	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1601
1602	update_tpeeprom(adap);
1603
1604	tpsram = firmware_get(name);
1605	if (tpsram == NULL){
1606		device_printf(adap->dev, "could not load TP SRAM\n");
1607		return (EINVAL);
1608	} else
1609		device_printf(adap->dev, "updating TP SRAM\n");
1610
1611	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1612	if (ret)
1613		goto release_tpsram;
1614
1615	ret = t3_set_proto_sram(adap, tpsram->data);
1616	if (ret)
1617		device_printf(adap->dev, "loading protocol SRAM failed\n");
1618
1619release_tpsram:
1620	firmware_put(tpsram, FIRMWARE_UNLOAD);
1621
1622	return ret;
1623}
1624
1625/**
1626 *	cxgb_up - enable the adapter
1627 *	@adap: adapter being enabled
1628 *
1629 *	Called when the first port is enabled, this function performs the
1630 *	actions necessary to make an adapter operational, such as completing
1631 *	the initialization of HW modules, and enabling interrupts.
1632 */
1633static int
1634cxgb_up(struct adapter *sc)
1635{
1636	int err = 0;
1637	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1638
1639	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1640					   __func__, sc->open_device_map));
1641
1642	if ((sc->flags & FULL_INIT_DONE) == 0) {
1643
1644		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1645
1646		if ((sc->flags & FW_UPTODATE) == 0)
1647			if ((err = upgrade_fw(sc)))
1648				goto out;
1649
1650		if ((sc->flags & TPS_UPTODATE) == 0)
1651			if ((err = update_tpsram(sc)))
1652				goto out;
1653
1654		if (is_offload(sc) && nfilters != 0) {
1655			sc->params.mc5.nservers = 0;
1656
1657			if (nfilters < 0)
1658				sc->params.mc5.nfilters = mxf;
1659			else
1660				sc->params.mc5.nfilters = min(nfilters, mxf);
1661		}
1662
1663		err = t3_init_hw(sc, 0);
1664		if (err)
1665			goto out;
1666
1667		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1668		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1669
1670		err = setup_sge_qsets(sc);
1671		if (err)
1672			goto out;
1673
1674		alloc_filters(sc);
1675		setup_rss(sc);
1676
1677		t3_intr_clear(sc);
1678		err = cxgb_setup_interrupts(sc);
1679		if (err)
1680			goto out;
1681
1682		t3_add_configured_sysctls(sc);
1683		sc->flags |= FULL_INIT_DONE;
1684	}
1685
1686	t3_intr_clear(sc);
1687	t3_sge_start(sc);
1688	t3_intr_enable(sc);
1689
1690	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1691	    is_offload(sc) && init_tp_parity(sc) == 0)
1692		sc->flags |= TP_PARITY_INIT;
1693
1694	if (sc->flags & TP_PARITY_INIT) {
1695		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1696		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1697	}
1698
1699	if (!(sc->flags & QUEUES_BOUND)) {
1700		bind_qsets(sc);
1701		setup_hw_filters(sc);
1702		sc->flags |= QUEUES_BOUND;
1703	}
1704
1705	t3_sge_reset_adapter(sc);
1706out:
1707	return (err);
1708}
1709
1710/*
1711 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1712 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1713 * during controller_detach, not here.
1714 */
1715static void
1716cxgb_down(struct adapter *sc)
1717{
1718	t3_sge_stop(sc);
1719	t3_intr_disable(sc);
1720}
1721
1722static int
1723offload_open(struct port_info *pi)
1724{
1725	struct adapter *sc = pi->adapter;
1726	struct t3cdev *tdev = &sc->tdev;
1727
1728	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1729
1730	t3_tp_set_offload_mode(sc, 1);
1731	tdev->lldev = pi->ifp;
1732	init_port_mtus(sc);
1733	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1734		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1735	init_smt(sc);
1736	cxgb_add_clients(tdev);
1737
1738	return (0);
1739}
1740
1741static int
1742offload_close(struct t3cdev *tdev)
1743{
1744	struct adapter *adapter = tdev2adap(tdev);
1745
1746	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1747		return (0);
1748
1749	/* Call back all registered clients */
1750	cxgb_remove_clients(tdev);
1751
1752	tdev->lldev = NULL;
1753	cxgb_set_dummy_ops(tdev);
1754	t3_tp_set_offload_mode(adapter, 0);
1755
1756	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1757
1758	return (0);
1759}
1760
1761/*
1762 * if_init for cxgb ports.
1763 */
1764static void
1765cxgb_init(void *arg)
1766{
1767	struct port_info *p = arg;
1768	struct adapter *sc = p->adapter;
1769
1770	ADAPTER_LOCK(sc);
1771	cxgb_init_locked(p); /* releases adapter lock */
1772	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1773}
1774
1775static int
1776cxgb_init_locked(struct port_info *p)
1777{
1778	struct adapter *sc = p->adapter;
1779	struct ifnet *ifp = p->ifp;
1780	struct cmac *mac = &p->mac;
1781	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1782
1783	ADAPTER_LOCK_ASSERT_OWNED(sc);
1784
1785	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1786		gave_up_lock = 1;
1787		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1788			rc = EINTR;
1789			goto done;
1790		}
1791	}
1792	if (IS_DOOMED(p)) {
1793		rc = ENXIO;
1794		goto done;
1795	}
1796	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1797
1798	/*
1799	 * The code that runs during one-time adapter initialization can sleep
1800	 * so it's important not to hold any locks across it.
1801	 */
1802	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1803
1804	if (may_sleep) {
1805		SET_BUSY(sc);
1806		gave_up_lock = 1;
1807		ADAPTER_UNLOCK(sc);
1808	}
1809
1810	if (sc->open_device_map == 0) {
1811		if ((rc = cxgb_up(sc)) != 0)
1812			goto done;
1813
1814		if (is_offload(sc) && !ofld_disable && offload_open(p))
1815			log(LOG_WARNING,
1816			    "Could not initialize offload capabilities\n");
1817	}
1818
1819	PORT_LOCK(p);
1820	if (isset(&sc->open_device_map, p->port_id) &&
1821	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1822		PORT_UNLOCK(p);
1823		goto done;
1824	}
1825	t3_port_intr_enable(sc, p->port_id);
1826	if (!mac->multiport)
1827		t3_mac_init(mac);
1828	cxgb_update_mac_settings(p);
1829	t3_link_start(&p->phy, mac, &p->link_config);
1830	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1831	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1832	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1833	PORT_UNLOCK(p);
1834
1835	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1836		struct sge_qset *qs = &sc->sge.qs[i];
1837		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1838
1839		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1840				 txq->txq_watchdog.c_cpu);
1841	}
1842
1843	/* all ok */
1844	setbit(&sc->open_device_map, p->port_id);
1845	callout_reset(&p->link_check_ch,
1846	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1847	    link_check_callout, p);
1848
1849done:
1850	if (may_sleep) {
1851		ADAPTER_LOCK(sc);
1852		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1853		CLR_BUSY(sc);
1854	}
1855	if (gave_up_lock)
1856		wakeup_one(&sc->flags);
1857	ADAPTER_UNLOCK(sc);
1858	return (rc);
1859}
1860
1861static int
1862cxgb_uninit_locked(struct port_info *p)
1863{
1864	struct adapter *sc = p->adapter;
1865	int rc;
1866
1867	ADAPTER_LOCK_ASSERT_OWNED(sc);
1868
1869	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1870		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1871			rc = EINTR;
1872			goto done;
1873		}
1874	}
1875	if (IS_DOOMED(p)) {
1876		rc = ENXIO;
1877		goto done;
1878	}
1879	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1880	SET_BUSY(sc);
1881	ADAPTER_UNLOCK(sc);
1882
1883	rc = cxgb_uninit_synchronized(p);
1884
1885	ADAPTER_LOCK(sc);
1886	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1887	CLR_BUSY(sc);
1888	wakeup_one(&sc->flags);
1889done:
1890	ADAPTER_UNLOCK(sc);
1891	return (rc);
1892}
1893
1894/*
1895 * Called on "ifconfig down", and from port_detach
1896 */
1897static int
1898cxgb_uninit_synchronized(struct port_info *pi)
1899{
1900	struct adapter *sc = pi->adapter;
1901	struct ifnet *ifp = pi->ifp;
1902
1903	/*
1904	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1905	 */
1906	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1907
1908	/*
1909	 * Clear this port's bit from the open device map, and then drain all
1910	 * the tasks that can access/manipulate this port's port_info or ifp.
1911	 * We disable this port's interrupts here and so the slow/ext
1912	 * interrupt tasks won't be enqueued.  The tick task will continue to
1913	 * be enqueued every second but the runs after this drain will not see
1914	 * this port in the open device map.
1915	 *
1916	 * A well-behaved task must take open_device_map into account and ignore
1917	 * ports that are not open.
1918	 */
1919	clrbit(&sc->open_device_map, pi->port_id);
1920	t3_port_intr_disable(sc, pi->port_id);
1921	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1922	taskqueue_drain(sc->tq, &sc->tick_task);
1923
1924	callout_drain(&pi->link_check_ch);
1925	taskqueue_drain(sc->tq, &pi->link_check_task);
1926
1927	PORT_LOCK(pi);
1928	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1929
1930	/* disable pause frames */
1931	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1932
1933	/* Reset RX FIFO HWM */
1934	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1935			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1936
1937	DELAY(100 * 1000);
1938
1939	/* Wait for TXFIFO empty */
1940	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1941			F_TXFIFO_EMPTY, 1, 20, 5);
1942
1943	DELAY(100 * 1000);
1944	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1945
1946
1947	pi->phy.ops->power_down(&pi->phy, 1);
1948
1949	PORT_UNLOCK(pi);
1950
1951	pi->link_config.link_ok = 0;
1952	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1953
1954	if ((sc->open_device_map & PORT_MASK) == 0)
1955		offload_close(&sc->tdev);
1956
1957	if (sc->open_device_map == 0)
1958		cxgb_down(pi->adapter);
1959
1960	return (0);
1961}
1962
1963/*
1964 * Mark lro enabled or disabled in all qsets for this port
1965 */
1966static int
1967cxgb_set_lro(struct port_info *p, int enabled)
1968{
1969	int i;
1970	struct adapter *adp = p->adapter;
1971	struct sge_qset *q;
1972
1973	for (i = 0; i < p->nqsets; i++) {
1974		q = &adp->sge.qs[p->first_qset + i];
1975		q->lro.enabled = (enabled != 0);
1976	}
1977	return (0);
1978}
1979
1980static int
1981cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1982{
1983	struct port_info *p = ifp->if_softc;
1984	struct adapter *sc = p->adapter;
1985	struct ifreq *ifr = (struct ifreq *)data;
1986	int flags, error = 0, mtu;
1987	uint32_t mask;
1988
1989	switch (command) {
1990	case SIOCSIFMTU:
1991		ADAPTER_LOCK(sc);
1992		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1993		if (error) {
1994fail:
1995			ADAPTER_UNLOCK(sc);
1996			return (error);
1997		}
1998
1999		mtu = ifr->ifr_mtu;
2000		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2001			error = EINVAL;
2002		} else {
2003			ifp->if_mtu = mtu;
2004			PORT_LOCK(p);
2005			cxgb_update_mac_settings(p);
2006			PORT_UNLOCK(p);
2007		}
2008		ADAPTER_UNLOCK(sc);
2009		break;
2010	case SIOCSIFFLAGS:
2011		ADAPTER_LOCK(sc);
2012		if (IS_DOOMED(p)) {
2013			error = ENXIO;
2014			goto fail;
2015		}
2016		if (ifp->if_flags & IFF_UP) {
2017			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2018				flags = p->if_flags;
2019				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2020				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2021					if (IS_BUSY(sc)) {
2022						error = EBUSY;
2023						goto fail;
2024					}
2025					PORT_LOCK(p);
2026					cxgb_update_mac_settings(p);
2027					PORT_UNLOCK(p);
2028				}
2029				ADAPTER_UNLOCK(sc);
2030			} else
2031				error = cxgb_init_locked(p);
2032			p->if_flags = ifp->if_flags;
2033		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2034			error = cxgb_uninit_locked(p);
2035		else
2036			ADAPTER_UNLOCK(sc);
2037
2038		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2039		break;
2040	case SIOCADDMULTI:
2041	case SIOCDELMULTI:
2042		ADAPTER_LOCK(sc);
2043		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044		if (error)
2045			goto fail;
2046
2047		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2048			PORT_LOCK(p);
2049			cxgb_update_mac_settings(p);
2050			PORT_UNLOCK(p);
2051		}
2052		ADAPTER_UNLOCK(sc);
2053
2054		break;
2055	case SIOCSIFCAP:
2056		ADAPTER_LOCK(sc);
2057		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2058		if (error)
2059			goto fail;
2060
2061		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2062		if (mask & IFCAP_TXCSUM) {
2063			ifp->if_capenable ^= IFCAP_TXCSUM;
2064			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2065
2066			if (IFCAP_TSO & ifp->if_capenable &&
2067			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2068				ifp->if_capenable &= ~IFCAP_TSO;
2069				ifp->if_hwassist &= ~CSUM_TSO;
2070				if_printf(ifp,
2071				    "tso disabled due to -txcsum.\n");
2072			}
2073		}
2074		if (mask & IFCAP_RXCSUM)
2075			ifp->if_capenable ^= IFCAP_RXCSUM;
2076		if (mask & IFCAP_TSO4) {
2077			ifp->if_capenable ^= IFCAP_TSO4;
2078
2079			if (IFCAP_TSO & ifp->if_capenable) {
2080				if (IFCAP_TXCSUM & ifp->if_capenable)
2081					ifp->if_hwassist |= CSUM_TSO;
2082				else {
2083					ifp->if_capenable &= ~IFCAP_TSO;
2084					ifp->if_hwassist &= ~CSUM_TSO;
2085					if_printf(ifp,
2086					    "enable txcsum first.\n");
2087					error = EAGAIN;
2088				}
2089			} else
2090				ifp->if_hwassist &= ~CSUM_TSO;
2091		}
2092		if (mask & IFCAP_LRO) {
2093			ifp->if_capenable ^= IFCAP_LRO;
2094
2095			/* Safe to do this even if cxgb_up not called yet */
2096			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2097		}
2098		if (mask & IFCAP_VLAN_HWTAGGING) {
2099			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2100			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2101				PORT_LOCK(p);
2102				cxgb_update_mac_settings(p);
2103				PORT_UNLOCK(p);
2104			}
2105		}
2106		if (mask & IFCAP_VLAN_MTU) {
2107			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2108			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2109				PORT_LOCK(p);
2110				cxgb_update_mac_settings(p);
2111				PORT_UNLOCK(p);
2112			}
2113		}
2114		if (mask & IFCAP_VLAN_HWTSO)
2115			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2116		if (mask & IFCAP_VLAN_HWCSUM)
2117			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2118
2119#ifdef VLAN_CAPABILITIES
2120		VLAN_CAPABILITIES(ifp);
2121#endif
2122		ADAPTER_UNLOCK(sc);
2123		break;
2124	case SIOCSIFMEDIA:
2125	case SIOCGIFMEDIA:
2126		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2127		break;
2128	default:
2129		error = ether_ioctl(ifp, command, data);
2130	}
2131
2132	return (error);
2133}
2134
2135static int
2136cxgb_media_change(struct ifnet *ifp)
2137{
2138	return (EOPNOTSUPP);
2139}
2140
2141/*
2142 * Translates phy->modtype to the correct Ethernet media subtype.
2143 */
2144static int
2145cxgb_ifm_type(int mod)
2146{
2147	switch (mod) {
2148	case phy_modtype_sr:
2149		return (IFM_10G_SR);
2150	case phy_modtype_lr:
2151		return (IFM_10G_LR);
2152	case phy_modtype_lrm:
2153		return (IFM_10G_LRM);
2154	case phy_modtype_twinax:
2155		return (IFM_10G_TWINAX);
2156	case phy_modtype_twinax_long:
2157		return (IFM_10G_TWINAX_LONG);
2158	case phy_modtype_none:
2159		return (IFM_NONE);
2160	case phy_modtype_unknown:
2161		return (IFM_UNKNOWN);
2162	}
2163
2164	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2165	return (IFM_UNKNOWN);
2166}
2167
2168/*
2169 * Rebuilds the ifmedia list for this port, and sets the current media.
2170 */
2171static void
2172cxgb_build_medialist(struct port_info *p)
2173{
2174	struct cphy *phy = &p->phy;
2175	struct ifmedia *media = &p->media;
2176	int mod = phy->modtype;
2177	int m = IFM_ETHER | IFM_FDX;
2178
2179	PORT_LOCK(p);
2180
2181	ifmedia_removeall(media);
2182	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2183		/* Copper (RJ45) */
2184
2185		if (phy->caps & SUPPORTED_10000baseT_Full)
2186			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2187
2188		if (phy->caps & SUPPORTED_1000baseT_Full)
2189			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2190
2191		if (phy->caps & SUPPORTED_100baseT_Full)
2192			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2193
2194		if (phy->caps & SUPPORTED_10baseT_Full)
2195			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2196
2197		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2198		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2199
2200	} else if (phy->caps & SUPPORTED_TP) {
2201		/* Copper (CX4) */
2202
2203		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2204			("%s: unexpected cap 0x%x", __func__, phy->caps));
2205
2206		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2207		ifmedia_set(media, m | IFM_10G_CX4);
2208
2209	} else if (phy->caps & SUPPORTED_FIBRE &&
2210		   phy->caps & SUPPORTED_10000baseT_Full) {
2211		/* 10G optical (but includes SFP+ twinax) */
2212
2213		m |= cxgb_ifm_type(mod);
2214		if (IFM_SUBTYPE(m) == IFM_NONE)
2215			m &= ~IFM_FDX;
2216
2217		ifmedia_add(media, m, mod, NULL);
2218		ifmedia_set(media, m);
2219
2220	} else if (phy->caps & SUPPORTED_FIBRE &&
2221		   phy->caps & SUPPORTED_1000baseT_Full) {
2222		/* 1G optical */
2223
2224		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2225		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2226		ifmedia_set(media, m | IFM_1000_SX);
2227
2228	} else {
2229		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2230			    phy->caps));
2231	}
2232
2233	PORT_UNLOCK(p);
2234}
2235
2236static void
2237cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2238{
2239	struct port_info *p = ifp->if_softc;
2240	struct ifmedia_entry *cur = p->media.ifm_cur;
2241	int speed = p->link_config.speed;
2242
2243	if (cur->ifm_data != p->phy.modtype) {
2244		cxgb_build_medialist(p);
2245		cur = p->media.ifm_cur;
2246	}
2247
2248	ifmr->ifm_status = IFM_AVALID;
2249	if (!p->link_config.link_ok)
2250		return;
2251
2252	ifmr->ifm_status |= IFM_ACTIVE;
2253
2254	/*
2255	 * active and current will differ iff current media is autoselect.  That
2256	 * can happen only for copper RJ45.
2257	 */
2258	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2259		return;
2260	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2261		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2262
2263	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2264	if (speed == SPEED_10000)
2265		ifmr->ifm_active |= IFM_10G_T;
2266	else if (speed == SPEED_1000)
2267		ifmr->ifm_active |= IFM_1000_T;
2268	else if (speed == SPEED_100)
2269		ifmr->ifm_active |= IFM_100_TX;
2270	else if (speed == SPEED_10)
2271		ifmr->ifm_active |= IFM_10_T;
2272	else
2273		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2274			    speed));
2275}
2276
2277static void
2278cxgb_async_intr(void *data)
2279{
2280	adapter_t *sc = data;
2281
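
	/*
	 * Mask PL interrupts (the read flushes the posted write) and defer
	 * the actual handling to the slow interrupt task.
	 */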
2282	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2283	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2284	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2285}
2286
2287static void
2288link_check_callout(void *arg)
2289{
2290	struct port_info *pi = arg;
2291	struct adapter *sc = pi->adapter;
2292
2293	if (!isset(&sc->open_device_map, pi->port_id))
2294		return;
2295
2296	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2297}
2298
2299static void
2300check_link_status(void *arg, int pending)
2301{
2302	struct port_info *pi = arg;
2303	struct adapter *sc = pi->adapter;
2304
2305	if (!isset(&sc->open_device_map, pi->port_id))
2306		return;
2307
2308	t3_link_changed(sc, pi->port_id);
2309
2310	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2311		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2312}
2313
2314void
2315t3_os_link_intr(struct port_info *pi)
2316{
2317	/*
2318	 * Schedule a link check in the near future.  If the link is flapping
2319	 * rapidly we'll keep resetting the callout and delaying the check until
2320	 * things stabilize a bit.
2321	 */
2322	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2323}
2324
2325static void
2326check_t3b2_mac(struct adapter *sc)
2327{
2328	int i;
2329
2330	if (sc->flags & CXGB_SHUTDOWN)
2331		return;
2332
2333	for_each_port(sc, i) {
2334		struct port_info *p = &sc->port[i];
2335		int status;
2336#ifdef INVARIANTS
2337		struct ifnet *ifp = p->ifp;
2338#endif
2339
2340		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2341		    !p->link_config.link_ok)
2342			continue;
2343
2344		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2345			("%s: state mismatch (drv_flags %x, device_map %x)",
2346			 __func__, ifp->if_drv_flags, sc->open_device_map));
2347
2348		PORT_LOCK(p);
2349		status = t3b2_mac_watchdog_task(&p->mac);
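		/* 1 means the MAC was toggled, 2 means it needs to be reset. */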
2350		if (status == 1)
2351			p->mac.stats.num_toggled++;
2352		else if (status == 2) {
2353			struct cmac *mac = &p->mac;
2354
2355			cxgb_update_mac_settings(p);
2356			t3_link_start(&p->phy, mac, &p->link_config);
2357			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2358			t3_port_intr_enable(sc, p->port_id);
2359			p->mac.stats.num_resets++;
2360		}
2361		PORT_UNLOCK(p);
2362	}
2363}
2364
2365static void
2366cxgb_tick(void *arg)
2367{
2368	adapter_t *sc = (adapter_t *)arg;
2369
2370	if (sc->flags & CXGB_SHUTDOWN)
2371		return;
2372
2373	taskqueue_enqueue(sc->tq, &sc->tick_task);
2374	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2375}
2376
2377static void
2378cxgb_tick_handler(void *arg, int count)
2379{
2380	adapter_t *sc = (adapter_t *)arg;
2381	const struct adapter_params *p = &sc->params;
2382	int i;
2383	uint32_t cause, reset;
2384
2385	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2386		return;
2387
2388	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2389		check_t3b2_mac(sc);
2390
2391	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2392	if (cause) {
2393		struct sge_qset *qs = &sc->sge.qs[0];
2394		uint32_t mask, v;
2395
2396		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2397
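		/*
		 * One starvation bit per response queue, then (after skipping
		 * the RSPQXDISABLED bits) two free-list "empty" bits per qset.
		 */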
2398		mask = 1;
2399		for (i = 0; i < SGE_QSETS; i++) {
2400			if (v & mask)
2401				qs[i].rspq.starved++;
2402			mask <<= 1;
2403		}
2404
2405		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2406
2407		for (i = 0; i < SGE_QSETS * 2; i++) {
2408			if (v & mask) {
2409				qs[i / 2].fl[i % 2].empty++;
2410			}
2411			mask <<= 1;
2412		}
2413
2414		/* clear */
2415		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2416		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2417	}
2418
2419	for (i = 0; i < sc->params.nports; i++) {
2420		struct port_info *pi = &sc->port[i];
2421		struct ifnet *ifp = pi->ifp;
2422		struct cmac *mac = &pi->mac;
2423		struct mac_stats *mstats = &mac->stats;
2424		int drops, j;
2425
2426		if (!isset(&sc->open_device_map, pi->port_id))
2427			continue;
2428
2429		PORT_LOCK(pi);
2430		t3_mac_update_stats(mac);
2431		PORT_UNLOCK(pi);
2432
2433		ifp->if_opackets = mstats->tx_frames;
2434		ifp->if_ipackets = mstats->rx_frames;
2435		ifp->if_obytes = mstats->tx_octets;
2436		ifp->if_ibytes = mstats->rx_octets;
2437		ifp->if_omcasts = mstats->tx_mcast_frames;
2438		ifp->if_imcasts = mstats->rx_mcast_frames;
2439		ifp->if_collisions = mstats->tx_total_collisions;
2440		ifp->if_iqdrops = mstats->rx_cong_drops;
2441
2442		drops = 0;
2443		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2444			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2445		ifp->if_snd.ifq_drops = drops;
2446
2447		ifp->if_oerrors =
2448		    mstats->tx_excess_collisions +
2449		    mstats->tx_underrun +
2450		    mstats->tx_len_errs +
2451		    mstats->tx_mac_internal_errs +
2452		    mstats->tx_excess_deferral +
2453		    mstats->tx_fcs_errs;
2454		ifp->if_ierrors =
2455		    mstats->rx_jabber +
2456		    mstats->rx_data_errs +
2457		    mstats->rx_sequence_errs +
2458		    mstats->rx_runt +
2459		    mstats->rx_too_long +
2460		    mstats->rx_mac_internal_errs +
2461		    mstats->rx_short +
2462		    mstats->rx_fcs_errs;
2463
2464		if (mac->multiport)
2465			continue;
2466
2467		/* Count rx fifo overflows, once per second */
2468		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2469		reset = 0;
2470		if (cause & F_RXFIFO_OVERFLOW) {
2471			mac->stats.rx_fifo_ovfl++;
2472			reset |= F_RXFIFO_OVERFLOW;
2473		}
2474		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2475	}
2476}
2477
2478static void
2479touch_bars(device_t dev)
2480{
2481	/*
2482	 * Don't enable yet
2483	 */
2484#if !defined(__LP64__) && 0
2485	u32 v;
2486
2487	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2488	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2489	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2490	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2491	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2492	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2493#endif
2494}
2495
2496static int
2497set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2498{
2499	uint8_t *buf;
2500	int err = 0;
2501	u32 aligned_offset, aligned_len, *p;
2502	struct adapter *adapter = pi->adapter;
2503
2504
2505	aligned_offset = offset & ~3;
2506	aligned_len = (len + (offset & 3) + 3) & ~3;
2507
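	/*
	 * The EEPROM is written in 4-byte words.  If the request is not
	 * word-aligned, read the words that straddle the region and merge
	 * the caller's data into them before writing back.
	 */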
2508	if (aligned_offset != offset || aligned_len != len) {
2509		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2510		if (!buf)
2511			return (ENOMEM);
2512		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2513		if (!err && aligned_len > 4)
2514			err = t3_seeprom_read(adapter,
2515					      aligned_offset + aligned_len - 4,
2516					      (u32 *)&buf[aligned_len - 4]);
2517		if (err)
2518			goto out;
2519		memcpy(buf + (offset & 3), data, len);
2520	} else
2521		buf = (uint8_t *)(uintptr_t)data;
2522
2523	err = t3_seeprom_wp(adapter, 0);
2524	if (err)
2525		goto out;
2526
2527	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2528		err = t3_seeprom_write(adapter, aligned_offset, *p);
2529		aligned_offset += 4;
2530	}
2531
2532	if (!err)
2533		err = t3_seeprom_wp(adapter, 1);
2534out:
2535	if (buf != data)
2536		free(buf, M_DEVBUF);
2537	return (err);
2538}
2539
2540
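/*
 * Range check helper for the extension ioctls below: a negative value
 * means "not specified" and is always treated as in range.
 */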
2541static int
2542in_range(int val, int lo, int hi)
2543{
2544	return (val < 0 || (val <= hi && val >= lo));
2545}
2546
2547static int
2548cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2549{
2550	return (0);
2551}
2552
2553static int
2554cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2555{
2556	return (0);
2557}
2558
2559static int
2560cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2561    int fflag, struct thread *td)
2562{
2563	int mmd, error = 0;
2564	struct port_info *pi = dev->si_drv1;
2565	adapter_t *sc = pi->adapter;
2566
2567#ifdef PRIV_SUPPORTED
2568	if (priv_check(td, PRIV_DRIVER)) {
2569		if (cxgb_debug)
2570			printf("user does not have access to privileged ioctls\n");
2571		return (EPERM);
2572	}
2573#else
2574	if (suser(td)) {
2575		if (cxgb_debug)
2576			printf("user does not have access to privileged ioctls\n");
2577		return (EPERM);
2578	}
2579#endif
2580
2581	switch (cmd) {
2582	case CHELSIO_GET_MIIREG: {
2583		uint32_t val;
2584		struct cphy *phy = &pi->phy;
2585		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2586
2587		if (!phy->mdio_read)
2588			return (EOPNOTSUPP);
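		/*
		 * 10G PHYs use clause-45 MDIO: the MMD is carried in the
		 * upper byte of phy_id and defaults to the PCS device.
		 */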
2589		if (is_10G(sc)) {
2590			mmd = mid->phy_id >> 8;
2591			if (!mmd)
2592				mmd = MDIO_DEV_PCS;
2593			else if (mmd > MDIO_DEV_VEND2)
2594				return (EINVAL);
2595
2596			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2597					     mid->reg_num, &val);
2598		} else
2599		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2600					     mid->reg_num & 0x1f, &val);
2601		if (error == 0)
2602			mid->val_out = val;
2603		break;
2604	}
2605	case CHELSIO_SET_MIIREG: {
2606		struct cphy *phy = &pi->phy;
2607		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2608
2609		if (!phy->mdio_write)
2610			return (EOPNOTSUPP);
2611		if (is_10G(sc)) {
2612			mmd = mid->phy_id >> 8;
2613			if (!mmd)
2614				mmd = MDIO_DEV_PCS;
2615			else if (mmd > MDIO_DEV_VEND2)
2616				return (EINVAL);
2617
2618			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2619					      mmd, mid->reg_num, mid->val_in);
2620		} else
2621			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2622					      mid->reg_num & 0x1f,
2623					      mid->val_in);
2624		break;
2625	}
2626	case CHELSIO_SETREG: {
2627		struct ch_reg *edata = (struct ch_reg *)data;
2628		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2629			return (EFAULT);
2630		t3_write_reg(sc, edata->addr, edata->val);
2631		break;
2632	}
2633	case CHELSIO_GETREG: {
2634		struct ch_reg *edata = (struct ch_reg *)data;
2635		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2636			return (EFAULT);
2637		edata->val = t3_read_reg(sc, edata->addr);
2638		break;
2639	}
2640	case CHELSIO_GET_SGE_CONTEXT: {
2641		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2642		mtx_lock_spin(&sc->sge.reg_lock);
2643		switch (ecntxt->cntxt_type) {
2644		case CNTXT_TYPE_EGRESS:
2645			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2646			    ecntxt->data);
2647			break;
2648		case CNTXT_TYPE_FL:
2649			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2650			    ecntxt->data);
2651			break;
2652		case CNTXT_TYPE_RSP:
2653			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2654			    ecntxt->data);
2655			break;
2656		case CNTXT_TYPE_CQ:
2657			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2658			    ecntxt->data);
2659			break;
2660		default:
2661			error = EINVAL;
2662			break;
2663		}
2664		mtx_unlock_spin(&sc->sge.reg_lock);
2665		break;
2666	}
2667	case CHELSIO_GET_SGE_DESC: {
2668		struct ch_desc *edesc = (struct ch_desc *)data;
2669		int ret;
2670		if (edesc->queue_num >= SGE_QSETS * 6)
2671			return (EINVAL);
2672		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2673		    edesc->queue_num % 6, edesc->idx, edesc->data);
2674		if (ret < 0)
2675			return (EINVAL);
2676		edesc->size = ret;
2677		break;
2678	}
2679	case CHELSIO_GET_QSET_PARAMS: {
2680		struct qset_params *q;
2681		struct ch_qset_params *t = (struct ch_qset_params *)data;
2682		int q1 = pi->first_qset;
2683		int nqsets = pi->nqsets;
2684		int i;
2685
2686		if (t->qset_idx >= nqsets)
2687			return (EINVAL);
2688
2689		i = q1 + t->qset_idx;
2690		q = &sc->params.sge.qset[i];
2691		t->rspq_size   = q->rspq_size;
2692		t->txq_size[0] = q->txq_size[0];
2693		t->txq_size[1] = q->txq_size[1];
2694		t->txq_size[2] = q->txq_size[2];
2695		t->fl_size[0]  = q->fl_size;
2696		t->fl_size[1]  = q->jumbo_size;
2697		t->polling     = q->polling;
2698		t->lro         = q->lro;
2699		t->intr_lat    = q->coalesce_usecs;
2700		t->cong_thres  = q->cong_thres;
2701		t->qnum        = i;
2702
2703		if ((sc->flags & FULL_INIT_DONE) == 0)
2704			t->vector = 0;
2705		else if (sc->flags & USING_MSIX)
2706			t->vector = rman_get_start(sc->msix_irq_res[i]);
2707		else
2708			t->vector = rman_get_start(sc->irq_res);
2709
2710		break;
2711	}
2712	case CHELSIO_GET_QSET_NUM: {
2713		struct ch_reg *edata = (struct ch_reg *)data;
2714		edata->val = pi->nqsets;
2715		break;
2716	}
2717	case CHELSIO_LOAD_FW: {
2718		uint8_t *fw_data;
2719		uint32_t vers;
2720		struct ch_mem_range *t = (struct ch_mem_range *)data;
2721
2722		/*
2723		 * You're allowed to load firmware only before FULL_INIT_DONE.
2724		 *
2725		 * FW_UPTODATE is also set, so the rest of the initialization
2726		 * will not overwrite what was loaded here.  This gives you the
2727		 * flexibility to load any firmware (and maybe shoot yourself in
2728		 * the foot).
2729		 */
2730
2731		ADAPTER_LOCK(sc);
2732		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2733			ADAPTER_UNLOCK(sc);
2734			return (EBUSY);
2735		}
2736
2737		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2738		if (!fw_data)
2739			error = ENOMEM;
2740		else
2741			error = copyin(t->buf, fw_data, t->len);
2742
2743		if (!error)
2744			error = -t3_load_fw(sc, fw_data, t->len);
2745
2746		if (t3_get_fw_version(sc, &vers) == 0) {
2747			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2748			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2749			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2750		}
2751
2752		if (!error)
2753			sc->flags |= FW_UPTODATE;
2754
2755		free(fw_data, M_DEVBUF);
2756		ADAPTER_UNLOCK(sc);
2757		break;
2758	}
2759	case CHELSIO_LOAD_BOOT: {
2760		uint8_t *boot_data;
2761		struct ch_mem_range *t = (struct ch_mem_range *)data;
2762
2763		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2764		if (!boot_data)
2765			return (ENOMEM);
2766
2767		error = copyin(t->buf, boot_data, t->len);
2768		if (!error)
2769			error = -t3_load_boot(sc, boot_data, t->len);
2770
2771		free(boot_data, M_DEVBUF);
2772		break;
2773	}
2774	case CHELSIO_GET_PM: {
2775		struct ch_pm *m = (struct ch_pm *)data;
2776		struct tp_params *p = &sc->params.tp;
2777
2778		if (!is_offload(sc))
2779			return (EOPNOTSUPP);
2780
2781		m->tx_pg_sz = p->tx_pg_size;
2782		m->tx_num_pg = p->tx_num_pgs;
2783		m->rx_pg_sz  = p->rx_pg_size;
2784		m->rx_num_pg = p->rx_num_pgs;
2785		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2786
2787		break;
2788	}
2789	case CHELSIO_SET_PM: {
2790		struct ch_pm *m = (struct ch_pm *)data;
2791		struct tp_params *p = &sc->params.tp;
2792
2793		if (!is_offload(sc))
2794			return (EOPNOTSUPP);
2795		if (sc->flags & FULL_INIT_DONE)
2796			return (EBUSY);
2797
2798		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2799		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2800			return (EINVAL);	/* not power of 2 */
2801		if (!(m->rx_pg_sz & 0x14000))
2802			return (EINVAL);	/* not 16KB or 64KB */
2803		if (!(m->tx_pg_sz & 0x1554000))
2804			return (EINVAL);
2805		if (m->tx_num_pg == -1)
2806			m->tx_num_pg = p->tx_num_pgs;
2807		if (m->rx_num_pg == -1)
2808			m->rx_num_pg = p->rx_num_pgs;
2809		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2810			return (EINVAL);
2811		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2812		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2813			return (EINVAL);
2814
2815		p->rx_pg_size = m->rx_pg_sz;
2816		p->tx_pg_size = m->tx_pg_sz;
2817		p->rx_num_pgs = m->rx_num_pg;
2818		p->tx_num_pgs = m->tx_num_pg;
2819		break;
2820	}
2821	case CHELSIO_SETMTUTAB: {
2822		struct ch_mtus *m = (struct ch_mtus *)data;
2823		int i;
2824
2825		if (!is_offload(sc))
2826			return (EOPNOTSUPP);
2827		if (offload_running(sc))
2828			return (EBUSY);
2829		if (m->nmtus != NMTUS)
2830			return (EINVAL);
2831		if (m->mtus[0] < 81)         /* accommodate SACK */
2832			return (EINVAL);
2833
2834		/*
2835		 * MTUs must be in ascending order
2836		 */
2837		for (i = 1; i < NMTUS; ++i)
2838			if (m->mtus[i] < m->mtus[i - 1])
2839				return (EINVAL);
2840
2841		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2842		break;
2843	}
2844	case CHELSIO_GETMTUTAB: {
2845		struct ch_mtus *m = (struct ch_mtus *)data;
2846
2847		if (!is_offload(sc))
2848			return (EOPNOTSUPP);
2849
2850		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2851		m->nmtus = NMTUS;
2852		break;
2853	}
2854	case CHELSIO_GET_MEM: {
2855		struct ch_mem_range *t = (struct ch_mem_range *)data;
2856		struct mc7 *mem;
2857		uint8_t *useraddr;
2858		u64 buf[32];
2859
2860		/*
2861		 * Use these to avoid modifying len/addr in the return
2862		 * struct
2863		 */
2864		uint32_t len = t->len, addr = t->addr;
2865
2866		if (!is_offload(sc))
2867			return (EOPNOTSUPP);
2868		if (!(sc->flags & FULL_INIT_DONE))
2869			return (EIO);         /* need the memory controllers */
2870		if ((addr & 0x7) || (len & 0x7))
2871			return (EINVAL);
2872		if (t->mem_id == MEM_CM)
2873			mem = &sc->cm;
2874		else if (t->mem_id == MEM_PMRX)
2875			mem = &sc->pmrx;
2876		else if (t->mem_id == MEM_PMTX)
2877			mem = &sc->pmtx;
2878		else
2879			return (EINVAL);
2880
2881		/*
2882		 * Version scheme:
2883		 * bits 0..9: chip version
2884		 * bits 10..15: chip revision
2885		 */
2886		t->version = 3 | (sc->params.rev << 10);
2887
2888		/*
2889		 * Read 256 bytes at a time as len can be large and we don't
2890		 * want to use huge intermediate buffers.
2891		 */
2892		useraddr = (uint8_t *)t->buf;
2893		while (len) {
2894			unsigned int chunk = min(len, sizeof(buf));
2895
2896			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2897			if (error)
2898				return (-error);
2899			if (copyout(buf, useraddr, chunk))
2900				return (EFAULT);
2901			useraddr += chunk;
2902			addr += chunk;
2903			len -= chunk;
2904		}
2905		break;
2906	}
2907	case CHELSIO_READ_TCAM_WORD: {
2908		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2909
2910		if (!is_offload(sc))
2911			return (EOPNOTSUPP);
2912		if (!(sc->flags & FULL_INIT_DONE))
2913			return (EIO);         /* need MC5 */
2914		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2915		break;
2916	}
2917	case CHELSIO_SET_TRACE_FILTER: {
2918		struct ch_trace *t = (struct ch_trace *)data;
2919		const struct trace_params *tp;
2920
2921		tp = (const struct trace_params *)&t->sip;
2922		if (t->config_tx)
2923			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2924					       t->trace_tx);
2925		if (t->config_rx)
2926			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2927					       t->trace_rx);
2928		break;
2929	}
2930	case CHELSIO_SET_PKTSCHED: {
2931		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2932		if (sc->open_device_map == 0)
2933			return (EAGAIN);
2934		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2935		    p->binding);
2936		break;
2937	}
2938	case CHELSIO_IFCONF_GETREGS: {
2939		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2940		int reglen = cxgb_get_regs_len();
2941		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2942		if (buf == NULL) {
2943			return (ENOMEM);
2944		}
2945		if (regs->len > reglen)
2946			regs->len = reglen;
2947		else if (regs->len < reglen)
2948			error = ENOBUFS;
2949
2950		if (!error) {
2951			cxgb_get_regs(sc, regs, buf);
2952			error = copyout(buf, regs->data, reglen);
2953		}
2954		free(buf, M_DEVBUF);
2955
2956		break;
2957	}
2958	case CHELSIO_SET_HW_SCHED: {
2959		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2960		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2961
2962		if ((sc->flags & FULL_INIT_DONE) == 0)
2963			return (EAGAIN);       /* need TP to be initialized */
2964		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2965		    !in_range(t->channel, 0, 1) ||
2966		    !in_range(t->kbps, 0, 10000000) ||
2967		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2968		    !in_range(t->flow_ipg, 0,
2969			      dack_ticks_to_usec(sc, 0x7ff)))
2970			return (EINVAL);
2971
2972		if (t->kbps >= 0) {
2973			error = t3_config_sched(sc, t->kbps, t->sched);
2974			if (error < 0)
2975				return (-error);
2976		}
2977		if (t->class_ipg >= 0)
2978			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2979		if (t->flow_ipg >= 0) {
2980			t->flow_ipg *= 1000;     /* us -> ns */
2981			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2982		}
2983		if (t->mode >= 0) {
2984			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2985
2986			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2987					 bit, t->mode ? bit : 0);
2988		}
2989		if (t->channel >= 0)
2990			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2991					 1 << t->sched, t->channel << t->sched);
2992		break;
2993	}
2994	case CHELSIO_GET_EEPROM: {
2995		int i;
2996		struct ch_eeprom *e = (struct ch_eeprom *)data;
2997		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2998
2999		if (buf == NULL) {
3000			return (ENOMEM);
3001		}
3002		e->magic = EEPROM_MAGIC;
3003		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3004			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3005
3006		if (!error)
3007			error = copyout(buf + e->offset, e->data, e->len);
3008
3009		free(buf, M_DEVBUF);
3010		break;
3011	}
3012	case CHELSIO_CLEAR_STATS: {
3013		if (!(sc->flags & FULL_INIT_DONE))
3014			return (EAGAIN);
3015
3016		PORT_LOCK(pi);
3017		t3_mac_update_stats(&pi->mac);
3018		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3019		PORT_UNLOCK(pi);
3020		break;
3021	}
3022	case CHELSIO_GET_UP_LA: {
3023		struct ch_up_la *la = (struct ch_up_la *)data;
3024		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3025		if (buf == NULL) {
3026			return (ENOMEM);
3027		}
3028		if (la->bufsize < LA_BUFSIZE)
3029			error = ENOBUFS;
3030
3031		if (!error)
3032			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3033					      &la->bufsize, buf);
3034		if (!error)
3035			error = copyout(buf, la->data, la->bufsize);
3036
3037		free(buf, M_DEVBUF);
3038		break;
3039	}
3040	case CHELSIO_GET_UP_IOQS: {
3041		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3042		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3043		uint32_t *v;
3044
3045		if (buf == NULL) {
3046			return (ENOMEM);
3047		}
3048		if (ioqs->bufsize < IOQS_BUFSIZE)
3049			error = ENOBUFS;
3050
3051		if (!error)
3052			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3053
3054		if (!error) {
3055			v = (uint32_t *)buf;
3056
3057			ioqs->ioq_rx_enable = *v++;
3058			ioqs->ioq_tx_enable = *v++;
3059			ioqs->ioq_rx_status = *v++;
3060			ioqs->ioq_tx_status = *v++;
3061
3062			error = copyout(v, ioqs->data, ioqs->bufsize);
3063		}
3064
3065		free(buf, M_DEVBUF);
3066		break;
3067	}
3068	case CHELSIO_SET_FILTER: {
3069		struct ch_filter *f = (struct ch_filter *)data;
3070		struct filter_info *p;
3071		unsigned int nfilters = sc->params.mc5.nfilters;
3072
3073		if (!is_offload(sc))
3074			return (EOPNOTSUPP);	/* No TCAM */
3075		if (!(sc->flags & FULL_INIT_DONE))
3076			return (EAGAIN);	/* mc5 not setup yet */
3077		if (nfilters == 0)
3078			return (EBUSY);		/* TOE will use TCAM */
3079
3080		/* sanity checks */
3081		if (f->filter_id >= nfilters ||
3082		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3083		    (f->val.sport && f->mask.sport != 0xffff) ||
3084		    (f->val.dport && f->mask.dport != 0xffff) ||
3085		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3086		    (f->val.vlan_prio &&
3087			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3088		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3089		    f->qset >= SGE_QSETS ||
3090		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3091			return (EINVAL);
3092
3093		/* Was allocated with M_WAITOK */
3094		KASSERT(sc->filters, ("filter table NULL\n"));
3095
3096		p = &sc->filters[f->filter_id];
3097		if (p->locked)
3098			return (EPERM);
3099
3100		bzero(p, sizeof(*p));
3101		p->sip = f->val.sip;
3102		p->sip_mask = f->mask.sip;
3103		p->dip = f->val.dip;
3104		p->sport = f->val.sport;
3105		p->dport = f->val.dport;
3106		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3107		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3108		    FILTER_NO_VLAN_PRI;
3109		p->mac_hit = f->mac_hit;
3110		p->mac_vld = f->mac_addr_idx != 0xffff;
3111		p->mac_idx = f->mac_addr_idx;
3112		p->pkt_type = f->proto;
3113		p->report_filter_id = f->want_filter_id;
3114		p->pass = f->pass;
3115		p->rss = f->rss;
3116		p->qset = f->qset;
3117
3118		error = set_filter(sc, f->filter_id, p);
3119		if (error == 0)
3120			p->valid = 1;
3121		break;
3122	}
3123	case CHELSIO_DEL_FILTER: {
3124		struct ch_filter *f = (struct ch_filter *)data;
3125		struct filter_info *p;
3126		unsigned int nfilters = sc->params.mc5.nfilters;
3127
3128		if (!is_offload(sc))
3129			return (EOPNOTSUPP);
3130		if (!(sc->flags & FULL_INIT_DONE))
3131			return (EAGAIN);
3132		if (nfilters == 0 || sc->filters == NULL)
3133			return (EINVAL);
3134		if (f->filter_id >= nfilters)
3135			return (EINVAL);
3136
3137		p = &sc->filters[f->filter_id];
3138		if (p->locked)
3139			return (EPERM);
3140		if (!p->valid)
3141			return (EFAULT); /* Read "Bad address" as "Bad index" */
3142
3143		bzero(p, sizeof(*p));
3144		p->sip = p->sip_mask = 0xffffffff;
3145		p->vlan = 0xfff;
3146		p->vlan_prio = FILTER_NO_VLAN_PRI;
3147		p->pkt_type = 1;
3148		error = set_filter(sc, f->filter_id, p);
3149		break;
3150	}
3151	case CHELSIO_GET_FILTER: {
3152		struct ch_filter *f = (struct ch_filter *)data;
3153		struct filter_info *p;
3154		unsigned int i, nfilters = sc->params.mc5.nfilters;
3155
3156		if (!is_offload(sc))
3157			return (EOPNOTSUPP);
3158		if (!(sc->flags & FULL_INIT_DONE))
3159			return (EAGAIN);
3160		if (nfilters == 0 || sc->filters == NULL)
3161			return (EINVAL);
3162
3163		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3164		for (; i < nfilters; i++) {
3165			p = &sc->filters[i];
3166			if (!p->valid)
3167				continue;
3168
3169			bzero(f, sizeof(*f));
3170
3171			f->filter_id = i;
3172			f->val.sip = p->sip;
3173			f->mask.sip = p->sip_mask;
3174			f->val.dip = p->dip;
3175			f->mask.dip = p->dip ? 0xffffffff : 0;
3176			f->val.sport = p->sport;
3177			f->mask.sport = p->sport ? 0xffff : 0;
3178			f->val.dport = p->dport;
3179			f->mask.dport = p->dport ? 0xffff : 0;
3180			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3181			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3182			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3183			    0 : p->vlan_prio;
3184			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3185			    0 : FILTER_NO_VLAN_PRI;
3186			f->mac_hit = p->mac_hit;
3187			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3188			f->proto = p->pkt_type;
3189			f->want_filter_id = p->report_filter_id;
3190			f->pass = p->pass;
3191			f->rss = p->rss;
3192			f->qset = p->qset;
3193
3194			break;
3195		}
3196
3197		if (i == nfilters)
3198			f->filter_id = 0xffffffff;
3199		break;
3200	}
3201	default:
3202		return (EOPNOTSUPP);
3203		break;
3204	}
3205
3206	return (error);
3207}
3208
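/*
 * Copy a contiguous block of registers into the dump buffer; each register
 * is stored at its own offset, so skipped ranges stay zero-filled.
 */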
3209static __inline void
3210reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3211    unsigned int end)
3212{
3213	uint32_t *p = (uint32_t *)(buf + start);
3214
3215	for ( ; start <= end; start += sizeof(uint32_t))
3216		*p++ = t3_read_reg(ap, start);
3217}
3218
3219#define T3_REGMAP_SIZE (3 * 1024)
3220static int
3221cxgb_get_regs_len(void)
3222{
3223	return (T3_REGMAP_SIZE);
3224}
3225
3226static void
3227cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3228{
3229
3230	/*
3231	 * Version scheme:
3232	 * bits 0..9: chip version
3233	 * bits 10..15: chip revision
3234	 * bit 31: set for PCIe cards
3235	 */
3236	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3237
3238	/*
3239	 * We skip the MAC statistics registers because they are clear-on-read.
3240	 * Also reading multi-register stats would need to synchronize with the
3241	 * periodic mac stats accumulation.  Hard to justify the complexity.
3242	 */
3243	memset(buf, 0, cxgb_get_regs_len());
3244	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3245	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3246	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3247	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3248	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3249	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3250		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3251	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3252		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3253}
3254
3255static int
3256alloc_filters(struct adapter *sc)
3257{
3258	struct filter_info *p;
3259	unsigned int nfilters = sc->params.mc5.nfilters;
3260
3261	if (nfilters == 0)
3262		return (0);
3263
3264	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3265	sc->filters = p;
3266
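	/*
	 * The last entry is reserved as a locked default filter; locked
	 * entries are (re)programmed by setup_hw_filters() when the hardware
	 * is brought up.
	 */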
3267	p = &sc->filters[nfilters - 1];
3268	p->vlan = 0xfff;
3269	p->vlan_prio = FILTER_NO_VLAN_PRI;
3270	p->pass = p->rss = p->valid = p->locked = 1;
3271
3272	return (0);
3273}
3274
3275static int
3276setup_hw_filters(struct adapter *sc)
3277{
3278	int i, rc;
3279	unsigned int nfilters = sc->params.mc5.nfilters;
3280
3281	if (!sc->filters)
3282		return (0);
3283
3284	t3_enable_filters(sc);
3285
3286	for (i = rc = 0; i < nfilters && !rc; i++) {
3287		if (sc->filters[i].locked)
3288			rc = set_filter(sc, i, &sc->filters[i]);
3289	}
3290
3291	return (rc);
3292}
3293
3294static int
3295set_filter(struct adapter *sc, int id, const struct filter_info *f)
3296{
3297	int len;
3298	struct mbuf *m;
3299	struct ulp_txpkt *txpkt;
3300	struct work_request_hdr *wr;
3301	struct cpl_pass_open_req *oreq;
3302	struct cpl_set_tcb_field *sreq;
3303
3304	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3305	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3306
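	/*
	 * Convert the relative filter id into an absolute TCAM index; the
	 * filter region sits just below the routing entries at the top of
	 * the TCAM.
	 */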
3307	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3308	      sc->params.mc5.nfilters;
3309
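	/*
	 * Build one atomic management work request carrying a
	 * CPL_PASS_OPEN_REQ (the TCAM entry) followed by two
	 * CPL_SET_TCB_FIELD commands that program the filter's TCB.
	 */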
3310	m = m_gethdr(M_WAITOK, MT_DATA);
3311	m->m_len = m->m_pkthdr.len = len;
3312	bzero(mtod(m, char *), len);
3313
3314	wr = mtod(m, struct work_request_hdr *);
3315	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3316
3317	oreq = (struct cpl_pass_open_req *)(wr + 1);
3318	txpkt = (struct ulp_txpkt *)oreq;
3319	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3320	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3321	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3322	oreq->local_port = htons(f->dport);
3323	oreq->peer_port = htons(f->sport);
3324	oreq->local_ip = htonl(f->dip);
3325	oreq->peer_ip = htonl(f->sip);
3326	oreq->peer_netmask = htonl(f->sip_mask);
3327	oreq->opt0h = 0;
3328	oreq->opt0l = htonl(F_NO_OFFLOAD);
3329	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3330			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3331			 V_VLAN_PRI(f->vlan_prio >> 1) |
3332			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3333			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3334			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3335
3336	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3337	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3338			  (f->report_filter_id << 15) | (1 << 23) |
3339			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3340	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3341	t3_mgmt_tx(sc, m);
3342
3343	if (f->pass && !f->rss) {
3344		len = sizeof(*sreq);
3345		m = m_gethdr(M_WAITOK, MT_DATA);
3346		m->m_len = m->m_pkthdr.len = len;
3347		bzero(mtod(m, char *), len);
3348		sreq = mtod(m, struct cpl_set_tcb_field *);
3349		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3350		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3351				 (u64)sc->rrss_map[f->qset] << 19);
3352		t3_mgmt_tx(sc, m);
3353	}
3354	return (0);
3355}
3356
3357static inline void
3358mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3359    unsigned int word, u64 mask, u64 val)
3360{
3361	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3362	req->reply = V_NO_REPLY(1);
3363	req->cpu_idx = 0;
3364	req->word = htons(word);
3365	req->mask = htobe64(mask);
3366	req->val = htobe64(val);
3367}
3368
3369static inline void
3370set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3371    unsigned int word, u64 mask, u64 val)
3372{
3373	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3374
3375	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3376	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3377	mk_set_tcb_field(req, tid, word, mask, val);
3378}
3379