cxgb_main.c revision 194039
/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/cxgb/cxgb_main.c 194039 2009-06-11 21:32:26Z gnn $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ktr.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_interrupts(adapter_t *);
static void cxgb_teardown_interrupts(adapter_t *);
static void cxgb_init(void *);
static void cxgb_init_locked(struct port_info *);
static void cxgb_stop_locked(struct port_info *);
static void cxgb_set_rxmode(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static int cxgb_ifm_type(int);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick_handler(void *, int);
static void cxgb_down_locked(struct adapter *sc);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_link_start(struct port_info *p);
int t3_detect_link_fault(adapter_t *adapter, int port_id);

static device_method_t cxgb_controller_methods[] = {
	DEVMETHOD(device_probe,		cxgb_controller_probe),
	DEVMETHOD(device_attach,	cxgb_controller_attach),
	DEVMETHOD(device_detach,	cxgb_controller_detach),

	/* bus interface */
	DEVMETHOD(bus_print_child,	bus_generic_print_child),
	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),

	{ 0, 0 }
};

static driver_t cxgb_controller_driver = {
	"cxgbc",
	cxgb_controller_methods,
	sizeof(struct adapter)
};

static devclass_t	cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
	DEVMETHOD(device_probe,		cxgb_port_probe),
	DEVMETHOD(device_attach,	cxgb_port_attach),
	DEVMETHOD(device_detach,	cxgb_port_detach),
	{ 0, 0 }
};

static driver_t cxgb_port_driver = {
	"cxgb",
	cxgb_port_methods,
	0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	0,
	.d_open =	cxgb_extension_open,
	.d_close =	cxgb_extension_close,
	.d_ioctl =	cxgb_extension_ioctl,
	.d_name =	"cxgb",
};

static devclass_t	cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, use multiq = 0.
 */
static int multiq = 1;
TUNABLE_INT("hw.cxgb.multiq", &multiq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
    "use min(ncpus/ports, 8) queue-sets per port");

/*
 * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = 1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");

/*
 * Tune the size of the output queue.
 */
int cxgb_snd_queue_len = IFQ_MAXLEN;
TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size ");
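/*
 * Illustrative example only: since these are declared with TUNABLE_INT and
 * CTLFLAG_RDTUN, they are read from the kernel environment at boot and can
 * be set from /boot/loader.conf, e.g.
 *
 *	hw.cxgb.msi_allowed="1"		# consider MSI and INTx only
 *	hw.cxgb.multiq="0"		# one queue set per port
 *	hw.cxgb.use_16k_clusters="0"	# smaller clusters for the jumbo queue
 *	hw.cxgb.snd_queue_len="4096"	# deeper send queue
 *
 * The sysctls above then expose the values read-only at runtime.
 */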


enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32,
	MIN_FL_JUMBO_ENTRIES = 32
};

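/*
 * Software image of one hardware packet-filter entry: the source/destination
 * addresses, ports and VLAN fields to match, plus the action bits
 * (pass/drop, RSS, target queue set) programmed into the filter.
 */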
struct filter_info {
	u32 sip;
	u32 sip_mask;
	u32 dip;
	u16 sport;
	u16 dport;
	u32 vlan:12;
	u32 vlan_prio:3;
	u32 mac_hit:1;
	u32 mac_idx:4;
	u32 mac_vld:1;
	u32 pkt_type:2;
	u32 report_filter_id:1;
	u32 pass:1;
	u32 rss:1;
	u32 qset:3;
	u32 locked:1;
	u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };

#define EEPROM_MAGIC 0x38E2F10C

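/* Mask of the per-port bits of open_device_map, as opposed to the offload device bit. */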
#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
	uint16_t	vendor;
	uint16_t	device;
	int		index;
	char		*desc;
} cxgb_identifiers[] = {
	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "N310E"},
	{0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);

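/*
 * Map the adapter's hardware revision to the single letter used in the
 * TP EEPROM/SRAM image names (see TPEEPROM_NAME/TPSRAM_NAME below).
 */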
static __inline char
t3rev2char(struct adapter *adapter)
{
	char rev = 'z';

	switch(adapter->params.rev) {
	case T3_REV_A:
		rev = 'a';
		break;
	case T3_REV_B:
	case T3_REV_B2:
		rev = 'b';
		break;
	case T3_REV_C:
		rev = 'c';
		break;
	}
	return rev;
}

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
	struct cxgb_ident *id;

	for (id = cxgb_identifiers; id->desc != NULL; id++) {
		if ((id->vendor == pci_get_vendor(dev)) &&
		    (id->device == pci_get_device(dev))) {
			return (id);
		}
	}
	return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
	struct cxgb_ident *id;
	const struct adapter_info *ai;

	id = cxgb_get_ident(dev);
	if (id == NULL)
		return (NULL);

	ai = t3_get_adapter_info(id->index);

	return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
	const struct adapter_info *ai;
	char *ports, buf[80];
	int nports;
	struct adapter *sc = device_get_softc(dev);

	ai = cxgb_get_adapter_info(dev);
	if (ai == NULL)
		return (ENXIO);

	nports = ai->nports0 + ai->nports1;
	if (nports == 1)
		ports = "port";
	else
		ports = "ports";

	snprintf(buf, sizeof(buf), "%s %sNIC, rev: %d nports: %d %s",
	    ai->desc, is_offload(sc) ? "R" : "",
	    sc->params.rev, nports, ports);
	device_set_desc_copy(dev, buf);
	return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
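/*
 * These are the image names handed to firmware(9) via firmware_get(); the
 * "%c" in the TP names is filled in with the chip revision letter from
 * t3rev2char().  The images themselves are typically provided by the
 * corresponding cxgb firmware kernel modules.
 */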

static int
upgrade_fw(adapter_t *sc)
{
#ifdef FIRMWARE_LATEST
	const struct firmware *fw;
#else
	struct firmware *fw;
#endif
	int status;

	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
		return (ENOENT);
	} else
		device_printf(sc->dev, "updating firmware on card\n");
	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);

	firmware_put(fw, FIRMWARE_UNLOAD);

	return (status);
}

/*
 * The cxgb_controller_attach function is responsible for the initial
 * bringup of the device.  Its responsibilities include:
 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Setup the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port)
 * 11. Initialize T3 private state.
 * 12. Trigger the LED
 * 13. Setup offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls
 *
 * NOTE: Any modification or deviation from this list MUST be reflected in
 * the above comment.  Failure to do so will result in problems on various
 * error conditions including link flapping.
 */
static int
cxgb_controller_attach(device_t dev)
{
	device_t child;
	const struct adapter_info *ai;
	struct adapter *sc;
	int i, error = 0;
	uint32_t vers;
	int port_qsets = 1;
#ifdef MSI_SUPPORTED
	int msi_needed, reg;
#endif
	char buf[80];

	sc = device_get_softc(dev);
	sc->dev = dev;
	sc->msi_count = 0;
	ai = cxgb_get_adapter_info(dev);

	/*
	 * XXX not really related but a recent addition
	 */
#ifdef MSI_SUPPORTED
	/* find the PCIe link width and set max read request to 4KB*/
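	/*
	 * Offset 0x12 within the PCI Express capability is the Link Status
	 * register (negotiated link width in bits 9:4); offset 0x8 is Device
	 * Control, whose bits 14:12 select the maximum read request size
	 * (5 == 4096 bytes).
	 */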
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		uint16_t lnk, pectl;
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	if (sc->link_width != 0 && sc->link_width <= 4 &&
	    (ai->nports0 + ai->nports1) <= 2) {
		device_printf(sc->dev,
		    "PCIe x%d Link, expect reduced performance\n",
		    sc->link_width);
	}
#endif
	touch_bars(dev);
	pci_enable_busmaster(dev);
	/*
	 * Allocate the registers and make them available to the driver.
	 * The registers that we care about for NIC mode are in BAR 0
	 */
	sc->regs_rid = PCIR_BAR(0);
	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
		device_printf(dev, "Cannot allocate BAR region 0\n");
		return (ENXIO);
	}
	sc->udbs_rid = PCIR_BAR(2);
	sc->udbs_res = NULL;
	if (is_offload(sc) &&
	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
		device_printf(dev, "Cannot allocate BAR region 1\n");
		error = ENXIO;
		goto out;
	}

	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
	    device_get_unit(dev));
	ADAPTER_LOCK_INIT(sc, sc->lockbuf);

	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
	    device_get_unit(dev));
	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
	    device_get_unit(dev));
	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
	    device_get_unit(dev));

	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);

	if (t3_prep_adapter(sc, ai, 1) < 0) {
		printf("prep adapter failed\n");
		error = ENODEV;
		goto out;
	}
	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
	 * enough messages for the queue sets.  If that fails, try falling
	 * back to MSI.  If that fails, then try falling back to the legacy
	 * interrupt pin model.
	 */
#ifdef MSI_SUPPORTED

	sc->msix_regs_rid = 0x20;
	if ((msi_allowed >= 2) &&
	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

		if (multiq)
			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;

		if (pci_msix_count(dev) == 0 ||
		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
		    sc->msi_count != msi_needed) {
			device_printf(dev, "alloc msix failed - "
				      "msi_count=%d, msi_needed=%d, err=%d; "
				      "will try MSI\n", sc->msi_count,
				      msi_needed, error);
			sc->msi_count = 0;
			port_qsets = 1;
			pci_release_msi(dev);
			bus_release_resource(dev, SYS_RES_MEMORY,
			    sc->msix_regs_rid, sc->msix_regs_res);
			sc->msix_regs_res = NULL;
		} else {
			sc->flags |= USING_MSIX;
			sc->cxgb_intr = cxgb_async_intr;
			device_printf(dev,
				      "using MSI-X interrupts (%u vectors)\n",
				      sc->msi_count);
		}
	}

	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
		sc->msi_count = 1;
		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
			device_printf(dev, "alloc msi failed - "
				      "err=%d; will try INTx\n", error);
			sc->msi_count = 0;
			port_qsets = 1;
			pci_release_msi(dev);
		} else {
			sc->flags |= USING_MSI;
			sc->cxgb_intr = t3_intr_msi;
			device_printf(dev, "using MSI interrupts\n");
		}
	}
#endif
	if (sc->msi_count == 0) {
		device_printf(dev, "using line interrupts\n");
		sc->cxgb_intr = t3b_intr;
	}

	/* Create a private taskqueue thread for handling driver events */
#ifdef TASKQUEUE_CURRENT
	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#else
	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->tq);
#endif
	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		goto out;
	}

	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);


	/* Create a periodic callout for checking adapter status */
	callout_init(&sc->cxgb_tick_ch, TRUE);

	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
		sc->flags &= ~FW_UPTODATE;
	} else {
		sc->flags |= FW_UPTODATE;
	}

	if (t3_check_tpsram_version(sc) < 0) {
		/*
		 * Warn user that a firmware update will be attempted in init.
		 */
		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
		sc->flags &= ~TPS_UPTODATE;
	} else {
		sc->flags |= TPS_UPTODATE;
	}

	/*
	 * Create a child device for each MAC.  The ethernet attachment
	 * will be done in these children.
	 */
	for (i = 0; i < (sc)->params.nports; i++) {
		struct port_info *pi;

		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
			device_printf(dev, "failed to add child port\n");
			error = EINVAL;
			goto out;
		}
		pi = &sc->port[i];
		pi->adapter = sc;
		pi->nqsets = port_qsets;
		pi->first_qset = i*port_qsets;
		pi->port_id = i;
		pi->tx_chan = i >= ai->nports0;
		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
		sc->rxpkt_map[pi->txpkt_intf] = i;
		sc->port[i].tx_chan = i >= ai->nports0;
		sc->portdev[i] = child;
		device_set_softc(child, pi);
	}
	if ((error = bus_generic_attach(dev)) != 0)
		goto out;

	/* initialize sge private state */
	t3_sge_init_adapter(sc);

	t3_led_ready(sc);

	cxgb_offload_init();
	if (is_offload(sc)) {
		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
		cxgb_adapter_ofld(sc);
	}
	error = t3_get_fw_version(sc, &vers);
	if (error)
		goto out;

	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
	    G_FW_VERSION_MICRO(vers));

	snprintf(buf, sizeof(buf), "%s\t E/C: %s S/N: %s",
		 ai->desc,
		 sc->params.vpd.ec, sc->params.vpd.sn);
	device_set_desc_copy(dev, buf);

	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);

	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
	t3_add_attach_sysctls(sc);
out:
	if (error)
		cxgb_free(sc);

	return (error);
}

/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */

static int
cxgb_controller_detach(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);

	cxgb_free(sc);

	return (0);
}

/*
 * cxgb_free() is called by the cxgb_controller_detach() routine
 * to tear down the structures that were built up in
 * cxgb_controller_attach(), and should be the final piece of work
 * done when fully unloading the driver.  Its responsibilities include:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
static void
cxgb_free(struct adapter *sc)
{
	int i;

	ADAPTER_LOCK(sc);
	sc->flags |= CXGB_SHUTDOWN;
	ADAPTER_UNLOCK(sc);

	cxgb_pcpu_shutdown_threads(sc);

	ADAPTER_LOCK(sc);
	cxgb_down_locked(sc);
	ADAPTER_UNLOCK(sc);

	t3_sge_deinit_sw(sc);
	/*
	 * Wait for last callout
	 */

	DELAY(hz*100);

	bus_generic_detach(sc->dev);

	for (i = 0; i < (sc)->params.nports; i++) {
		if (sc->portdev[i] &&
		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
			device_printf(sc->dev, "failed to delete child port\n");
	}

	cxgb_teardown_interrupts(sc);

#ifdef MSI_SUPPORTED
	if (sc->flags & (USING_MSI | USING_MSIX)) {
		device_printf(sc->dev, "releasing msi message(s)\n");
		pci_release_msi(sc->dev);
	} else {
		device_printf(sc->dev, "no msi message to release\n");
	}

	if (sc->msix_regs_res != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
		    sc->msix_regs_res);
	}
#endif

	if (sc->tq != NULL) {
		taskqueue_free(sc->tq);
		sc->tq = NULL;
	}

	if (is_offload(sc)) {
		cxgb_adapter_unofld(sc);
		if (isset(&sc->open_device_map,	OFFLOAD_DEVMAP_BIT))
			offload_close(&sc->tdev);
		else
			printf("cxgb_free: DEVMAP_BIT not set\n");
	} else
		printf("not offloading set\n");
#ifdef notyet
	if (sc->flags & CXGB_OFLD_INIT)
		cxgb_offload_deactivate(sc);
#endif
	free(sc->filters, M_DEVBUF);
	t3_sge_free(sc);

	cxgb_offload_exit();

	if (sc->udbs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->regs_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	MTX_DESTROY(&sc->mdio_lock);
	MTX_DESTROY(&sc->sge.reg_lock);
	MTX_DESTROY(&sc->elmer_lock);
	ADAPTER_LOCK_DEINIT(sc);
}

/**
 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
 *	@sc: the controller softc
 *
 *	Determines how many sets of SGE queues to use and initializes them.
 *	We support multiple queue sets per port if we have MSI-X, otherwise
 *	just one queue set per port.
 */
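/*
 * Note: with MSI-X, queue set N is serviced by SGE interrupt index N + 1
 * (index 0 is left for the slow/async interrupt); with MSI or INTx a single
 * vector services everything, so irq_idx is used instead.
 */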
static int
setup_sge_qsets(adapter_t *sc)
{
	int i, j, err, irq_idx = 0, qset_idx = 0;
	u_int ntxq = SGE_TXQ_PER_SET;

	if ((err = t3_sge_alloc(sc)) != 0) {
		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
		return (err);
	}

	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
		irq_idx = -1;

	for (i = 0; i < (sc)->params.nports; i++) {
		struct port_info *pi = &sc->port[i];

		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
			    &sc->params.sge.qset[qset_idx], ntxq, pi);
			if (err) {
				t3_free_sge_resources(sc);
				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
				    err);
				return (err);
			}
		}
	}

	return (0);
}

static void
cxgb_teardown_interrupts(adapter_t *sc)
{
	int i;

	for (i = 0; i < SGE_QSETS; i++) {
		if (sc->msix_intr_tag[i] == NULL) {

			/* Should have been setup fully or not at all */
			KASSERT(sc->msix_irq_res[i] == NULL &&
				sc->msix_irq_rid[i] == 0,
				("%s: half-done interrupt (%d).", __func__, i));

			continue;
		}

		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_intr_tag[i]);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
				     sc->msix_irq_res[i]);

		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
		sc->msix_irq_rid[i] = 0;
	}

	if (sc->intr_tag) {
		KASSERT(sc->irq_res != NULL,
			("%s: half-done interrupt.", __func__));

		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
				     sc->irq_res);

		sc->irq_res = sc->intr_tag = NULL;
		sc->irq_rid = 0;
	}
}

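/*
 * Allocate and hook up the adapter's interrupts.  The first vector
 * (SYS_RES_IRQ rid 0 for INTx, rid 1 for MSI/MSI-X) runs the slow/async
 * handler chosen at attach time; with MSI-X, each queue set additionally
 * gets its own vector (rids 2 .. msi_count) routed to t3_intr_msix().
 */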
static int
cxgb_setup_interrupts(adapter_t *sc)
{
	struct resource *res;
	void *tag;
	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);

	sc->irq_rid = intr_flag ? 1 : 0;
	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
					     RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
			      intr_flag, sc->irq_rid);
		err = EINVAL;
		sc->irq_rid = 0;
	} else {
		err = bus_setup_intr(sc->dev, sc->irq_res,
				     INTR_MPSAFE | INTR_TYPE_NET,
#ifdef INTR_FILTERS
				     NULL,
#endif
				     sc->cxgb_intr, sc, &sc->intr_tag);

		if (err) {
			device_printf(sc->dev,
				      "Cannot set up interrupt (%x, %u, %d)\n",
				      intr_flag, sc->irq_rid, err);
			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
					     sc->irq_res);
			sc->irq_res = sc->intr_tag = NULL;
			sc->irq_rid = 0;
		}
	}

	/* That's all for INTx or MSI */
	if (!(intr_flag & USING_MSIX) || err)
		return (err);

	for (i = 0; i < sc->msi_count - 1; i++) {
		rid = i + 2;
		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
					     RF_SHAREABLE | RF_ACTIVE);
		if (res == NULL) {
			device_printf(sc->dev, "Cannot allocate interrupt "
				      "for message %d\n", rid);
			err = EINVAL;
			break;
		}

		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
#ifdef INTR_FILTERS
				     NULL,
#endif
				     t3_intr_msix, &sc->sge.qs[i], &tag);
		if (err) {
			device_printf(sc->dev, "Cannot set up interrupt "
				      "for message %d (%d)\n", rid, err);
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
			break;
		}

		sc->msix_irq_rid[i] = rid;
		sc->msix_irq_res[i] = res;
		sc->msix_intr_tag[i] = tag;
	}

	if (err)
		cxgb_teardown_interrupts(sc);

	return (err);
}


static int
cxgb_port_probe(device_t dev)
{
	struct port_info *p;
	char buf[80];
	const char *desc;

	p = device_get_softc(dev);
	desc = p->phy.desc;
	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
	device_set_desc_copy(dev, buf);
	return (0);
}


static int
cxgb_makedev(struct port_info *pi)
{

	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));

	if (pi->port_cdev == NULL)
		return (ENOMEM);

	pi->port_cdev->si_drv1 = (void *)pi;

	return (0);
}

#ifndef LRO_SUPPORTED
#ifdef IFCAP_LRO
#undef IFCAP_LRO
#endif
#define IFCAP_LRO 0x0
#endif

#ifdef TSO_SUPPORTED
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
#else
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
/* Don't enable TSO6 yet */
#define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
#define IFCAP_TSO4 0x0
#define IFCAP_TSO6 0x0
#define CSUM_TSO   0x0
#endif


static int
cxgb_port_attach(device_t dev)
{
	struct port_info *p;
	struct ifnet *ifp;
	int err, media_flags;
	struct adapter *sc;


	p = device_get_softc(dev);
	sc = p->adapter;
	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
	    device_get_unit(device_get_parent(dev)), p->port_id);
	PORT_LOCK_INIT(p, p->lockbuf);

	/* Allocate an ifnet object and set it up */
	ifp = p->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}

	/*
	 * Note that there is currently no watchdog timer.
	 */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_init = cxgb_init;
	ifp->if_softc = p;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = cxgb_ioctl;
	ifp->if_start = cxgb_start;


	ifp->if_timer = 0;	/* Disable ifnet watchdog */
	ifp->if_watchdog = NULL;

	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
	IFQ_SET_READY(&ifp->if_snd);

	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
	ifp->if_capabilities |= CXGB_CAP;
	ifp->if_capenable |= CXGB_CAP_ENABLE;
	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
	/*
	 * disable TSO on 4-port - it isn't supported by the firmware yet
	 */
	if (p->adapter->params.nports > 2) {
		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
		ifp->if_hwassist &= ~CSUM_TSO;
	}

	ether_ifattach(ifp, p->hw_addr);

#ifdef IFNET_MULTIQUEUE
	ifp->if_transmit = cxgb_pcpu_transmit;
#endif
	/*
	 * Only default to jumbo frames on 10GigE
	 */
	if (p->adapter->params.nports <= 2)
		ifp->if_mtu = ETHERMTU_JUMBO;
	if ((err = cxgb_makedev(p)) != 0) {
		printf("makedev failed %d\n", err);
		return (err);
	}
	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
	    cxgb_media_status);

	if (!strcmp(p->phy.desc, "10GBASE-CX4")) {
		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
		media_flags = cxgb_ifm_type(p->phy.modtype);
	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
			    0, NULL);
		media_flags = 0;
	} else if (!strcmp(p->phy.desc, "1000BASE-X")) {
		/*
		 * XXX: This is not very accurate.  Fix when common code
		 * returns more specific value - eg 1000BASE-SX, LX, etc.
		 *
		 * XXX: In the meantime, don't lie. Consider setting IFM_AUTO
		 * instead of SX.
		 */
		media_flags = IFM_ETHER | IFM_1000_SX | IFM_FDX;
	} else {
		printf("unsupported media type %s\n", p->phy.desc);
		return (ENXIO);
	}
	if (media_flags) {
		/*
		 * Note the modtype on which we based our flags.  If modtype
		 * changes, we'll redo the ifmedia for this ifp.  modtype may
		 * change when transceivers are plugged in/out, and in other
		 * situations.
		 */
		ifmedia_add(&p->media, media_flags, p->phy.modtype, NULL);
		ifmedia_set(&p->media, media_flags);
	} else {
		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
	}

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
	t3_sge_init_port(p);

#if defined(LINK_ATTACH)
	cxgb_link_start(p);
	t3_link_changed(sc, p->port_id);
#endif

	return (err);
}

/*
 * cxgb_port_detach() is called via the device_detach methods when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interfaces lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 */
static int
cxgb_port_detach(device_t dev)
{
	struct port_info *p;
	struct adapter *sc;

	p = device_get_softc(dev);
	sc = p->adapter;

	if (p->port_cdev != NULL)
		destroy_dev(p->port_cdev);

	ether_ifdetach(p->ifp);

	PORT_LOCK(p);
	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
		cxgb_stop_locked(p);
	PORT_UNLOCK(p);

	callout_drain(&sc->cxgb_tick_ch);
	callout_drain(&sc->sge_timer_ch);

	if (sc->tq != NULL) {
		printf("draining slow intr\n");

		taskqueue_drain(sc->tq, &sc->slow_intr_task);
		printf("draining ext intr\n");
		taskqueue_drain(sc->tq, &sc->ext_intr_task);
		printf("draining tick task\n");
		taskqueue_drain(sc->tq, &sc->tick_task);
	}

	/*
	 * the lock may be acquired in ifdetach
	 */
	PORT_LOCK_DEINIT(p);
	if_free(p->ifp);

	return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
	u_int fw_status[4];

	if (sc->flags & FULL_INIT_DONE) {
		t3_sge_stop(sc);
		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
		t3_intr_disable(sc);
	}
	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
	device_t dev;
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	uint32_t status;
	uint8_t ptr;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;

	status = pci_read_config(dev, PCIR_STATUS, 2);
	if (!(status & PCIM_STATUS_CAPPRESENT))
		return (0);

	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptr = PCIR_CAP_PTR_2;
		break;
	default:
		return (0);
		break;
	}
	ptr = pci_read_config(dev, ptr, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
			return (ptr);
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}

	return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_save(dev, dinfo, 0);
	return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
	device_t dev;
	struct pci_devinfo *dinfo;

	dev = sc->dev;
	dinfo = device_get_ivars(dev);

	pci_cfg_restore(dev, dinfo);
	return (0);
}

/**
 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *
 *	This is the OS-dependent handler for link status changes.  The OS
 *	neutral handler takes care of most of the processing for these events,
 *	then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc)
{
	struct port_info *pi = &adapter->port[port_id];

	if (link_status) {
		pi->ifp->if_baudrate = IF_Mbps(speed);
		if_link_state_change(pi->ifp, LINK_STATE_UP);
	} else
		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
}

/**
 *	t3_os_phymod_changed - handle PHY module changes
 *	@phy: the PHY reporting the module change
 *	@mod_type: new module type
 *
 *	This is the OS-dependent handler for PHY module changes.  It is
 *	invoked when a PHY module is removed or inserted for any OS-specific
 *	processing.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
	static const char *mod_str[] = {
		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
	};

	struct port_info *pi = &adap->port[port_id];

	if (pi->phy.modtype == phy_modtype_none)
		device_printf(adap->dev, "PHY module unplugged\n");
	else {
		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
		    ("invalid PHY module type %d", pi->phy.modtype));
		device_printf(adap->dev, "%s PHY module inserted\n",
		    mod_str[pi->phy.modtype]);
	}
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
	if (cxgb_debug)
		printf("t3_os_ext_intr_handler\n");
	/*
	 * Schedule a task to handle external interrupts as they may be slow
	 * and we use a mutex to protect MDIO registers.  We disable PHY
	 * interrupts in the meantime and let the task reenable them when
	 * it's done.
	 */
	ADAPTER_LOCK(sc);
	if (sc->slow_intr_mask) {
		sc->slow_intr_mask &= ~F_T3DBG;
		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
	}
	ADAPTER_UNLOCK(sc);
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

	/*
	 * The ifnet might not be allocated yet when this gets called, since
	 * it is called early in attach by t3_prep_adapter, so save the
	 * address off in the port structure.
	 */
	if (cxgb_debug)
		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/**
 *	link_start - enable a port
 *	@p: the port to enable
 *
 *	Performs the MAC and PHY actions needed to enable a port.
 */
static void
cxgb_link_start(struct port_info *p)
{
	struct ifnet *ifp;
	struct t3_rx_mode rm;
	struct cmac *mac = &p->mac;
	int mtu, hwtagging;

	ifp = p->ifp;

	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

	mtu = ifp->if_mtu;
	if (ifp->if_capenable & IFCAP_VLAN_MTU)
		mtu += ETHER_VLAN_ENCAP_LEN;

	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

	t3_init_rx_mode(&rm, p);
	if (!mac->multiport)
		t3_mac_reset(mac);
	t3_mac_set_mtu(mac, mtu);
	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
	t3_mac_set_address(mac, 0, p->hw_addr);
	t3_mac_set_rx_mode(mac, &rm);
	t3_link_start(&p->phy, mac, &p->link_config);
	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
}


static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
			      unsigned long n)
{
	int attempts = 5;

	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
		if (!--attempts)
			return (ETIMEDOUT);
		t3_os_sleep(10);
	}
	return 0;
}

static int
init_tp_parity(struct adapter *adap)
{
	int i;
	struct mbuf *m;
	struct cpl_set_tcb_field *greq;
	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

	t3_tp_set_offload_mode(adap, 1);

	for (i = 0; i < 16; i++) {
		struct cpl_smt_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_smt_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
		req->iff = i;
		t3_mgmt_tx(adap, m);
	}

	for (i = 0; i < 2048; i++) {
		struct cpl_l2t_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_l2t_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
		req->params = htonl(V_L2T_W_IDX(i));
		t3_mgmt_tx(adap, m);
	}

	for (i = 0; i < 2048; i++) {
		struct cpl_rte_write_req *req;

		m = m_gethdr(M_WAITOK, MT_DATA);
		req = mtod(m, struct cpl_rte_write_req *);
		m->m_len = m->m_pkthdr.len = sizeof(*req);
		memset(req, 0, sizeof(*req));
		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
		req->l2t_idx = htonl(V_L2T_W_IDX(i));
		t3_mgmt_tx(adap, m);
	}

	m = m_gethdr(M_WAITOK, MT_DATA);
	greq = mtod(m, struct cpl_set_tcb_field *);
	m->m_len = m->m_pkthdr.len = sizeof(*greq);
	memset(greq, 0, sizeof(*greq));
	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
	greq->mask = htobe64(1);
	t3_mgmt_tx(adap, m);

	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
	t3_tp_set_offload_mode(adap, 0);
	return (i);
}

/**
 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
 *	@adap: the adapter
 *
 *	Sets up RSS to distribute packets to multiple receive queues.  We
 *	configure the RSS CPU lookup table to distribute to the number of HW
 *	receive queues, and the response queue lookup table to narrow that
 *	down to the response queues actually configured for each port.
 *	We always configure the RSS mapping for two ports since the mapping
 *	table has plenty of entries.
 */
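/*
 * Illustrative example: with two ports of two queue sets each
 * (nq[0] = nq[1] = 2), the first half of rspq_map cycles 0,1,0,1,... for
 * port 0's queues and the second half cycles 2,3,2,3,... for port 1's, so
 * RSS hash buckets are spread evenly over each port's response queues.
 */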
1476setup_rss(adapter_t *adap)
1477{
1478	int i;
1479	u_int nq[2];
1480	uint8_t cpus[SGE_QSETS + 1];
1481	uint16_t rspq_map[RSS_TABLE_SIZE];
1482
1483	for (i = 0; i < SGE_QSETS; ++i)
1484		cpus[i] = i;
1485	cpus[SGE_QSETS] = 0xff;
1486
1487	nq[0] = nq[1] = 0;
1488	for_each_port(adap, i) {
1489		const struct port_info *pi = adap2pinfo(adap, i);
1490
1491		nq[pi->tx_chan] += pi->nqsets;
1492	}
1493	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1494		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1495		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1496	}
1497	/* Calculate the reverse RSS map table */
1498	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1499		if (adap->rrss_map[rspq_map[i]] == 0xff)
1500			adap->rrss_map[rspq_map[i]] = i;
1501
1502	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1503		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1504	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1505	              cpus, rspq_map);
1506
1507}
1508
1509/*
1510 * Sends an mbuf to an offload queue driver
1511 * after dealing with any active network taps.
1512 */
1513static inline int
1514offload_tx(struct t3cdev *tdev, struct mbuf *m)
1515{
1516	int ret;
1517
1518	ret = t3_offload_tx(tdev, m);
1519	return (ret);
1520}
1521
1522static int
1523write_smt_entry(struct adapter *adapter, int idx)
1524{
1525	struct port_info *pi = &adapter->port[idx];
1526	struct cpl_smt_write_req *req;
1527	struct mbuf *m;
1528
1529	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1530		return (ENOMEM);
1531
1532	req = mtod(m, struct cpl_smt_write_req *);
1533	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1534
1535	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1536	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1537	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1538	req->iff = idx;
1539	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1540	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1541
1542	m_set_priority(m, 1);
1543
1544	offload_tx(&adapter->tdev, m);
1545
1546	return (0);
1547}
1548
1549static int
1550init_smt(struct adapter *adapter)
1551{
1552	int i;
1553
1554	for_each_port(adapter, i)
1555		write_smt_entry(adapter, i);
1556	return 0;
1557}
1558
1559static void
1560init_port_mtus(adapter_t *adapter)
1561{
1562	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1563
1564	if (adapter->port[1].ifp)
1565		mtus |= adapter->port[1].ifp->if_mtu << 16;
1566	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1567}
1568
1569static void
1570send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1571			      int hi, int port)
1572{
1573	struct mbuf *m;
1574	struct mngt_pktsched_wr *req;
1575
1576	m = m_gethdr(M_DONTWAIT, MT_DATA);
1577	if (m) {
1578		req = mtod(m, struct mngt_pktsched_wr *);
1579		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1580		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1581		req->sched = sched;
1582		req->idx = qidx;
1583		req->min = lo;
1584		req->max = hi;
1585		req->binding = port;
1586		m->m_len = m->m_pkthdr.len = sizeof(*req);
1587		t3_mgmt_tx(adap, m);
1588	}
1589}
1590
1591static void
1592bind_qsets(adapter_t *sc)
1593{
1594	int i, j;
1595
1596	cxgb_pcpu_startup_threads(sc);
1597	for (i = 0; i < (sc)->params.nports; ++i) {
1598		const struct port_info *pi = adap2pinfo(sc, i);
1599
1600		for (j = 0; j < pi->nqsets; ++j) {
1601			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1602					  -1, pi->tx_chan);
1603
1604		}
1605	}
1606}
1607
1608static void
1609update_tpeeprom(struct adapter *adap)
1610{
1611#ifdef FIRMWARE_LATEST
1612	const struct firmware *tpeeprom;
1613#else
1614	struct firmware *tpeeprom;
1615#endif
1616
1617	uint32_t version;
1618	unsigned int major, minor;
1619	int ret, len;
1620	char rev, name[32];
1621
1622	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1623
1624	major = G_TP_VERSION_MAJOR(version);
1625	minor = G_TP_VERSION_MINOR(version);
1626	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1627		return;
1628
1629	rev = t3rev2char(adap);
1630	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1631
1632	tpeeprom = firmware_get(name);
1633	if (tpeeprom == NULL) {
1634		device_printf(adap->dev,
1635			      "could not load TP EEPROM: unable to load %s\n",
1636			      name);
1637		return;
1638	}
1639
1640	len = tpeeprom->datasize - 4;
1641
1642	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1643	if (ret)
1644		goto release_tpeeprom;
1645
1646	if (len != TP_SRAM_LEN) {
1647		device_printf(adap->dev,
1648			      "%s length is wrong len=%d expected=%d\n", name,
1649			      len, TP_SRAM_LEN);
1650		return;
1651	}
1652
1653	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1654	    TP_SRAM_OFFSET);
1655
1656	if (!ret) {
1657		device_printf(adap->dev,
1658			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1659			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1660	} else
1661		device_printf(adap->dev,
1662			      "Protocol SRAM image update in EEPROM failed\n");
1663
1664release_tpeeprom:
1665	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1666
1667	return;
1668}
1669
1670static int
1671update_tpsram(struct adapter *adap)
1672{
1673#ifdef FIRMWARE_LATEST
1674	const struct firmware *tpsram;
1675#else
1676	struct firmware *tpsram;
1677#endif
1678	int ret;
1679	char rev, name[32];
1680
1681	rev = t3rev2char(adap);
1682	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1683
1684	update_tpeeprom(adap);
1685
1686	tpsram = firmware_get(name);
1687	if (tpsram == NULL){
1688		device_printf(adap->dev, "could not load TP SRAM\n");
1689		return (EINVAL);
1690	} else
1691		device_printf(adap->dev, "updating TP SRAM\n");
1692
1693	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1694	if (ret)
1695		goto release_tpsram;
1696
1697	ret = t3_set_proto_sram(adap, tpsram->data);
1698	if (ret)
1699		device_printf(adap->dev, "loading protocol SRAM failed\n");
1700
1701release_tpsram:
1702	firmware_put(tpsram, FIRMWARE_UNLOAD);
1703
1704	return ret;
1705}
1706
1707/**
1708 *	cxgb_up - enable the adapter
1709 *	@adap: adapter being enabled
1710 *
1711 *	Called when the first port is enabled, this function performs the
1712 *	actions necessary to make an adapter operational, such as completing
1713 *	the initialization of HW modules, and enabling interrupts.
1714 */
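/*
 * The one-time initialization below (firmware/TP SRAM refresh if stale,
 * t3_init_hw(), queue set and RSS setup, interrupt hookup) is guarded by
 * FULL_INIT_DONE so it only runs on the first call.
 */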
1715static int
1716cxgb_up(struct adapter *sc)
1717{
1718	int err = 0;
1719
1720	if ((sc->flags & FULL_INIT_DONE) == 0) {
1721
1722		if ((sc->flags & FW_UPTODATE) == 0)
1723			if ((err = upgrade_fw(sc)))
1724				goto out;
1725		if ((sc->flags & TPS_UPTODATE) == 0)
1726			if ((err = update_tpsram(sc)))
1727				goto out;
1728		err = t3_init_hw(sc, 0);
1729		if (err)
1730			goto out;
1731
1732		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1733		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1734
1735		err = setup_sge_qsets(sc);
1736		if (err)
1737			goto out;
1738
1739		setup_rss(sc);
1740
1741		t3_intr_clear(sc);
1742		err = cxgb_setup_interrupts(sc);
1743		if (err)
1744			goto out;
1745
1746		t3_add_configured_sysctls(sc);
1747		sc->flags |= FULL_INIT_DONE;
1748	}
1749
1750	t3_intr_clear(sc);
1751	t3_sge_start(sc);
1752	t3_intr_enable(sc);
1753
1754	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1755	    is_offload(sc) && init_tp_parity(sc) == 0)
1756		sc->flags |= TP_PARITY_INIT;
1757
1758	if (sc->flags & TP_PARITY_INIT) {
1759		t3_write_reg(sc, A_TP_INT_CAUSE,
1760				F_CMCACHEPERR | F_ARPLUTPERR);
1761		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1762	}
1763
1764
1765	if (!(sc->flags & QUEUES_BOUND)) {
1766		bind_qsets(sc);
1767		sc->flags |= QUEUES_BOUND;
1768	}
1769out:
1770	return (err);
1771}
1772
1773
1774/*
1775 * Bring down the interface but do not free any resources.
1776 */
1777static void
1778cxgb_down_locked(struct adapter *sc)
1779{
1780
1781	t3_sge_stop(sc);
1782	t3_intr_disable(sc);
1783
1784	callout_stop(&sc->cxgb_tick_ch);
1785	callout_stop(&sc->sge_timer_ch);
1786}
1787
1788static int
1789offload_open(struct port_info *pi)
1790{
1791	struct adapter *adapter = pi->adapter;
1792	struct t3cdev *tdev = &adapter->tdev;
1793
1794	int adap_up = adapter->open_device_map & PORT_MASK;
1795	int err = 0;
1796
1797	if (atomic_cmpset_int(&adapter->open_device_map,
1798		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1799		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1800		return (0);
1801
1802	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1803		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n",
1804		    adapter->open_device_map);
1805	ADAPTER_LOCK(pi->adapter);
1806	if (!adap_up)
1807		err = cxgb_up(adapter);
1808	ADAPTER_UNLOCK(pi->adapter);
1809	if (err)
1810		return (err);
1811
1812	t3_tp_set_offload_mode(adapter, 1);
1813	tdev->lldev = pi->ifp;
1814
1815	init_port_mtus(adapter);
1816	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1817		     adapter->params.b_wnd,
1818		     adapter->params.rev == 0 ?
1819		       adapter->port[0].ifp->if_mtu : 0xffff);
1820	init_smt(adapter);
1821	/* Call back all registered clients */
1822	cxgb_add_clients(tdev);
1823
1824	/* restore them in case the offload module has changed them */
1825	if (err) {
1826		t3_tp_set_offload_mode(adapter, 0);
1827		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1828		cxgb_set_dummy_ops(tdev);
1829	}
1830	return (err);
1831}
1832
1833static int
1834offload_close(struct t3cdev *tdev)
1835{
1836	struct adapter *adapter = tdev2adap(tdev);
1837
1838	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1839		return (0);
1840
1841	/* Call back all registered clients */
1842	cxgb_remove_clients(tdev);
1843
1844	tdev->lldev = NULL;
1845	cxgb_set_dummy_ops(tdev);
1846	t3_tp_set_offload_mode(adapter, 0);
1847	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1848
1849	ADAPTER_LOCK(adapter);
1850	if (!adapter->open_device_map)
1851		cxgb_down_locked(adapter);
1852
1853	ADAPTER_UNLOCK(adapter);
1854
1855	return (0);
1856}
1857
1858
1859static void
1860cxgb_init(void *arg)
1861{
1862	struct port_info *p = arg;
1863
1864	PORT_LOCK(p);
1865	cxgb_init_locked(p);
1866	PORT_UNLOCK(p);
1867}
1868
1869static void
1870cxgb_init_locked(struct port_info *p)
1871{
1872	struct ifnet *ifp;
1873	adapter_t *sc = p->adapter;
1874	int err;
1875
1876	PORT_LOCK_ASSERT_OWNED(p);
1877	ifp = p->ifp;
1878
1879	ADAPTER_LOCK(p->adapter);
1880	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1881		ADAPTER_UNLOCK(p->adapter);
1882		cxgb_stop_locked(p);
1883		return;
1884	}
1885	if (p->adapter->open_device_map == 0) {
1886		t3_intr_clear(sc);
1887	}
1888	setbit(&p->adapter->open_device_map, p->port_id);
1889	ADAPTER_UNLOCK(p->adapter);
1890
1891	if (is_offload(sc) && !ofld_disable) {
1892		err = offload_open(p);
1893		if (err)
1894			log(LOG_WARNING,
1895			    "Could not initialize offload capabilities\n");
1896	}
1897
1898	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1899	t3_port_intr_enable(sc, p->port_id);
1900
1901#if !defined(LINK_ATTACH)
1902	cxgb_link_start(p);
1903	t3_link_changed(sc, p->port_id);
1904#endif
1905	ifp->if_baudrate = IF_Mbps(p->link_config.speed);
1906
1907 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
1908	t3_sge_reset_adapter(sc);
1909
1910	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1911	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1912}
1913
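/* Push the port's current promiscuous/multicast filter state down to the MAC. */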
1914static void
1915cxgb_set_rxmode(struct port_info *p)
1916{
1917	struct t3_rx_mode rm;
1918	struct cmac *mac = &p->mac;
1919
1920	t3_init_rx_mode(&rm, p);
1921	mtx_lock(&p->adapter->mdio_lock);
1922	t3_mac_set_rx_mode(mac, &rm);
1923	mtx_unlock(&p->adapter->mdio_lock);
1924}
1925
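/*
 * Bring a port down: disable its interrupts and pause frames, clear its bit
 * in open_device_map, quiesce the adapter if this was the last open port,
 * and, unless LINK_ATTACH is defined, drain the TX FIFO and power down the
 * PHY.
 */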
1926static void
1927cxgb_stop_locked(struct port_info *pi)
1928{
1929	struct ifnet *ifp;
1930
1931	PORT_LOCK_ASSERT_OWNED(pi);
1932	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1933
1934	ifp = pi->ifp;
1935	t3_port_intr_disable(pi->adapter, pi->port_id);
1936	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1937
1938	/* disable pause frames */
1939	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1940			 F_TXPAUSEEN, 0);
1941
1942	/* Reset RX FIFO HWM */
	t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG + pi->mac.offset,
			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);

1947	ADAPTER_LOCK(pi->adapter);
1948	clrbit(&pi->adapter->open_device_map, pi->port_id);
1949
1950	if (pi->adapter->open_device_map == 0)
1951		cxgb_down_locked(pi->adapter);
1952
1953	ADAPTER_UNLOCK(pi->adapter);
1954
1955#if !defined(LINK_ATTACH)
1956	DELAY(100);
1957
1958	/* Wait for TXFIFO empty */
1959	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1960			F_TXFIFO_EMPTY, 1, 20, 5);
1961
1962	DELAY(100);
1963	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1964
1965	pi->phy.ops->power_down(&pi->phy, 1);
1966#endif
1967
1968}
1969
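/*
 * Validate and set a new MTU, restarting the interface if it is running so
 * the new frame size takes effect.
 */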
1970static int
1971cxgb_set_mtu(struct port_info *p, int mtu)
1972{
1973	struct ifnet *ifp = p->ifp;
1974	int error = 0;
1975
1976	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1977		error = EINVAL;
1978	else if (ifp->if_mtu != mtu) {
1979		PORT_LOCK(p);
1980		ifp->if_mtu = mtu;
1981		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1982			cxgb_stop_locked(p);
1983			cxgb_init_locked(p);
1984		}
1985		PORT_UNLOCK(p);
1986	}
1987	return (error);
1988}
1989
1990#ifdef LRO_SUPPORTED
1991/*
1992 * Mark lro enabled or disabled in all qsets for this port
1993 */
1994static int
1995cxgb_set_lro(struct port_info *p, int enabled)
1996{
1997	int i;
1998	struct adapter *adp = p->adapter;
1999	struct sge_qset *q;
2000
2001	PORT_LOCK_ASSERT_OWNED(p);
2002	for (i = 0; i < p->nqsets; i++) {
2003		q = &adp->sge.qs[p->first_qset + i];
2004		q->lro.enabled = (enabled != 0);
2005	}
2006	return (0);
2007}
2008#endif
2009
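/* ifnet ioctl handler: MTU, address, flags, multicast, media, and capability changes. */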
2010static int
2011cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
2012{
2013	struct port_info *p = ifp->if_softc;
2014#ifdef INET
2015	struct ifaddr *ifa = (struct ifaddr *)data;
2016#endif
2017	struct ifreq *ifr = (struct ifreq *)data;
2018	int flags, error = 0, reinit = 0;
2019	uint32_t mask;
2020
2021	/*
2022	 * XXX need to check that we aren't in the middle of an unload
2023	 */
2024	switch (command) {
2025	case SIOCSIFMTU:
2026		error = cxgb_set_mtu(p, ifr->ifr_mtu);
2027		break;
2028	case SIOCSIFADDR:
2029#ifdef INET
2030		if (ifa->ifa_addr->sa_family == AF_INET) {
2031			ifp->if_flags |= IFF_UP;
2032			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2033				PORT_LOCK(p);
2034				cxgb_init_locked(p);
2035				PORT_UNLOCK(p);
2036			}
2037			arp_ifinit(ifp, ifa);
2038		} else
2039#endif
2040			error = ether_ioctl(ifp, command, data);
2041		break;
2042	case SIOCSIFFLAGS:
2043		PORT_LOCK(p);
2044		if (ifp->if_flags & IFF_UP) {
2045			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2046				flags = p->if_flags;
2047				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2048				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
2049					cxgb_set_rxmode(p);
2050			} else
2051				cxgb_init_locked(p);
2052			p->if_flags = ifp->if_flags;
2053		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2054			cxgb_stop_locked(p);
2055
2056		PORT_UNLOCK(p);
2057		break;
2058	case SIOCADDMULTI:
2059	case SIOCDELMULTI:
2060		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2061			cxgb_set_rxmode(p);
2062		}
2063		break;
2064	case SIOCSIFMEDIA:
2065	case SIOCGIFMEDIA:
2066		PORT_LOCK(p);
2067		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2068		PORT_UNLOCK(p);
2069		break;
2070	case SIOCSIFCAP:
2071		PORT_LOCK(p);
2072		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2073		if (mask & IFCAP_TXCSUM) {
2074			if (IFCAP_TXCSUM & ifp->if_capenable) {
2075				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2076				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2077				    | CSUM_IP | CSUM_TSO);
2078			} else {
2079				ifp->if_capenable |= IFCAP_TXCSUM;
2080				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2081				    | CSUM_IP);
2082			}
2083		}
2084		if (mask & IFCAP_RXCSUM) {
2085			ifp->if_capenable ^= IFCAP_RXCSUM;
2086		}
2087		if (mask & IFCAP_TSO4) {
2088			if (IFCAP_TSO4 & ifp->if_capenable) {
2089				ifp->if_capenable &= ~IFCAP_TSO4;
2090				ifp->if_hwassist &= ~CSUM_TSO;
2091			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2092				ifp->if_capenable |= IFCAP_TSO4;
2093				ifp->if_hwassist |= CSUM_TSO;
2094			} else {
2095				if (cxgb_debug)
2096					printf("cxgb requires tx checksum offload"
2097					    " be enabled to use TSO\n");
2098				error = EINVAL;
2099			}
2100		}
2101#ifdef LRO_SUPPORTED
2102		if (mask & IFCAP_LRO) {
2103			ifp->if_capenable ^= IFCAP_LRO;
2104
2105			/* Safe to do this even if cxgb_up not called yet */
2106			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2107		}
2108#endif
2109		if (mask & IFCAP_VLAN_HWTAGGING) {
2110			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2111			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2112		}
2113		if (mask & IFCAP_VLAN_MTU) {
2114			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2115			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2116		}
2117		if (mask & IFCAP_VLAN_HWCSUM) {
2118			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2119		}
2120		if (reinit) {
2121			cxgb_stop_locked(p);
2122			cxgb_init_locked(p);
2123		}
2124		PORT_UNLOCK(p);
2125
2126#ifdef VLAN_CAPABILITIES
2127		VLAN_CAPABILITIES(ifp);
2128#endif
2129		break;
2130	default:
2131		error = ether_ioctl(ifp, command, data);
2132		break;
2133	}
2134	return (error);
2135}
2136
2137static int
2138cxgb_media_change(struct ifnet *ifp)
2139{
2140	if_printf(ifp, "media change not supported\n");
2141	return (ENXIO);
2142}
2143
2144/*
2145 * Translates from phy->modtype to IFM_TYPE.
2146 */
2147static int
2148cxgb_ifm_type(int phymod)
2149{
2150	int rc = IFM_ETHER | IFM_FDX;
2151
2152	switch (phymod) {
2153	case phy_modtype_sr:
2154		rc |= IFM_10G_SR;
2155		break;
2156	case phy_modtype_lr:
2157		rc |= IFM_10G_LR;
2158		break;
2159	case phy_modtype_lrm:
2160#ifdef IFM_10G_LRM
2161		rc |= IFM_10G_LRM;
2162#endif
2163		break;
2164	case phy_modtype_twinax:
2165#ifdef IFM_10G_TWINAX
2166		rc |= IFM_10G_TWINAX;
2167#endif
2168		break;
2169	case phy_modtype_twinax_long:
2170#ifdef IFM_10G_TWINAX_LONG
2171		rc |= IFM_10G_TWINAX_LONG;
2172#endif
2173		break;
2174	case phy_modtype_none:
2175		rc = IFM_ETHER | IFM_NONE;
2176		break;
2177	case phy_modtype_unknown:
2178		break;
2179	}
2180
2181	return (rc);
2182}
2183
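/*
 * Report link state and active media.  If the module type changed since the
 * media list was built, rebuild it first (the port lock must be held).
 */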
2184static void
2185cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2186{
2187	struct port_info *p = ifp->if_softc;
2188	struct ifmedia_entry *cur = p->media.ifm_cur;
2189	int m;
2190
2191	if (cur->ifm_data != p->phy.modtype) {
2192		/* p->media about to be rebuilt, must hold lock */
2193		PORT_LOCK_ASSERT_OWNED(p);
2194
2195		m = cxgb_ifm_type(p->phy.modtype);
2196		ifmedia_removeall(&p->media);
2197		ifmedia_add(&p->media, m, p->phy.modtype, NULL);
2198		ifmedia_set(&p->media, m);
2199		cur = p->media.ifm_cur; /* ifmedia_set modified ifm_cur */
2200		ifmr->ifm_current = m;
2201	}
2202
2203	ifmr->ifm_status = IFM_AVALID;
2204	ifmr->ifm_active = IFM_ETHER;
2205
2206	if (!p->link_config.link_ok)
2207		return;
2208
2209	ifmr->ifm_status |= IFM_ACTIVE;
2210
2211	switch (p->link_config.speed) {
2212	case 10:
2213		ifmr->ifm_active |= IFM_10_T;
2214		break;
2215	case 100:
2216		ifmr->ifm_active |= IFM_100_TX;
		break;
2218	case 1000:
2219		ifmr->ifm_active |= IFM_1000_T;
2220		break;
2221	case 10000:
2222		ifmr->ifm_active |= IFM_SUBTYPE(cur->ifm_media);
2223		break;
2224	}
2225
2226	if (p->link_config.duplex)
2227		ifmr->ifm_active |= IFM_FDX;
2228	else
2229		ifmr->ifm_active |= IFM_HDX;
2230}
2231
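/*
 * Interrupt handler for asynchronous (slow path) events; the work may sleep,
 * so it is deferred to the slow_intr_task taskqueue.
 */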
2232static void
2233cxgb_async_intr(void *data)
2234{
2235	adapter_t *sc = data;
2236
2237	if (cxgb_debug)
2238		device_printf(sc->dev, "cxgb_async_intr\n");
2239	/*
2240	 * May need to sleep - defer to taskqueue
2241	 */
2242	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2243}
2244
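/*
 * Taskqueue handler for external (PHY) interrupts: service the PHYs and then
 * re-enable the T3DBG bit in the slow interrupt mask.
 */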
2245static void
2246cxgb_ext_intr_handler(void *arg, int count)
2247{
2248	adapter_t *sc = (adapter_t *)arg;
2249
2250	if (cxgb_debug)
2251		printf("cxgb_ext_intr_handler\n");
2252
2253	t3_phy_intr_handler(sc);
2254
2255	/* Now reenable external interrupts */
2256	ADAPTER_LOCK(sc);
2257	if (sc->slow_intr_mask) {
2258		sc->slow_intr_mask |= F_T3DBG;
2259		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2260		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2261	}
2262	ADAPTER_UNLOCK(sc);
2263}
2264
2265static void
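/*
 * Poll link state for every port: ports that are not open are reported as
 * link-down; ports with a pending link fault or without interrupt-capable
 * PHYs are polled via t3_link_changed().
 */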
2266check_link_status(adapter_t *sc)
2267{
2268	int i;
2269
2270	/* For synchronized access to open_device_map */
2271	ADAPTER_LOCK_ASSERT_OWNED(sc);
2272
2273	for (i = 0; i < (sc)->params.nports; ++i) {
2274		struct port_info *p = &sc->port[i];
2275		struct link_config *lc = &p->link_config;
2276
2277		if (!isset(&sc->open_device_map, p->port_id)) {
2278			/*
2279			 * port is down, report link down too.  Note
2280			 * that we do this for IRQ based PHYs too.
2281			 */
2282			lc->link_ok = 0;
2283			t3_os_link_changed(sc, i, lc->link_ok, lc->speed,
2284					   lc->duplex, lc->fc);
2285		} else if (p->link_fault || !(p->phy.caps & SUPPORTED_IRQ))
2286			t3_link_changed(sc, i);
2287	}
2288}
2289
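/*
 * Periodic watchdog for rev B2 parts: run the per-port MAC watchdog and, if
 * the MAC had to be reset, reprogram its MTU, address, rx mode and link
 * state before re-enabling it.
 */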
2290static void
2291check_t3b2_mac(struct adapter *adapter)
2292{
2293	int i;
2294
	if (adapter->flags & CXGB_SHUTDOWN)
2296		return;
2297
2298	for_each_port(adapter, i) {
2299		struct port_info *p = &adapter->port[i];
2300		struct ifnet *ifp = p->ifp;
2301		int status;
2302
		if (adapter->flags & CXGB_SHUTDOWN)
2304			return;
2305
2306		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2307			continue;
2308
2309		status = 0;
2310		PORT_LOCK(p);
2311		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2312			status = t3b2_mac_watchdog_task(&p->mac);
2313		if (status == 1)
2314			p->mac.stats.num_toggled++;
2315		else if (status == 2) {
2316			struct cmac *mac = &p->mac;
2317			int mtu = ifp->if_mtu;
2318
2319			if (ifp->if_capenable & IFCAP_VLAN_MTU)
2320				mtu += ETHER_VLAN_ENCAP_LEN;
2321			t3_mac_set_mtu(mac, mtu);
2322			t3_mac_set_address(mac, 0, p->hw_addr);
2323			cxgb_set_rxmode(p);
2324			t3_link_start(&p->phy, mac, &p->link_config);
2325			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2326			t3_port_intr_enable(adapter, p->port_id);
2327			p->mac.stats.num_resets++;
2328		}
2329		PORT_UNLOCK(p);
2330	}
2331}
2332
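/* Periodic callout: defer the real work to tick_task and reschedule. */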
2333static void
2334cxgb_tick(void *arg)
2335{
2336	adapter_t *sc = (adapter_t *)arg;
2337
	if (sc->flags & CXGB_SHUTDOWN)
2339		return;
2340
2341	taskqueue_enqueue(sc->tq, &sc->tick_task);
2342	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2343}
2344
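/*
 * Taskqueue half of the periodic tick: check link state, run the T3B2 MAC
 * watchdog, account for free-list-empty events, and refresh the per-port
 * MAC statistics and ifnet counters.
 */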
2345static void
2346cxgb_tick_handler(void *arg, int count)
2347{
2348	adapter_t *sc = (adapter_t *)arg;
2349	const struct adapter_params *p = &sc->params;
2350	int i;
2351	uint32_t cause, reset;
2352
	if ((sc->flags & CXGB_SHUTDOWN) || !(sc->flags & FULL_INIT_DONE))
2354		return;
2355
2356	ADAPTER_LOCK(sc);
2357
2358	check_link_status(sc);
2359
2360	sc->check_task_cnt++;
2361
	/*
	 * The adapter lock can currently only be acquired after the port
	 * lock, so release it before the per-port work below.
	 */
2366	ADAPTER_UNLOCK(sc);
2367
2368	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2369		check_t3b2_mac(sc);
2370
2371	cause = t3_read_reg(sc, A_SG_INT_CAUSE);
2372	reset = 0;
2373	if (cause & F_FLEMPTY) {
2374		struct sge_qset *qs = &sc->sge.qs[0];
2375
2376		i = 0;
2377		reset |= F_FLEMPTY;
2378
2379		cause = (t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) >>
2380			 S_FL0EMPTY) & 0xffff;
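		/*
		 * Bits alternate between free list 0 and free list 1 of each
		 * qset, so advance to the next qset after every second bit.
		 */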
2381		while (cause) {
2382			qs->fl[i].empty += (cause & 1);
2383			if (i)
2384				qs++;
2385			i ^= 1;
2386			cause >>= 1;
2387		}
2388	}
2389	t3_write_reg(sc, A_SG_INT_CAUSE, reset);
2390
2391	for (i = 0; i < sc->params.nports; i++) {
2392		struct port_info *pi = &sc->port[i];
2393		struct ifnet *ifp = pi->ifp;
2394		struct cmac *mac = &pi->mac;
2395		struct mac_stats *mstats = &mac->stats;
2396		PORT_LOCK(pi);
2397		t3_mac_update_stats(mac);
2398		PORT_UNLOCK(pi);
2399
2400		ifp->if_opackets =
2401		    mstats->tx_frames_64 +
2402		    mstats->tx_frames_65_127 +
2403		    mstats->tx_frames_128_255 +
2404		    mstats->tx_frames_256_511 +
2405		    mstats->tx_frames_512_1023 +
2406		    mstats->tx_frames_1024_1518 +
2407		    mstats->tx_frames_1519_max;
2408
2409		ifp->if_ipackets =
2410		    mstats->rx_frames_64 +
2411		    mstats->rx_frames_65_127 +
2412		    mstats->rx_frames_128_255 +
2413		    mstats->rx_frames_256_511 +
2414		    mstats->rx_frames_512_1023 +
2415		    mstats->rx_frames_1024_1518 +
2416		    mstats->rx_frames_1519_max;
2417
2418		ifp->if_obytes = mstats->tx_octets;
2419		ifp->if_ibytes = mstats->rx_octets;
2420		ifp->if_omcasts = mstats->tx_mcast_frames;
2421		ifp->if_imcasts = mstats->rx_mcast_frames;
2422
2423		ifp->if_collisions =
2424		    mstats->tx_total_collisions;
2425
2426		ifp->if_iqdrops = mstats->rx_cong_drops;
2427
2428		ifp->if_oerrors =
2429		    mstats->tx_excess_collisions +
2430		    mstats->tx_underrun +
2431		    mstats->tx_len_errs +
2432		    mstats->tx_mac_internal_errs +
2433		    mstats->tx_excess_deferral +
2434		    mstats->tx_fcs_errs;
2435		ifp->if_ierrors =
2436		    mstats->rx_jabber +
2437		    mstats->rx_data_errs +
2438		    mstats->rx_sequence_errs +
2439		    mstats->rx_runt +
2440		    mstats->rx_too_long +
2441		    mstats->rx_mac_internal_errs +
2442		    mstats->rx_short +
2443		    mstats->rx_fcs_errs;
2444
2445		if (mac->multiport)
2446			continue;
2447
2448		/* Count rx fifo overflows, once per second */
2449		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2450		reset = 0;
2451		if (cause & F_RXFIFO_OVERFLOW) {
2452			mac->stats.rx_fifo_ovfl++;
2453			reset |= F_RXFIFO_OVERFLOW;
2454		}
2455		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2456	}
2457}
2458
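/*
 * Read back and rewrite the upper dwords of the 64-bit BARs, presumably so
 * they survive a chip reset on 32-bit hosts.  The body is compiled out for
 * now (note the "&& 0") and still uses Linux-style PCI config accessors that
 * would need to be converted to pci_read_config()/pci_write_config() before
 * it could be enabled.
 */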
2459static void
2460touch_bars(device_t dev)
2461{
2462	/*
2463	 * Don't enable yet
2464	 */
2465#if !defined(__LP64__) && 0
2466	u32 v;
2467
2468	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2469	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2470	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2471	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2472	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2473	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2474#endif
2475}
2476
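/*
 * Write a byte range to the serial EEPROM.  The EEPROM is accessed in 32-bit
 * words, so unaligned writes go through a bounce buffer that is pre-filled
 * with the existing leading and trailing words.
 */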
2477static int
2478set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2479{
2480	uint8_t *buf;
2481	int err = 0;
2482	u32 aligned_offset, aligned_len, *p;
	struct adapter *adapter = pi->adapter;

2486	aligned_offset = offset & ~3;
2487	aligned_len = (len + (offset & 3) + 3) & ~3;
2488
2489	if (aligned_offset != offset || aligned_len != len) {
2490		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2491		if (!buf)
2492			return (ENOMEM);
2493		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2494		if (!err && aligned_len > 4)
2495			err = t3_seeprom_read(adapter,
2496					      aligned_offset + aligned_len - 4,
2497					      (u32 *)&buf[aligned_len - 4]);
2498		if (err)
2499			goto out;
2500		memcpy(buf + (offset & 3), data, len);
2501	} else
2502		buf = (uint8_t *)(uintptr_t)data;
2503
2504	err = t3_seeprom_wp(adapter, 0);
2505	if (err)
2506		goto out;
2507
2508	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2509		err = t3_seeprom_write(adapter, aligned_offset, *p);
2510		aligned_offset += 4;
2511	}
2512
2513	if (!err)
2514		err = t3_seeprom_wp(adapter, 1);
2515out:
2516	if (buf != data)
2517		free(buf, M_DEVBUF);
	return (err);
2519}
2520
2521
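/*
 * Range check used by the ioctls below; a negative value means "leave
 * unchanged" and always passes, e.g. in_range(-1, 0, 1) and in_range(1, 0, 1)
 * are true while in_range(5, 0, 1) is false.
 */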
2522static int
2523in_range(int val, int lo, int hi)
2524{
2525	return val < 0 || (val <= hi && val >= lo);
	return (val < 0 || (val <= hi && val >= lo));
2527
2528static int
2529cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2530{
	return (0);
2532}
2533
2534static int
2535cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2536{
	return (0);
2538}
2539
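/*
 * Privileged ioctl handler for the per-port control device (typically driven
 * by Chelsio's cxgbtool): register and MII access, SGE context/descriptor
 * dumps, firmware and bootcode loading, memory and EEPROM access, traffic
 * scheduler setup, and statistics control.
 */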
2540static int
2541cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2542    int fflag, struct thread *td)
2543{
2544	int mmd, error = 0;
2545	struct port_info *pi = dev->si_drv1;
2546	adapter_t *sc = pi->adapter;
2547
2548#ifdef PRIV_SUPPORTED
2549	if (priv_check(td, PRIV_DRIVER)) {
2550		if (cxgb_debug)
2551			printf("user does not have access to privileged ioctls\n");
2552		return (EPERM);
2553	}
2554#else
2555	if (suser(td)) {
2556		if (cxgb_debug)
2557			printf("user does not have access to privileged ioctls\n");
2558		return (EPERM);
2559	}
2560#endif
2561
2562	switch (cmd) {
2563	case CHELSIO_GET_MIIREG: {
2564		uint32_t val;
2565		struct cphy *phy = &pi->phy;
2566		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2567
2568		if (!phy->mdio_read)
2569			return (EOPNOTSUPP);
2570		if (is_10G(sc)) {
2571			mmd = mid->phy_id >> 8;
2572			if (!mmd)
2573				mmd = MDIO_DEV_PCS;
2574			else if (mmd > MDIO_DEV_VEND2)
2575				return (EINVAL);
2576
2577			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2578					     mid->reg_num, &val);
2579		} else
			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2581					     mid->reg_num & 0x1f, &val);
2582		if (error == 0)
2583			mid->val_out = val;
2584		break;
2585	}
2586	case CHELSIO_SET_MIIREG: {
2587		struct cphy *phy = &pi->phy;
2588		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2589
2590		if (!phy->mdio_write)
2591			return (EOPNOTSUPP);
2592		if (is_10G(sc)) {
2593			mmd = mid->phy_id >> 8;
2594			if (!mmd)
2595				mmd = MDIO_DEV_PCS;
2596			else if (mmd > MDIO_DEV_VEND2)
2597				return (EINVAL);
2598
2599			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2600					      mmd, mid->reg_num, mid->val_in);
2601		} else
2602			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2603					      mid->reg_num & 0x1f,
2604					      mid->val_in);
2605		break;
2606	}
2607	case CHELSIO_SETREG: {
2608		struct ch_reg *edata = (struct ch_reg *)data;
2609		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2610			return (EFAULT);
2611		t3_write_reg(sc, edata->addr, edata->val);
2612		break;
2613	}
2614	case CHELSIO_GETREG: {
2615		struct ch_reg *edata = (struct ch_reg *)data;
2616		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2617			return (EFAULT);
2618		edata->val = t3_read_reg(sc, edata->addr);
2619		break;
2620	}
2621	case CHELSIO_GET_SGE_CONTEXT: {
2622		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2623		mtx_lock_spin(&sc->sge.reg_lock);
2624		switch (ecntxt->cntxt_type) {
2625		case CNTXT_TYPE_EGRESS:
2626			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2627			    ecntxt->data);
2628			break;
2629		case CNTXT_TYPE_FL:
2630			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2631			    ecntxt->data);
2632			break;
2633		case CNTXT_TYPE_RSP:
2634			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2635			    ecntxt->data);
2636			break;
2637		case CNTXT_TYPE_CQ:
2638			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2639			    ecntxt->data);
2640			break;
2641		default:
2642			error = EINVAL;
2643			break;
2644		}
2645		mtx_unlock_spin(&sc->sge.reg_lock);
2646		break;
2647	}
2648	case CHELSIO_GET_SGE_DESC: {
2649		struct ch_desc *edesc = (struct ch_desc *)data;
2650		int ret;
2651		if (edesc->queue_num >= SGE_QSETS * 6)
2652			return (EINVAL);
2653		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2654		    edesc->queue_num % 6, edesc->idx, edesc->data);
2655		if (ret < 0)
2656			return (EINVAL);
2657		edesc->size = ret;
2658		break;
2659	}
2660	case CHELSIO_GET_QSET_PARAMS: {
2661		struct qset_params *q;
2662		struct ch_qset_params *t = (struct ch_qset_params *)data;
2663		int q1 = pi->first_qset;
2664		int nqsets = pi->nqsets;
2665		int i;
2666
2667		if (t->qset_idx >= nqsets)
			return (EINVAL);
2669
2670		i = q1 + t->qset_idx;
2671		q = &sc->params.sge.qset[i];
2672		t->rspq_size   = q->rspq_size;
2673		t->txq_size[0] = q->txq_size[0];
2674		t->txq_size[1] = q->txq_size[1];
2675		t->txq_size[2] = q->txq_size[2];
2676		t->fl_size[0]  = q->fl_size;
2677		t->fl_size[1]  = q->jumbo_size;
2678		t->polling     = q->polling;
2679		t->lro         = q->lro;
2680		t->intr_lat    = q->coalesce_usecs;
2681		t->cong_thres  = q->cong_thres;
2682		t->qnum        = i;
2683
2684		if (sc->flags & USING_MSIX)
2685			t->vector = rman_get_start(sc->msix_irq_res[i]);
2686		else
2687			t->vector = rman_get_start(sc->irq_res);
2688
2689		break;
2690	}
2691	case CHELSIO_GET_QSET_NUM: {
2692		struct ch_reg *edata = (struct ch_reg *)data;
2693		edata->val = pi->nqsets;
2694		break;
2695	}
2696	case CHELSIO_LOAD_FW: {
2697		uint8_t *fw_data;
2698		uint32_t vers;
2699		struct ch_mem_range *t = (struct ch_mem_range *)data;
2700
2701		/*
2702		 * You're allowed to load a firmware only before FULL_INIT_DONE
2703		 *
2704		 * FW_UPTODATE is also set so the rest of the initialization
2705		 * will not overwrite what was loaded here.  This gives you the
2706		 * flexibility to load any firmware (and maybe shoot yourself in
2707		 * the foot).
2708		 */
2709
2710		ADAPTER_LOCK(sc);
2711		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2712			ADAPTER_UNLOCK(sc);
2713			return (EBUSY);
2714		}
2715
2716		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2717		if (!fw_data)
2718			error = ENOMEM;
2719		else
2720			error = copyin(t->buf, fw_data, t->len);
2721
2722		if (!error)
2723			error = -t3_load_fw(sc, fw_data, t->len);
2724
2725		if (t3_get_fw_version(sc, &vers) == 0) {
2726			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2727			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2728			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2729		}
2730
2731		if (!error)
2732			sc->flags |= FW_UPTODATE;
2733
2734		free(fw_data, M_DEVBUF);
2735		ADAPTER_UNLOCK(sc);
2736		break;
2737	}
2738	case CHELSIO_LOAD_BOOT: {
2739		uint8_t *boot_data;
2740		struct ch_mem_range *t = (struct ch_mem_range *)data;
2741
2742		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2743		if (!boot_data)
			return (ENOMEM);
2745
2746		error = copyin(t->buf, boot_data, t->len);
2747		if (!error)
2748			error = -t3_load_boot(sc, boot_data, t->len);
2749
2750		free(boot_data, M_DEVBUF);
2751		break;
2752	}
2753	case CHELSIO_GET_PM: {
2754		struct ch_pm *m = (struct ch_pm *)data;
2755		struct tp_params *p = &sc->params.tp;
2756
2757		if (!is_offload(sc))
2758			return (EOPNOTSUPP);
2759
2760		m->tx_pg_sz = p->tx_pg_size;
2761		m->tx_num_pg = p->tx_num_pgs;
2762		m->rx_pg_sz  = p->rx_pg_size;
2763		m->rx_num_pg = p->rx_num_pgs;
2764		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2765
2766		break;
2767	}
2768	case CHELSIO_SET_PM: {
2769		struct ch_pm *m = (struct ch_pm *)data;
2770		struct tp_params *p = &sc->params.tp;
2771
2772		if (!is_offload(sc))
2773			return (EOPNOTSUPP);
2774		if (sc->flags & FULL_INIT_DONE)
2775			return (EBUSY);
2776
2777		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2778		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2779			return (EINVAL);	/* not power of 2 */
2780		if (!(m->rx_pg_sz & 0x14000))
2781			return (EINVAL);	/* not 16KB or 64KB */
		if (!(m->tx_pg_sz & 0x1554000))
			return (EINVAL);	/* not 16KB..16MB (powers of 4) */
2784		if (m->tx_num_pg == -1)
2785			m->tx_num_pg = p->tx_num_pgs;
2786		if (m->rx_num_pg == -1)
2787			m->rx_num_pg = p->rx_num_pgs;
2788		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2789			return (EINVAL);
2790		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2791		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2792			return (EINVAL);
2793
2794		p->rx_pg_size = m->rx_pg_sz;
2795		p->tx_pg_size = m->tx_pg_sz;
2796		p->rx_num_pgs = m->rx_num_pg;
2797		p->tx_num_pgs = m->tx_num_pg;
2798		break;
2799	}
2800	case CHELSIO_SETMTUTAB: {
2801		struct ch_mtus *m = (struct ch_mtus *)data;
2802		int i;
2803
2804		if (!is_offload(sc))
2805			return (EOPNOTSUPP);
2806		if (offload_running(sc))
2807			return (EBUSY);
2808		if (m->nmtus != NMTUS)
2809			return (EINVAL);
2810		if (m->mtus[0] < 81)         /* accommodate SACK */
2811			return (EINVAL);
2812
2813		/*
2814		 * MTUs must be in ascending order
2815		 */
2816		for (i = 1; i < NMTUS; ++i)
2817			if (m->mtus[i] < m->mtus[i - 1])
2818				return (EINVAL);
2819
2820		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2821		break;
2822	}
2823	case CHELSIO_GETMTUTAB: {
2824		struct ch_mtus *m = (struct ch_mtus *)data;
2825
2826		if (!is_offload(sc))
2827			return (EOPNOTSUPP);
2828
2829		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2830		m->nmtus = NMTUS;
2831		break;
2832	}
2833	case CHELSIO_GET_MEM: {
2834		struct ch_mem_range *t = (struct ch_mem_range *)data;
2835		struct mc7 *mem;
2836		uint8_t *useraddr;
2837		u64 buf[32];
2838
2839		/*
		 * Use these to avoid modifying len/addr in the return
2841		 * struct
2842		 */
2843		uint32_t len = t->len, addr = t->addr;
2844
2845		if (!is_offload(sc))
2846			return (EOPNOTSUPP);
2847		if (!(sc->flags & FULL_INIT_DONE))
2848			return (EIO);         /* need the memory controllers */
2849		if ((addr & 0x7) || (len & 0x7))
2850			return (EINVAL);
2851		if (t->mem_id == MEM_CM)
2852			mem = &sc->cm;
2853		else if (t->mem_id == MEM_PMRX)
2854			mem = &sc->pmrx;
2855		else if (t->mem_id == MEM_PMTX)
2856			mem = &sc->pmtx;
2857		else
2858			return (EINVAL);
2859
2860		/*
2861		 * Version scheme:
2862		 * bits 0..9: chip version
2863		 * bits 10..15: chip revision
2864		 */
2865		t->version = 3 | (sc->params.rev << 10);
2866
2867		/*
2868		 * Read 256 bytes at a time as len can be large and we don't
2869		 * want to use huge intermediate buffers.
2870		 */
2871		useraddr = (uint8_t *)t->buf;
2872		while (len) {
2873			unsigned int chunk = min(len, sizeof(buf));
2874
2875			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2876			if (error)
2877				return (-error);
2878			if (copyout(buf, useraddr, chunk))
2879				return (EFAULT);
2880			useraddr += chunk;
2881			addr += chunk;
2882			len -= chunk;
2883		}
2884		break;
2885	}
2886	case CHELSIO_READ_TCAM_WORD: {
2887		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2888
2889		if (!is_offload(sc))
2890			return (EOPNOTSUPP);
2891		if (!(sc->flags & FULL_INIT_DONE))
2892			return (EIO);         /* need MC5 */
		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2895	}
2896	case CHELSIO_SET_TRACE_FILTER: {
2897		struct ch_trace *t = (struct ch_trace *)data;
2898		const struct trace_params *tp;
2899
2900		tp = (const struct trace_params *)&t->sip;
2901		if (t->config_tx)
2902			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2903					       t->trace_tx);
2904		if (t->config_rx)
2905			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2906					       t->trace_rx);
2907		break;
2908	}
2909	case CHELSIO_SET_PKTSCHED: {
2910		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2911		if (sc->open_device_map == 0)
2912			return (EAGAIN);
2913		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2914		    p->binding);
2915		break;
2916	}
2917	case CHELSIO_IFCONF_GETREGS: {
2918		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2919		int reglen = cxgb_get_regs_len();
2920		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2921		if (buf == NULL) {
2922			return (ENOMEM);
2923		}
2924		if (regs->len > reglen)
2925			regs->len = reglen;
2926		else if (regs->len < reglen)
2927			error = ENOBUFS;
2928
2929		if (!error) {
2930			cxgb_get_regs(sc, regs, buf);
2931			error = copyout(buf, regs->data, reglen);
2932		}
2933		free(buf, M_DEVBUF);
2934
2935		break;
2936	}
2937	case CHELSIO_SET_HW_SCHED: {
2938		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2939		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2940
2941		if ((sc->flags & FULL_INIT_DONE) == 0)
2942			return (EAGAIN);       /* need TP to be initialized */
2943		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2944		    !in_range(t->channel, 0, 1) ||
2945		    !in_range(t->kbps, 0, 10000000) ||
2946		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2947		    !in_range(t->flow_ipg, 0,
2948			      dack_ticks_to_usec(sc, 0x7ff)))
2949			return (EINVAL);
2950
2951		if (t->kbps >= 0) {
2952			error = t3_config_sched(sc, t->kbps, t->sched);
2953			if (error < 0)
2954				return (-error);
2955		}
2956		if (t->class_ipg >= 0)
2957			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2958		if (t->flow_ipg >= 0) {
2959			t->flow_ipg *= 1000;     /* us -> ns */
2960			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2961		}
2962		if (t->mode >= 0) {
2963			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2964
2965			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2966					 bit, t->mode ? bit : 0);
2967		}
2968		if (t->channel >= 0)
2969			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2970					 1 << t->sched, t->channel << t->sched);
2971		break;
2972	}
2973	case CHELSIO_GET_EEPROM: {
2974		int i;
2975		struct ch_eeprom *e = (struct ch_eeprom *)data;
2976		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2977
2978		if (buf == NULL) {
2979			return (ENOMEM);
2980		}
2981		e->magic = EEPROM_MAGIC;
2982		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2983			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2984
2985		if (!error)
2986			error = copyout(buf + e->offset, e->data, e->len);
2987
2988		free(buf, M_DEVBUF);
2989		break;
2990	}
2991	case CHELSIO_CLEAR_STATS: {
2992		if (!(sc->flags & FULL_INIT_DONE))
			return (EAGAIN);
2994
2995		PORT_LOCK(pi);
2996		t3_mac_update_stats(&pi->mac);
2997		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2998		PORT_UNLOCK(pi);
2999		break;
3000	}
3001	case CHELSIO_GET_UP_LA: {
3002		struct ch_up_la *la = (struct ch_up_la *)data;
3003		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3004		if (buf == NULL) {
3005			return (ENOMEM);
3006		}
3007		if (la->bufsize < LA_BUFSIZE)
3008			error = ENOBUFS;
3009
3010		if (!error)
3011			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3012					      &la->bufsize, buf);
3013		if (!error)
3014			error = copyout(buf, la->data, la->bufsize);
3015
3016		free(buf, M_DEVBUF);
3017		break;
3018	}
3019	case CHELSIO_GET_UP_IOQS: {
3020		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3021		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3022		uint32_t *v;
3023
3024		if (buf == NULL) {
3025			return (ENOMEM);
3026		}
3027		if (ioqs->bufsize < IOQS_BUFSIZE)
3028			error = ENOBUFS;
3029
3030		if (!error)
3031			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3032
3033		if (!error) {
3034			v = (uint32_t *)buf;
3035
3036			ioqs->bufsize -= 4 * sizeof(uint32_t);
3037			ioqs->ioq_rx_enable = *v++;
3038			ioqs->ioq_tx_enable = *v++;
3039			ioqs->ioq_rx_status = *v++;
3040			ioqs->ioq_tx_status = *v++;
3041
3042			error = copyout(v, ioqs->data, ioqs->bufsize);
3043		}
3044
3045		free(buf, M_DEVBUF);
3046		break;
3047	}
3048	default:
3049		return (EOPNOTSUPP);
3051	}
3052
3053	return (error);
3054}
3055
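/* Copy the registers [start, end] into buf, starting at offset start within buf. */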
3056static __inline void
3057reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3058    unsigned int end)
3059{
3060	uint32_t *p = (uint32_t *)(buf + start);
3061
3062	for ( ; start <= end; start += sizeof(uint32_t))
3063		*p++ = t3_read_reg(ap, start);
3064}
3065
3066#define T3_REGMAP_SIZE (3 * 1024)
3067static int
3068cxgb_get_regs_len(void)
3069{
	return (T3_REGMAP_SIZE);
3071}
3072
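/*
 * Fill the register dump returned by CHELSIO_IFCONF_GETREGS: a version word
 * followed by selected register blocks (MAC statistics registers are skipped
 * because they are clear-on-read).
 */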
3073static void
3074cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3075{
3076
3077	/*
3078	 * Version scheme:
3079	 * bits 0..9: chip version
3080	 * bits 10..15: chip revision
3081	 * bit 31: set for PCIe cards
3082	 */
3083	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3084
3085	/*
3086	 * We skip the MAC statistics registers because they are clear-on-read.
3087	 * Also reading multi-register stats would need to synchronize with the
3088	 * periodic mac stats accumulation.  Hard to justify the complexity.
3089	 */
3090	memset(buf, 0, cxgb_get_regs_len());
3091	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3092	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3093	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3094	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3095	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3096	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3097		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3098	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3099		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3100}
3101
3102
3103MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3104