t4_main.c revision 346954
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/t4_main.c 346954 2019-04-30 08:17:11Z np $");

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#include <netinet/in.h>
#include <netinet/ip.h>
#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#include <machine/cputypes.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#endif
#include <crypto/rijndael/rijndael.h>
#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "cudbg/cudbg.h"
#include "t4_clip.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"
#include "t4_if.h"
#include "t4_smt.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static int t4_child_location_str(device_t, device_t, char *, size_t);
static int t4_ready(device_t);
static int t4_read_port_device(device_t, int, device_t *);
static device_method_t t4_methods[] = {
	DEVMETHOD(device_probe,		t4_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t4_driver = {
	"t4nex",
	t4_methods,
	sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
device_method_t cxgbe_methods[] = {
	DEVMETHOD(device_probe,		cxgbe_probe),
	DEVMETHOD(device_attach,	cxgbe_attach),
	DEVMETHOD(device_detach,	cxgbe_detach),
	{ 0, 0 }
};
static driver_t cxgbe_driver = {
	"cxgbe",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
	DEVMETHOD(device_probe,		vcxgbe_probe),
	DEVMETHOD(device_attach,	vcxgbe_attach),
	DEVMETHOD(device_detach,	vcxgbe_detach),
	{ 0, 0 }
};
static driver_t vcxgbe_driver = {
	"vcxgbe",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;

static struct cdevsw t4_cdevsw = {
       .d_version = D_VERSION,
       .d_ioctl = t4_ioctl,
       .d_name = "t4nex",
};

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
	DEVMETHOD(device_probe,		t5_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t5_driver = {
	"t5nex",
	t5_methods,
	sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
	"cxl",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
	"vcxl",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* T6 bus driver interface */
static int t6_probe(device_t);
static device_method_t t6_methods[] = {
	DEVMETHOD(device_probe,		t6_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t6_driver = {
	"t6nex",
	t6_methods,
	sizeof(struct adapter)
};


/* T6 port (cc) interface */
static driver_t cc_driver = {
	"cc",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T6 VI (vcc) interface */
static driver_t vcc_driver = {
	"vcc",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* ifnet interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
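/*
 * A minimal sketch (illustrative, not from the original source) of code that
 * honors this order when it must hold both the adapter list lock and a
 * specific adapter's lock:
 *
 *	sx_slock(&t4_list_lock);
 *	SLIST_FOREACH(sc, &t4_list, link) {
 *		ADAPTER_LOCK(sc);
 *		...
 *		ADAPTER_UNLOCK(sc);
 *	}
 *	sx_sunlock(&t4_list_lock);
 */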
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -n as an indication to tweak_tunables() that it should
 * provide a reasonable default (up to n) when the driver is loaded.
 *
 * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
 * T5 are under hw.cxl.
 */
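/*
 * Illustrative note (not in the original source): with NTXQ = 16 below,
 * t4_ntxq starts out as -16 and tweak_tunables() is expected to replace it
 * with a machine-appropriate value no greater than 16, e.g. 8 on an
 * 8-core system.
 */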
SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe(4) parameters");
SYSCTL_NODE(_hw, OID_AUTO, cxl, CTLFLAG_RD, 0, "cxgbe(4) T5+ parameters");
SYSCTL_NODE(_hw_cxgbe, OID_AUTO, toe, CTLFLAG_RD, 0, "cxgbe(4) TOE parameters");

/*
 * Number of queues for tx and rx, NIC and offload.
 */
#define NTXQ 16
int t4_ntxq = -NTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq, CTLFLAG_RDTUN, &t4_ntxq, 0,
    "Number of TX queues per port");
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);	/* Old name, undocumented */

#define NRXQ 8
int t4_nrxq = -NRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq, CTLFLAG_RDTUN, &t4_nrxq, 0,
    "Number of RX queues per port");
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);	/* Old name, undocumented */

#define NTXQ_VI 1
static int t4_ntxq_vi = -NTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq_vi, CTLFLAG_RDTUN, &t4_ntxq_vi, 0,
    "Number of TX queues per VI");

#define NRXQ_VI 1
static int t4_nrxq_vi = -NRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq_vi, CTLFLAG_RDTUN, &t4_nrxq_vi, 0,
    "Number of RX queues per VI");

static int t4_rsrv_noflowq = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
    0, "Reserve TX queue 0 of each VI for non-flowid packets");

#ifdef TCP_OFFLOAD
#define NOFLDTXQ 8
static int t4_nofldtxq = -NOFLDTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
    "Number of offload TX queues per port");

#define NOFLDRXQ 2
static int t4_nofldrxq = -NOFLDRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
    "Number of offload RX queues per port");

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq_vi, CTLFLAG_RDTUN, &t4_nofldtxq_vi, 0,
    "Number of offload TX queues per VI");

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq_vi, CTLFLAG_RDTUN, &t4_nofldrxq_vi, 0,
    "Number of offload RX queues per VI");

#define TMR_IDX_OFLD 1
int t4_tmr_idx_ofld = TMR_IDX_OFLD;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_ofld, CTLFLAG_RDTUN,
    &t4_tmr_idx_ofld, 0, "Holdoff timer index for offload queues");

#define PKTC_IDX_OFLD (-1)
int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_ofld, CTLFLAG_RDTUN,
    &t4_pktc_idx_ofld, 0, "holdoff packet counter index for offload queues");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_idle = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_idle, CTLFLAG_RDTUN,
    &t4_toe_keepalive_idle, 0, "TOE keepalive idle timer (us)");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_interval = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_interval, CTLFLAG_RDTUN,
    &t4_toe_keepalive_interval, 0, "TOE keepalive interval timer (us)");

/* 0 means chip/fw default, non-zero number is # of keepalives before abort */
static int t4_toe_keepalive_count = 0;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, keepalive_count, CTLFLAG_RDTUN,
    &t4_toe_keepalive_count, 0, "Number of TOE keepalive probes before abort");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_min = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_min, CTLFLAG_RDTUN,
    &t4_toe_rexmt_min, 0, "Minimum TOE retransmit interval (us)");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_max = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_max, CTLFLAG_RDTUN,
    &t4_toe_rexmt_max, 0, "Maximum TOE retransmit interval (us)");

/* 0 means chip/fw default, non-zero number is # of rexmt before abort */
static int t4_toe_rexmt_count = 0;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, rexmt_count, CTLFLAG_RDTUN,
    &t4_toe_rexmt_count, 0, "Number of TOE retransmissions before abort");

/* -1 means chip/fw default, other values are raw backoff values to use */
static int t4_toe_rexmt_backoff[16] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
SYSCTL_NODE(_hw_cxgbe_toe, OID_AUTO, rexmt_backoff, CTLFLAG_RD, 0,
    "cxgbe(4) TOE retransmit backoff values");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 0, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[0], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 1, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[1], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 2, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[2], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 3, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[3], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 4, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[4], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 5, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[5], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 6, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[6], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 7, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[7], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 8, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[8], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 9, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[9], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 10, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[10], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 11, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[11], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 12, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[12], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 13, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[13], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[14], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[15], 0, "");
#endif

#ifdef DEV_NETMAP
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -NNMTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq_vi, CTLFLAG_RDTUN, &t4_nnmtxq_vi, 0,
    "Number of netmap TX queues per VI");

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -NNMRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq_vi, CTLFLAG_RDTUN, &t4_nnmrxq_vi, 0,
    "Number of netmap RX queues per VI");
#endif

/*
 * Holdoff parameters for ports.
 */
#define TMR_IDX 1
int t4_tmr_idx = TMR_IDX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx, CTLFLAG_RDTUN, &t4_tmr_idx,
    0, "Holdoff timer index");
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);	/* Old name */

#define PKTC_IDX (-1)
int t4_pktc_idx = PKTC_IDX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx, CTLFLAG_RDTUN, &t4_pktc_idx,
    0, "Holdoff packet counter index");
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);	/* Old name */

/*
 * Size (# of entries) of each tx and rx queue.
 */
unsigned int t4_qsize_txq = TX_EQ_QSIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &t4_qsize_txq, 0,
    "Number of descriptors in each TX queue");

unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &t4_qsize_rxq, 0,
    "Number of descriptors in each RX queue");

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &t4_intr_types,
    0, "Interrupt types allowed (bit 0 = INTx, 1 = MSI, 2 = MSI-X)");
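/*
 * Example (assuming INTR_INTX = 1, INTR_MSI = 2 and INTR_MSIX = 4 from the
 * driver headers): a line like
 *	hw.cxgbe.interrupt_types="4"
 * in /boot/loader.conf restricts the driver to MSI-X only.
 */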

/*
 * Configuration file.  All the _CF names here are special.
 */
#define DEFAULT_CF	"default"
#define BUILTIN_CF	"built-in"
#define FLASH_CF	"flash"
#define UWIRE_CF	"uwire"
#define FPGA_CF		"fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
SYSCTL_STRING(_hw_cxgbe, OID_AUTO, config_file, CTLFLAG_RDTUN, t4_cfg_file,
    sizeof(t4_cfg_file), "Firmware configuration file");

/*
 * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
 *                 negotiated settings will override rx_pause/tx_pause.
 *                 Otherwise rx_pause/tx_pause are applied forcibly.
 */
static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, pause_settings, CTLFLAG_RDTUN,
    &t4_pause_settings, 0,
    "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
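/*
 * Example (illustrative): PAUSE_RX | PAUSE_TX without PAUSE_AUTONEG is the
 * value 3, so hw.cxgbe.pause_settings=3 forces flow control on in both
 * directions regardless of what autonegotiation would have produced.
 */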

/*
 * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
 * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5).
 *  0 to disable FEC.
 */
static int t4_fec = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fec, CTLFLAG_RDTUN, &t4_fec, 0,
    "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
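/*
 * Example (illustrative, per the bit layout above): hw.cxgbe.fec=1 requests
 * RS-FEC only and hw.cxgbe.fec=2 requests BASE-R FEC only; -1 leaves the
 * choice to the firmware, which suits most transceivers.
 */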

/*
 * Link autonegotiation.
 * -1 to run with the firmware default.
 *  0 to disable.
 *  1 to enable.
 */
static int t4_autoneg = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, autoneg, CTLFLAG_RDTUN, &t4_autoneg, 0,
    "Link autonegotiation");

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).  '-n' is the same as 'n' except the firmware
 * version used in the checks is read from the firmware bundled with the driver.
 */
static int t4_fw_install = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fw_install, CTLFLAG_RDTUN, &t4_fw_install, 0,
    "Firmware auto-install (0 = prohibited, 1 = allowed, 2 = encouraged)");

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nbmcaps_allowed, CTLFLAG_RDTUN,
    &t4_nbmcaps_allowed, 0, "Default NBM capabilities");

static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
SYSCTL_INT(_hw_cxgbe, OID_AUTO, linkcaps_allowed, CTLFLAG_RDTUN,
    &t4_linkcaps_allowed, 0, "Default link capabilities");

static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
    FW_CAPS_CONFIG_SWITCH_EGRESS;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, switchcaps_allowed, CTLFLAG_RDTUN,
    &t4_switchcaps_allowed, 0, "Default switch capabilities");

#ifdef RATELIMIT
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
	FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
#else
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
	FW_CAPS_CONFIG_NIC_HASHFILTER;
#endif
SYSCTL_INT(_hw_cxgbe, OID_AUTO, niccaps_allowed, CTLFLAG_RDTUN,
    &t4_niccaps_allowed, 0, "Default NIC capabilities");

static int t4_toecaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, toecaps_allowed, CTLFLAG_RDTUN,
    &t4_toecaps_allowed, 0, "Default TCP offload capabilities");

static int t4_rdmacaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rdmacaps_allowed, CTLFLAG_RDTUN,
    &t4_rdmacaps_allowed, 0, "Default RDMA capabilities");

static int t4_cryptocaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cryptocaps_allowed, CTLFLAG_RDTUN,
    &t4_cryptocaps_allowed, 0, "Default crypto capabilities");

static int t4_iscsicaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, iscsicaps_allowed, CTLFLAG_RDTUN,
    &t4_iscsicaps_allowed, 0, "Default iSCSI capabilities");

static int t4_fcoecaps_allowed = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fcoecaps_allowed, CTLFLAG_RDTUN,
    &t4_fcoecaps_allowed, 0, "Default FCoE capabilities");

static int t5_write_combine = 0;
SYSCTL_INT(_hw_cxl, OID_AUTO, write_combine, CTLFLAG_RDTUN, &t5_write_combine,
    0, "Use WC instead of UC for BAR2");

static int t4_num_vis = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, num_vis, CTLFLAG_RDTUN, &t4_num_vis, 0,
    "Number of VIs per port");

/*
 * PCIe Relaxed Ordering.
 * -1: driver should figure out a good value.
 * 0: disable RO.
 * 1: enable RO.
 * 2: leave RO alone.
 */
static int pcie_relaxed_ordering = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, pcie_relaxed_ordering, CTLFLAG_RDTUN,
    &pcie_relaxed_ordering, 0,
    "PCIe Relaxed Ordering: 0 = disable, 1 = enable, 2 = leave alone");

static int t4_panic_on_fatal_err = 0;
TUNABLE_INT("hw.cxgbe.panic_on_fatal_err", &t4_panic_on_fatal_err);

#ifdef TCP_OFFLOAD
/*
 * TOE tunables.
 */
static int t4_cop_managed_offloading = 0;
TUNABLE_INT("hw.cxgbe.cop_managed_offloading", &t4_cop_managed_offloading);
#endif

/* Functions used by VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
	FW_VI_FUNC_ETH,
	FW_VI_FUNC_OFLD,
	FW_VI_FUNC_IWARP,
	FW_VI_FUNC_OPENISCSI,
	FW_VI_FUNC_OPENFCOE,
	FW_VI_FUNC_FOISCSI,
	FW_VI_FUNC_FOFCOE,
};

struct intrs_and_queues {
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t num_vis;	/* number of VIs for each port */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t ntxq;		/* # of NIC txq's for each port */
	uint16_t nrxq;		/* # of NIC rxq's for each port */
	uint16_t nofldtxq;	/* # of TOE txq's for each port */
	uint16_t nofldrxq;	/* # of TOE rxq's for each port */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};

static void setup_memwin(struct adapter *);
static void position_memwin(struct adapter *, int, uint32_t);
static int validate_mem_range(struct adapter *, uint32_t, uint32_t);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, uint32_t,
    uint32_t *);
static int fixup_devlog_params(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
static int contact_firmware(struct adapter *);
static int partition_resources(struct adapter *);
static int get_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static bool fixed_ifmedia(struct port_info *);
static void build_medialist(struct port_info *);
static void init_link_config(struct port_info *);
static int fixup_link_config(struct port_info *);
static int apply_link_config(struct port_info *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct adapter *, struct vi_info *);
static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
static void cxgbe_tick(void *);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_fec(SYSCTL_HANDLER_ARGS);
static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
#ifdef TCP_OFFLOAD
static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
#endif
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int load_cfg(struct adapter *, struct t4_data *);
static int load_boot(struct adapter *, struct t4_bootrom *);
static int load_bootcfg(struct adapter *, struct t4_data *);
static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
static void free_offload_policy(struct t4_offload_policy *);
static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, int);
#endif
static int mod_event(module_t, int, void *);
static int notify_siblings(device_t, int);

struct {
	uint16_t device;
	char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 x 1G */
	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
	{0x5418,  "Chelsio T540-BT"},		/* 4 x 10GBaseT */
	{0x5419,  "Chelsio T540-LP-BT"},	/* 4 x 10GBaseT */
	{0x541a,  "Chelsio T540-SO-BT"},	/* 4 x 10GBaseT, nomem */
	{0x541b,  "Chelsio T540-SO-CR"},	/* 4 x 10G, nomem */

	/* Custom */
	{0x5483, "Custom T540-CR"},
	{0x5484, "Custom T540-BT"},
}, t6_pciids[] = {
	{0xc006, "Chelsio Terminator 6 FPGA"},	/* T6 PE10K6 FPGA (PF0) */
	{0x6400, "Chelsio T6-DBG-25"},		/* 2 x 10/25G, debug */
	{0x6401, "Chelsio T6225-CR"},		/* 2 x 10/25G */
	{0x6402, "Chelsio T6225-SO-CR"},	/* 2 x 10/25G, nomem */
	{0x6403, "Chelsio T6425-CR"},		/* 4 x 10/25G */
	{0x6404, "Chelsio T6425-SO-CR"},	/* 4 x 10/25G, nomem */
	{0x6405, "Chelsio T6225-OCP-SO"},	/* 2 x 10/25G, nomem */
	{0x6406, "Chelsio T62100-OCP-SO"},	/* 2 x 40/50/100G, nomem */
	{0x6407, "Chelsio T62100-LP-CR"},	/* 2 x 40/50/100G */
	{0x6408, "Chelsio T62100-SO-CR"},	/* 2 x 40/50/100G, nomem */
	{0x6409, "Chelsio T6210-BT"},		/* 2 x 10GBASE-T */
	{0x640d, "Chelsio T62100-CR"},		/* 2 x 40/50/100G */
	{0x6410, "Chelsio T6-DBG-100"},		/* 2 x 40/50/100G, debug */
	{0x6411, "Chelsio T6225-LL-CR"},	/* 2 x 10/25G */
	{0x6414, "Chelsio T61100-OCP-SO"},	/* 1 x 40/50/100G, nomem */
	{0x6415, "Chelsio T6201-BT"},		/* 2 x 1000BASE-T */

	/* Custom */
	{0x6480, "Custom T6225-CR"},
	{0x6481, "Custom T62100-CR"},
	{0x6482, "Custom T6225-CR"},
	{0x6483, "Custom T62100-CR"},
	{0x6484, "Custom T64100-CR"},
	{0x6485, "Custom T6240-SO"},
	{0x6486, "Custom T6225-SO-CR"},
	{0x6487, "Custom T6225-CR"},
};

#ifdef TCP_OFFLOAD
/*
 * service_iq_fl() has an iq and needs the fl.  Offset of fl from the iq should
 * be exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xa000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t4_pciids); i++) {
		if (d == t4_pciids[i].device) {
			device_set_desc(dev, t4_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t5_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xb000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t5_pciids); i++) {
		if (d == t5_pciids[i].device) {
			device_set_desc(dev, t5_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t6_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	for (i = 0; i < nitems(t6_pciids); i++) {
		if (d == t6_pciids[i].device) {
			device_set_desc(dev, t6_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
	device_t root_port;
	uint32_t v;

	/*
	 * The T5 chips do not properly echo the No Snoop and Relaxed
	 * Ordering attributes when replying to a TLP from a Root
	 * Port.  As a workaround, find the parent Root Port and
	 * disable No Snoop and Relaxed Ordering.  Note that this
	 * affects all devices under this root port.
	 */
	root_port = pci_find_pcie_root_port(dev);
	if (root_port == NULL) {
		device_printf(dev, "Unable to find parent root port\n");
		return;
	}

	v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
	if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
	    0)
		device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
		    device_get_nameunit(root_port));
}

static const struct devnames devnames[] = {
	{
		.nexus_name = "t4nex",
		.ifnet_name = "cxgbe",
		.vi_ifnet_name = "vcxgbe",
		.pf03_drv_name = "t4iov",
		.vf_nexus_name = "t4vf",
		.vf_ifnet_name = "cxgbev"
	}, {
		.nexus_name = "t5nex",
		.ifnet_name = "cxl",
		.vi_ifnet_name = "vcxl",
		.pf03_drv_name = "t5iov",
		.vf_nexus_name = "t5vf",
		.vf_ifnet_name = "cxlv"
	}, {
		.nexus_name = "t6nex",
		.ifnet_name = "cc",
		.vi_ifnet_name = "vcc",
		.pf03_drv_name = "t6iov",
		.vf_nexus_name = "t6vf",
		.vf_ifnet_name = "ccv"
	}
};

void
t4_init_devnames(struct adapter *sc)
{
	int id;

	id = chip_id(sc);
	if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
		sc->names = &devnames[id - CHELSIO_T4];
	else {
		device_printf(sc->dev, "chip id %d is not supported.\n", id);
		sc->names = NULL;
	}
}

static int
t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
{
	const char *parent, *name;
	long value;
	int line, unit;

	line = 0;
	parent = device_get_nameunit(sc->dev);
	name = sc->names->ifnet_name;
	while (resource_find_dev(&line, name, &unit, "at", parent) == 0) {
		if (resource_long_value(name, unit, "port", &value) == 0 &&
		    value == pi->port_id)
			return (unit);
	}
	return (-1);
}
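
/*
 * t4_ifnet_unit above consults kernel environment hints, so a fixed ifnet
 * unit number can be wired to a physical port.  For example (illustrative),
 * these loader.conf lines would make port 1 of the first adapter attach as
 * cxgbe4:
 *	hint.cxgbe.4.at="t4nex0"
 *	hint.cxgbe.4.port="1"
 */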

static int
t4_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, rqidx, tqidx, nports;
	struct make_dev_args mda;
	struct intrs_and_queues iaq;
	struct sge *s;
	uint32_t *buf;
#ifdef TCP_OFFLOAD
	int ofld_rqidx, ofld_tqidx;
#endif
#ifdef DEV_NETMAP
	int nm_rqidx, nm_tqidx;
#endif
	int num_vis;

	sc = device_get_softc(dev);
	sc->dev = dev;
	TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);

	if ((pci_get_device(dev) & 0xff00) == 0x5400)
		t5_attribute_workaround(dev);
	pci_enable_busmaster(dev);
	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
		uint32_t v;

		pci_set_max_read_req(dev, 4096);
		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
		if (pcie_relaxed_ordering == 0 &&
		    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
			v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
			pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
		} else if (pcie_relaxed_ordering == 1 &&
		    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
			v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
			pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
		}
	}

	sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
	sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
	sc->traceq = -1;
	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
	    device_get_nameunit(dev));
	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);

	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	t4_add_adapter(sc);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	sc->policy = NULL;
	rw_init(&sc->policy_lock, "connection offload policy");

	rc = t4_map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

	/* Prepare the adapter for operation. */
	buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
	rc = -t4_prep_adapter(sc, buf);
	free(buf, M_CXGBE);
	if (rc != 0) {
		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
		goto done;
	}

	/*
	 * This is the real PF# to which we're attaching.  Works from within PCI
	 * passthrough environments too, where pci_get_function() could return a
	 * different PF# depending on the passthrough configuration.  We need to
	 * use the real PF# in all our communication with the firmware.
	 */
	j = t4_read_reg(sc, A_PL_WHOAMI);
	sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
	sc->mbox = sc->pf;

	t4_init_devnames(sc);
	if (sc->names == NULL) {
		rc = ENOTSUP;
		goto done; /* error message displayed already */
	}

	/*
	 * Do this really early, with the memory windows set up even before the
	 * character device.  The userland tool's register i/o and mem read
	 * will work even in "recovery mode".
	 */
	setup_memwin(sc);
	if (t4_init_devlog_params(sc, 0) == 0)
		fixup_devlog_params(sc);
	make_dev_args_init(&mda);
	mda.mda_devsw = &t4_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (rc != 0)
		device_printf(dev, "failed to create nexus char device: %d.\n",
		    rc);

	/* Go no further if recovery mode has been requested. */
	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
		device_printf(dev, "recovery mode.\n");
		goto done;
	}

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/* Contact the firmware and try to become the master driver. */
	rc = contact_firmware(sc);
	if (rc != 0)
		goto done; /* error message displayed already */
	MPASS(sc->flags & FW_OK);

	rc = get_params__pre_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	if (sc->flags & MASTER_PF) {
		rc = partition_resources(sc);
		if (rc != 0)
			goto done; /* error message displayed already */
	}

	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.
	 */
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		/*
		 * XXX: vi[0] is special so we can't delay this allocation until
		 * pi->nvi's final value is known.
		 */
		pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		/* All VIs on this port share this media. */
		ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
		    cxgbe_media_status);

		PORT_LOCK(pi);
		init_link_config(pi);
		fixup_link_config(pi);
		build_medialist(pi);
		if (fixed_ifmedia(pi))
			pi->flags |= FIXED_IFMEDIA;
		PORT_UNLOCK(pi);

		pi->dev = device_add_child(dev, sc->names->ifnet_name,
		    t4_ifnet_unit(sc, pi));
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	nports = sc->params.nports;
	rc = cfg_itype_and_nqueues(sc, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */

	num_vis = iaq.num_vis;
	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = nports * iaq.nrxq;
	s->ntxq = nports * iaq.ntxq;
	if (num_vis > 1) {
		s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
		s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
	}
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += nports;		/* ctrl queues: 1 per port */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
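	/*
	 * Worked example (illustrative): a 4-port adapter with iaq.ntxq = 16,
	 * iaq.nrxq = 8 and a single VI per port ends up with s->ntxq = 64 and
	 * s->nrxq = 32, so s->neq = 64 + 32 + 4 (ctrl) = 100 and
	 * s->niq = 32 + 1 (fwq) = 33 before any offload/netmap queues.
	 */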
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		s->nofldrxq = nports * iaq.nofldrxq;
		s->nofldtxq = nports * iaq.nofldtxq;
		if (num_vis > 1) {
			s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
			s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
		}
		s->neq += s->nofldtxq + s->nofldrxq;
		s->niq += s->nofldrxq;

		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
		    M_CXGBE, M_ZERO | M_WAITOK);
		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef DEV_NETMAP
	if (num_vis > 1) {
		s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi;
		s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi;
	}
	s->neq += s->nnmtxq + s->nnmrxq;
	s->niq += s->nnmrxq;

	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
	    M_CXGBE, M_ZERO | M_WAITOK);
	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
	    M_CXGBE, M_ZERO | M_WAITOK);
#endif

	s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	t4_init_l2t(sc, M_WAITOK);
	t4_init_smt(sc, M_WAITOK);
	t4_init_tx_sched(sc);
#ifdef INET6
	t4_init_clip_table(sc);
#endif
	if (sc->vres.key.size != 0)
		sc->key_map = vmem_create("T4TLS key map", sc->vres.key.start,
		    sc->vres.key.size, 32, 0, M_FIRSTFIT | M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
#ifdef TCP_OFFLOAD
	ofld_rqidx = ofld_tqidx = 0;
#endif
#ifdef DEV_NETMAP
	nm_rqidx = nm_tqidx = 0;
#endif
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		pi->nvi = num_vis;
		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			vi->tmr_idx = t4_tmr_idx;
			vi->pktc_idx = t4_pktc_idx;
			vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
			vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;

			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			if (j == 0 && vi->ntxq > 1)
				vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
			else
				vi->rsrv_noflowq = 0;

#ifdef TCP_OFFLOAD
			vi->ofld_tmr_idx = t4_tmr_idx_ofld;
			vi->ofld_pktc_idx = t4_pktc_idx_ofld;
			vi->first_ofld_rxq = ofld_rqidx;
			vi->first_ofld_txq = ofld_tqidx;
			vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
			vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;

			ofld_rqidx += vi->nofldrxq;
			ofld_tqidx += vi->nofldtxq;
#endif
#ifdef DEV_NETMAP
			if (j > 0) {
				vi->first_nm_rxq = nm_rqidx;
				vi->first_nm_txq = nm_tqidx;
				vi->nnmrxq = iaq.nnmrxq_vi;
				vi->nnmtxq = iaq.nnmtxq_vi;
				nm_rqidx += vi->nnmrxq;
				nm_tqidx += vi->nnmtxq;
			}
#endif
		}
	}

	rc = t4_setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	rc = bus_generic_probe(dev);
	if (rc != 0) {
		device_printf(dev, "failed to probe child drivers: %d\n", rc);
		goto done;
	}

	/*
	 * Ensure thread-safe mailbox access (in debug builds).
	 *
	 * So far this was the only thread accessing the mailbox but various
	 * ifnets and sysctls are about to be created and their handlers/ioctls
	 * will access the mailbox from different threads.
	 */
	sc->flags |= CHK_MBOX_ACCESS;

	rc = bus_generic_attach(dev);
	if (rc != 0) {
		device_printf(dev,
		    "failed to attach all child ports: %d\n", rc);
		goto done;
	}

	device_printf(dev,
	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);

	t4_set_desc(sc);

	notify_siblings(dev, 0);

done:
	if (rc != 0 && sc->cdev) {
		/* cdev was created and so cxgbetool works; recover that way. */
		device_printf(dev,
		    "error during attach, adapter is now in recovery mode.\n");
		rc = 0;
	}

	if (rc != 0)
		t4_detach_common(dev);
	else
		t4_sysctls(sc);

	return (rc);
}

static int
t4_child_location_str(device_t bus, device_t dev, char *buf, size_t buflen)
{
	struct adapter *sc;
	struct port_info *pi;
	int i;

	sc = device_get_softc(bus);
	buf[0] = '\0';
	for_each_port(sc, i) {
		pi = sc->port[i];
		if (pi != NULL && pi->dev == dev) {
			snprintf(buf, buflen, "port=%d", pi->port_id);
			break;
		}
	}
	return (0);
}

static int
t4_ready(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);
	if (sc->flags & FW_OK)
		return (0);
	return (ENXIO);
}

static int
t4_read_port_device(device_t dev, int port, device_t *child)
{
	struct adapter *sc;
	struct port_info *pi;

	sc = device_get_softc(dev);
	if (port < 0 || port >= MAX_NPORTS)
		return (EINVAL);
	pi = sc->port[port];
	if (pi == NULL || pi->dev == NULL)
		return (ENXIO);
	*child = pi->dev;
	return (0);
}

static int
notify_siblings(device_t dev, int detaching)
{
	device_t sibling;
	int error, i;

	error = 0;
	for (i = 0; i < PCI_FUNCMAX; i++) {
		if (i == pci_get_function(dev))
			continue;
		sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
		    pci_get_slot(dev), i);
		if (sibling == NULL || !device_is_attached(sibling))
			continue;
		if (detaching)
			error = T4_DETACH_CHILD(sibling);
		else
			(void)T4_ATTACH_CHILD(sibling);
		if (error)
			break;
	}
	return (error);
}

/*
 * Idempotent
 */
static int
t4_detach(device_t dev)
{
	struct adapter *sc;
	int rc;

	sc = device_get_softc(dev);

	rc = notify_siblings(dev, 1);
	if (rc) {
		device_printf(dev,
		    "failed to detach sibling devices: %d\n", rc);
		return (rc);
	}

	return (t4_detach_common(dev));
}

int
t4_detach_common(device_t dev)
{
	struct adapter *sc;
	struct port_info *pi;
	int i, rc;

	sc = device_get_softc(dev);

	if (sc->cdev) {
		destroy_dev(sc->cdev);
		sc->cdev = NULL;
	}

	sc->flags &= ~CHK_MBOX_ACCESS;
	if (sc->flags & FULL_INIT_DONE) {
		if (!(sc->flags & IS_VF))
			t4_intr_disable(sc);
	}

	if (device_is_attached(dev)) {
		rc = bus_generic_detach(dev);
		if (rc) {
			device_printf(dev,
			    "failed to detach child devices: %d\n", rc);
			return (rc);
		}
	}

	for (i = 0; i < sc->intr_count; i++)
		t4_free_irq(sc, &sc->irq[i]);

	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
		t4_free_tx_sched(sc);

	for (i = 0; i < MAX_NPORTS; i++) {
		pi = sc->port[i];
		if (pi) {
			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
			if (pi->dev)
				device_delete_child(dev, pi->dev);

			mtx_destroy(&pi->pi_lock);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
		}
	}

	device_delete_children(dev);

	if (sc->flags & FULL_INIT_DONE)
		adapter_full_uninit(sc);

	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
		t4_fw_bye(sc, sc->mbox);

	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
		pci_release_msi(dev);

	if (sc->regs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	if (sc->udbs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->msix_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
		    sc->msix_res);

	if (sc->l2t)
		t4_free_l2t(sc->l2t);
	if (sc->key_map)
		vmem_destroy(sc->key_map);
	if (sc->smt)
		t4_free_smt(sc->smt);
#ifdef INET6
	t4_destroy_clip_table(sc);
#endif

#ifdef TCP_OFFLOAD
	free(sc->sge.ofld_rxq, M_CXGBE);
	free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
	free(sc->sge.nm_rxq, M_CXGBE);
	free(sc->sge.nm_txq, M_CXGBE);
#endif
	free(sc->irq, M_CXGBE);
	free(sc->sge.rxq, M_CXGBE);
	free(sc->sge.txq, M_CXGBE);
	free(sc->sge.ctrlq, M_CXGBE);
	free(sc->sge.iqmap, M_CXGBE);
	free(sc->sge.eqmap, M_CXGBE);
	free(sc->tids.ftid_tab, M_CXGBE);
	free(sc->tids.hpftid_tab, M_CXGBE);
	free_hftid_hash(&sc->tids);
	free(sc->tids.atid_tab, M_CXGBE);
	free(sc->tids.tid_tab, M_CXGBE);
	free(sc->tt.tls_rx_ports, M_CXGBE);
	t4_destroy_dma_tag(sc);
	if (mtx_initialized(&sc->sc_lock)) {
		sx_xlock(&t4_list_lock);
		SLIST_REMOVE(&t4_list, sc, adapter, link);
		sx_xunlock(&t4_list_lock);
		mtx_destroy(&sc->sc_lock);
	}

	callout_drain(&sc->sfl_callout);
	if (mtx_initialized(&sc->tids.ftid_lock)) {
		mtx_destroy(&sc->tids.ftid_lock);
		cv_destroy(&sc->tids.ftid_cv);
	}
	if (mtx_initialized(&sc->tids.atid_lock))
		mtx_destroy(&sc->tids.atid_lock);
	if (mtx_initialized(&sc->sfl_lock))
		mtx_destroy(&sc->sfl_lock);
	if (mtx_initialized(&sc->ifp_lock))
		mtx_destroy(&sc->ifp_lock);
	if (mtx_initialized(&sc->reg_lock))
		mtx_destroy(&sc->reg_lock);

	if (rw_initialized(&sc->policy_lock)) {
		rw_destroy(&sc->policy_lock);
#ifdef TCP_OFFLOAD
		if (sc->policy != NULL)
			free_offload_policy(sc->policy);
#endif
	}

	for (i = 0; i < NUM_MEMWIN; i++) {
		struct memwin *mw = &sc->memwin[i];

		if (rw_initialized(&mw->mw_lock))
			rw_destroy(&mw->mw_lock);
	}

	bzero(sc, sizeof(*sc));

	return (0);
}

static int
cxgbe_probe(device_t dev)
{
	char buf[128];
	struct port_info *pi = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
	device_set_desc_copy(dev, buf);

	return (BUS_PROBE_DEFAULT);
}

#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS)
#define T4_CAP_ENABLE (T4_CAP)
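/*
 * All capabilities in T4_CAP start out enabled; individual ones can be
 * toggled at runtime with ifconfig(8), e.g. (illustrative)
 * "ifconfig cxgbe0 -lro" to turn off LRO on the first port.
 */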

static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
	struct ifnet *ifp;
	struct sbuf *sb;

	vi->xact_addr_filt = -1;
	callout_init(&vi->tick, 1);

	/* Allocate an ifnet and set it up */
	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}
	vi->ifp = ifp;
	ifp->if_softc = vi;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

	ifp->if_init = cxgbe_init;
	ifp->if_ioctl = cxgbe_ioctl;
	ifp->if_transmit = cxgbe_transmit;
	ifp->if_qflush = cxgbe_qflush;
	ifp->if_get_counter = cxgbe_get_counter;

	ifp->if_capabilities = T4_CAP;
#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0)
		ifp->if_capabilities |= IFCAP_TOE;
#endif
	ifp->if_capenable = T4_CAP_ENABLE;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

	ifp->if_hw_tsomax = IP_MAXPACKET;
	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
	if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0)
		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
#endif
	ifp->if_hw_tsomaxsegsize = 65536;

	ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
	if (vi->nnmrxq != 0)
		cxgbe_nm_attach(vi);
#endif
	sb = sbuf_new_auto();
	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#ifdef TCP_OFFLOAD
	if (ifp->if_capabilities & IFCAP_TOE)
		sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
		    vi->nofldtxq, vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
		    vi->nnmtxq, vi->nnmrxq);
#endif
	sbuf_finish(sb);
	device_printf(dev, "%s\n", sbuf_data(sb));
	sbuf_delete(sb);

	vi_sysctls(vi);

	return (0);
}

static int
cxgbe_attach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct adapter *sc = pi->adapter;
	struct vi_info *vi;
	int i, rc;

	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);

	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
	if (rc)
		return (rc);

	for_each_vi(pi, i, vi) {
		if (i == 0)
			continue;
		vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
		if (vi->dev == NULL) {
			device_printf(dev, "failed to add VI %d\n", i);
			continue;
		}
		device_set_softc(vi->dev, vi);
	}

	cxgbe_sysctls(pi);

	bus_generic_attach(dev);

	return (0);
}

static void
cxgbe_vi_detach(struct vi_info *vi)
{
	struct ifnet *ifp = vi->ifp;

	ether_ifdetach(ifp);

	/* Let detach proceed even if these fail. */
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		cxgbe_nm_detach(vi);
#endif
	cxgbe_uninit_synchronized(vi);
	callout_drain(&vi->tick);
	vi_full_uninit(vi);

	if_free(vi->ifp);
	vi->ifp = NULL;
}

static int
cxgbe_detach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct adapter *sc = pi->adapter;
	int rc;

	/* Detach the extra VIs first. */
	rc = bus_generic_detach(dev);
	if (rc)
		return (rc);
	device_delete_children(dev);

	doom_vi(sc, &pi->vi[0]);

	if (pi->flags & HAS_TRACEQ) {
		sc->traceq = -1;	/* cloner should not create ifnet */
		t4_tracer_port_detach(sc);
	}

	cxgbe_vi_detach(&pi->vi[0]);
	callout_drain(&pi->tick);
	ifmedia_removeall(&pi->media);

	end_synchronized_op(sc, 0);

	return (0);
}

static void
cxgbe_init(void *arg)
{
	struct vi_info *vi = arg;
	struct adapter *sc = vi->pi->adapter;

	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
		return;
	cxgbe_init_synchronized(vi);
	end_synchronized_op(sc, 0);
}

static int
cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
{
	int rc = 0, mtu, can_sleep, if_flags, if_drv_flags, vi_if_flags;
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct ifreq *ifr = (struct ifreq *)data;
	uint32_t mask;

	switch (cmd) {
	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if (mtu < ETHERMIN || mtu > MAX_MTU)
			return (EINVAL);

		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
		if (rc)
			return (rc);
		ifp->if_mtu = mtu;
		if (vi->flags & VI_INIT_DONE) {
			t4_update_fl_bufsize(ifp);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_MTU);
		}
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFFLAGS:
		/*
		 * Decide what to do, with the port lock held.
		 */
		PORT_LOCK(pi);
		if_flags = ifp->if_flags;
		if_drv_flags = ifp->if_drv_flags;
		vi_if_flags = vi->if_flags;
		if (if_flags & IFF_UP && if_drv_flags & IFF_DRV_RUNNING &&
		    (vi_if_flags ^ if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) {
			can_sleep = 0;
		} else {
			can_sleep = 1;
		}
		PORT_UNLOCK(pi);

		/*
		 * ifp/vi flags may change here but we'll just do what our local
		 * copy of the flags indicates and then update the driver owned
		 * ifp/vi flags (in a synch-op and with the port lock held) to
		 * reflect what we did.
		 */

		rc = begin_synchronized_op(sc, vi,
		    can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
		if (rc) {
			if_printf(ifp, "%ssleepable synch operation failed: %d."
			    "  if_flags 0x%08x, if_drv_flags 0x%08x\n",
			    can_sleep ? "" : "non-", rc, if_flags,
			    if_drv_flags);
			return (rc);
		}

		if (if_flags & IFF_UP) {
			if (if_drv_flags & IFF_DRV_RUNNING) {
				if ((if_flags ^ vi_if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					MPASS(can_sleep == 0);
					rc = update_mac_settings(ifp,
					    XGMAC_PROMISC | XGMAC_ALLMULTI);
				}
			} else {
				MPASS(can_sleep == 1);
				rc = cxgbe_init_synchronized(vi);
			}
		} else if (if_drv_flags & IFF_DRV_RUNNING) {
			MPASS(can_sleep == 1);
			rc = cxgbe_uninit_synchronized(vi);
		}
		PORT_LOCK(pi);
		vi->if_flags = if_flags;
		PORT_UNLOCK(pi);
		end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
1847		rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
1848		if (rc)
1849			return (rc);
1850		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1851			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
1852		end_synchronized_op(sc, LOCK_HELD);
1853		break;
1854
1855	case SIOCSIFCAP:
1856		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
1857		if (rc)
1858			return (rc);
1859
1860		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1861		if (mask & IFCAP_TXCSUM) {
1862			ifp->if_capenable ^= IFCAP_TXCSUM;
1863			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1864
1865			if (IFCAP_TSO4 & ifp->if_capenable &&
1866			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1867				ifp->if_capenable &= ~IFCAP_TSO4;
1868				if_printf(ifp,
1869				    "tso4 disabled due to -txcsum.\n");
1870			}
1871		}
1872		if (mask & IFCAP_TXCSUM_IPV6) {
1873			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1874			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1875
1876			if (IFCAP_TSO6 & ifp->if_capenable &&
1877			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1878				ifp->if_capenable &= ~IFCAP_TSO6;
1879				if_printf(ifp,
1880				    "tso6 disabled due to -txcsum6.\n");
1881			}
1882		}
1883		if (mask & IFCAP_RXCSUM)
1884			ifp->if_capenable ^= IFCAP_RXCSUM;
1885		if (mask & IFCAP_RXCSUM_IPV6)
1886			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1887
1888		/*
1889		 * Note that we leave CSUM_TSO alone (it is always set).  The
1890		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1891		 * sending a TSO request our way, so it's sufficient to toggle
1892		 * IFCAP_TSOx only.
1893		 */
1894		if (mask & IFCAP_TSO4) {
1895			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1896			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1897				if_printf(ifp, "enable txcsum first.\n");
1898				rc = EAGAIN;
1899				goto fail;
1900			}
1901			ifp->if_capenable ^= IFCAP_TSO4;
1902		}
1903		if (mask & IFCAP_TSO6) {
1904			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1905			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1906				if_printf(ifp, "enable txcsum6 first.\n");
1907				rc = EAGAIN;
1908				goto fail;
1909			}
1910			ifp->if_capenable ^= IFCAP_TSO6;
1911		}
1912		if (mask & IFCAP_LRO) {
1913#if defined(INET) || defined(INET6)
1914			int i;
1915			struct sge_rxq *rxq;
1916
1917			ifp->if_capenable ^= IFCAP_LRO;
1918			for_each_rxq(vi, i, rxq) {
1919				if (ifp->if_capenable & IFCAP_LRO)
1920					rxq->iq.flags |= IQ_LRO_ENABLED;
1921				else
1922					rxq->iq.flags &= ~IQ_LRO_ENABLED;
1923			}
1924#endif
1925		}
1926#ifdef TCP_OFFLOAD
1927		if (mask & IFCAP_TOE) {
1928			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1929
1930			rc = toe_capability(vi, enable);
1931			if (rc != 0)
1932				goto fail;
1933
1934			ifp->if_capenable ^= mask;
1935		}
1936#endif
1937		if (mask & IFCAP_VLAN_HWTAGGING) {
1938			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1939			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1940				rc = update_mac_settings(ifp, XGMAC_VLANEX);
1941		}
1942		if (mask & IFCAP_VLAN_MTU) {
1943			ifp->if_capenable ^= IFCAP_VLAN_MTU;
1944
1945			/* Need to find out how to disable auto-mtu-inflation */
1946		}
1947		if (mask & IFCAP_VLAN_HWTSO)
1948			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1949		if (mask & IFCAP_VLAN_HWCSUM)
1950			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1951
1952#ifdef VLAN_CAPABILITIES
1953		VLAN_CAPABILITIES(ifp);
1954#endif
1955fail:
1956		end_synchronized_op(sc, 0);
1957		break;
1958
1959	case SIOCSIFMEDIA:
1960	case SIOCGIFMEDIA:
1961	case SIOCGIFXMEDIA:
1962		ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
1963		break;
1964
1965	case SIOCGI2C: {
1966		struct ifi2creq i2c;
1967
1968		rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
1969		if (rc != 0)
1970			break;
1971		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
1972			rc = EPERM;
1973			break;
1974		}
1975		if (i2c.len > sizeof(i2c.data)) {
1976			rc = EINVAL;
1977			break;
1978		}
1979		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
1980		if (rc)
1981			return (rc);
1982		rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
1983		    i2c.offset, i2c.len, &i2c.data[0]);
1984		end_synchronized_op(sc, 0);
1985		if (rc == 0)
1986			rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
1987		break;
1988	}
1989
1990	default:
1991		rc = ether_ioctl(ifp, cmd, data);
1992	}
1993
1994	return (rc);
1995}
1996
1997static int
1998cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
1999{
2000	struct vi_info *vi = ifp->if_softc;
2001	struct port_info *pi = vi->pi;
2002	struct adapter *sc = pi->adapter;
2003	struct sge_txq *txq;
2004	void *items[1];
2005	int rc;
2006
2007	M_ASSERTPKTHDR(m);
2008	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */
2009
2010	if (__predict_false(pi->link_cfg.link_ok == false)) {
2011		m_freem(m);
2012		return (ENETDOWN);
2013	}
2014
2015	rc = parse_pkt(sc, &m);
2016	if (__predict_false(rc != 0)) {
2017		MPASS(m == NULL);			/* was freed already */
2018		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
2019		return (rc);
2020	}
2021
2022	/* Select a txq. */
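	/*
	 * Frames without a flow id always use the VI's first txq; hashed
	 * frames are spread over the queues past the first rsrv_noflowq
	 * reserved ones.
	 */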
2023	txq = &sc->sge.txq[vi->first_txq];
2024	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2025		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
2026		    vi->rsrv_noflowq);
2027
2028	items[0] = m;
2029	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
2030	if (__predict_false(rc != 0))
2031		m_freem(m);
2032
2033	return (rc);
2034}
2035
2036static void
2037cxgbe_qflush(struct ifnet *ifp)
2038{
2039	struct vi_info *vi = ifp->if_softc;
2040	struct sge_txq *txq;
2041	int i;
2042
2043	/* queues do not exist if !VI_INIT_DONE. */
2044	if (vi->flags & VI_INIT_DONE) {
2045		for_each_txq(vi, i, txq) {
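			/*
			 * While EQ_QFLUSH is set the tx path discards frames
			 * instead of transmitting them, so the ring drains.
			 */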
2046			TXQ_LOCK(txq);
2047			txq->eq.flags |= EQ_QFLUSH;
2048			TXQ_UNLOCK(txq);
2049			while (!mp_ring_is_idle(txq->r)) {
2050				mp_ring_check_drainage(txq->r, 0);
2051				pause("qflush", 1);
2052			}
2053			TXQ_LOCK(txq);
2054			txq->eq.flags &= ~EQ_QFLUSH;
2055			TXQ_UNLOCK(txq);
2056		}
2057	}
2058	if_qflush(ifp);
2059}
2060
2061static uint64_t
2062vi_get_counter(struct ifnet *ifp, ift_counter c)
2063{
2064	struct vi_info *vi = ifp->if_softc;
2065	struct fw_vi_stats_vf *s = &vi->stats;
2066
2067	vi_refresh_stats(vi->pi->adapter, vi);
2068
2069	switch (c) {
2070	case IFCOUNTER_IPACKETS:
2071		return (s->rx_bcast_frames + s->rx_mcast_frames +
2072		    s->rx_ucast_frames);
2073	case IFCOUNTER_IERRORS:
2074		return (s->rx_err_frames);
2075	case IFCOUNTER_OPACKETS:
2076		return (s->tx_bcast_frames + s->tx_mcast_frames +
2077		    s->tx_ucast_frames + s->tx_offload_frames);
2078	case IFCOUNTER_OERRORS:
2079		return (s->tx_drop_frames);
2080	case IFCOUNTER_IBYTES:
2081		return (s->rx_bcast_bytes + s->rx_mcast_bytes +
2082		    s->rx_ucast_bytes);
2083	case IFCOUNTER_OBYTES:
2084		return (s->tx_bcast_bytes + s->tx_mcast_bytes +
2085		    s->tx_ucast_bytes + s->tx_offload_bytes);
2086	case IFCOUNTER_IMCASTS:
2087		return (s->rx_mcast_frames);
2088	case IFCOUNTER_OMCASTS:
2089		return (s->tx_mcast_frames);
2090	case IFCOUNTER_OQDROPS: {
2091		uint64_t drops;
2092
2093		drops = 0;
2094		if (vi->flags & VI_INIT_DONE) {
2095			int i;
2096			struct sge_txq *txq;
2097
2098			for_each_txq(vi, i, txq)
2099				drops += counter_u64_fetch(txq->r->drops);
2100		}
2101
2102		return (drops);
2103
2104	}
2105
2106	default:
2107		return (if_get_counter_default(ifp, c));
2108	}
2109}
2110
2111uint64_t
2112cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
2113{
2114	struct vi_info *vi = ifp->if_softc;
2115	struct port_info *pi = vi->pi;
2116	struct adapter *sc = pi->adapter;
2117	struct port_stats *s = &pi->stats;
2118
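	/*
	 * Port-wide MAC statistics can't be attributed to a single VI when
	 * the port carries multiple VIs, and a VF can't read them at all;
	 * fall back to the firmware's per-VI stats in those cases.
	 */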
2119	if (pi->nvi > 1 || sc->flags & IS_VF)
2120		return (vi_get_counter(ifp, c));
2121
2122	cxgbe_refresh_stats(sc, pi);
2123
2124	switch (c) {
2125	case IFCOUNTER_IPACKETS:
2126		return (s->rx_frames);
2127
2128	case IFCOUNTER_IERRORS:
2129		return (s->rx_jabber + s->rx_runt + s->rx_too_long +
2130		    s->rx_fcs_err + s->rx_len_err);
2131
2132	case IFCOUNTER_OPACKETS:
2133		return (s->tx_frames);
2134
2135	case IFCOUNTER_OERRORS:
2136		return (s->tx_error_frames);
2137
2138	case IFCOUNTER_IBYTES:
2139		return (s->rx_octets);
2140
2141	case IFCOUNTER_OBYTES:
2142		return (s->tx_octets);
2143
2144	case IFCOUNTER_IMCASTS:
2145		return (s->rx_mcast_frames);
2146
2147	case IFCOUNTER_OMCASTS:
2148		return (s->tx_mcast_frames);
2149
2150	case IFCOUNTER_IQDROPS:
2151		return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2152		    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2153		    s->rx_trunc3 + pi->tnl_cong_drops);
2154
2155	case IFCOUNTER_OQDROPS: {
2156		uint64_t drops;
2157
2158		drops = s->tx_drop;
2159		if (vi->flags & VI_INIT_DONE) {
2160			int i;
2161			struct sge_txq *txq;
2162
2163			for_each_txq(vi, i, txq)
2164				drops += counter_u64_fetch(txq->r->drops);
2165		}
2166
2167		return (drops);
2168
2169	}
2170
2171	default:
2172		return (if_get_counter_default(ifp, c));
2173	}
2174}
2175
2176/*
2177 * The kernel picks a media from the list we provided, but we still validate
2178 * the request.
2179 */
2180int
2181cxgbe_media_change(struct ifnet *ifp)
2182{
2183	struct vi_info *vi = ifp->if_softc;
2184	struct port_info *pi = vi->pi;
2185	struct ifmedia *ifm = &pi->media;
2186	struct link_config *lc = &pi->link_cfg;
2187	struct adapter *sc = pi->adapter;
2188	int rc;
2189
2190	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
2191	if (rc != 0)
2192		return (rc);
2193	PORT_LOCK(pi);
2194	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
2195		/* ifconfig .. media autoselect */
2196		if (!(lc->supported & FW_PORT_CAP32_ANEG)) {
2197			rc = ENOTSUP; /* AN not supported by transceiver */
2198			goto done;
2199		}
2200		lc->requested_aneg = AUTONEG_ENABLE;
2201		lc->requested_speed = 0;
2202		lc->requested_fc |= PAUSE_AUTONEG;
2203	} else {
2204		lc->requested_aneg = AUTONEG_DISABLE;
2205		lc->requested_speed =
2206		    ifmedia_baudrate(ifm->ifm_media) / 1000000;
2207		lc->requested_fc = 0;
2208		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
2209			lc->requested_fc |= PAUSE_RX;
2210		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
2211			lc->requested_fc |= PAUSE_TX;
2212	}
2213	if (pi->up_vis > 0) {
2214		fixup_link_config(pi);
2215		rc = apply_link_config(pi);
2216	}
2217done:
2218	PORT_UNLOCK(pi);
2219	end_synchronized_op(sc, 0);
2220	return (rc);
2221}
2222
2223/*
2224 * Base media word (without ETHER, pause, link active, etc.) for the port at the
2225 * given speed.
2226 */
2227static int
2228port_mword(struct port_info *pi, uint32_t speed)
2229{
2230
2231	MPASS(speed & M_FW_PORT_CAP32_SPEED);
2232	MPASS(powerof2(speed));
2233
2234	switch (pi->port_type) {
2235	case FW_PORT_TYPE_BT_SGMII:
2236	case FW_PORT_TYPE_BT_XFI:
2237	case FW_PORT_TYPE_BT_XAUI:
2238		/* BaseT */
2239		switch (speed) {
2240		case FW_PORT_CAP32_SPEED_100M:
2241			return (IFM_100_T);
2242		case FW_PORT_CAP32_SPEED_1G:
2243			return (IFM_1000_T);
2244		case FW_PORT_CAP32_SPEED_10G:
2245			return (IFM_10G_T);
2246		}
2247		break;
2248	case FW_PORT_TYPE_KX4:
2249		if (speed == FW_PORT_CAP32_SPEED_10G)
2250			return (IFM_10G_KX4);
2251		break;
2252	case FW_PORT_TYPE_CX4:
2253		if (speed == FW_PORT_CAP32_SPEED_10G)
2254			return (IFM_10G_CX4);
2255		break;
2256	case FW_PORT_TYPE_KX:
2257		if (speed == FW_PORT_CAP32_SPEED_1G)
2258			return (IFM_1000_KX);
2259		break;
2260	case FW_PORT_TYPE_KR:
2261	case FW_PORT_TYPE_BP_AP:
2262	case FW_PORT_TYPE_BP4_AP:
2263	case FW_PORT_TYPE_BP40_BA:
2264	case FW_PORT_TYPE_KR4_100G:
2265	case FW_PORT_TYPE_KR_SFP28:
2266	case FW_PORT_TYPE_KR_XLAUI:
2267		switch (speed) {
2268		case FW_PORT_CAP32_SPEED_1G:
2269			return (IFM_1000_KX);
2270		case FW_PORT_CAP32_SPEED_10G:
2271			return (IFM_10G_KR);
2272		case FW_PORT_CAP32_SPEED_25G:
2273			return (IFM_25G_KR);
2274		case FW_PORT_CAP32_SPEED_40G:
2275			return (IFM_40G_KR4);
2276		case FW_PORT_CAP32_SPEED_50G:
2277			return (IFM_50G_KR2);
2278		case FW_PORT_CAP32_SPEED_100G:
2279			return (IFM_100G_KR4);
2280		}
2281		break;
2282	case FW_PORT_TYPE_FIBER_XFI:
2283	case FW_PORT_TYPE_FIBER_XAUI:
2284	case FW_PORT_TYPE_SFP:
2285	case FW_PORT_TYPE_QSFP_10G:
2286	case FW_PORT_TYPE_QSA:
2287	case FW_PORT_TYPE_QSFP:
2288	case FW_PORT_TYPE_CR4_QSFP:
2289	case FW_PORT_TYPE_CR_QSFP:
2290	case FW_PORT_TYPE_CR2_QSFP:
2291	case FW_PORT_TYPE_SFP28:
2292		/* Pluggable transceiver */
2293		switch (pi->mod_type) {
2294		case FW_PORT_MOD_TYPE_LR:
2295			switch (speed) {
2296			case FW_PORT_CAP32_SPEED_1G:
2297				return (IFM_1000_LX);
2298			case FW_PORT_CAP32_SPEED_10G:
2299				return (IFM_10G_LR);
2300			case FW_PORT_CAP32_SPEED_25G:
2301				return (IFM_25G_LR);
2302			case FW_PORT_CAP32_SPEED_40G:
2303				return (IFM_40G_LR4);
2304			case FW_PORT_CAP32_SPEED_50G:
2305				return (IFM_50G_LR2);
2306			case FW_PORT_CAP32_SPEED_100G:
2307				return (IFM_100G_LR4);
2308			}
2309			break;
2310		case FW_PORT_MOD_TYPE_SR:
2311			switch (speed) {
2312			case FW_PORT_CAP32_SPEED_1G:
2313				return (IFM_1000_SX);
2314			case FW_PORT_CAP32_SPEED_10G:
2315				return (IFM_10G_SR);
2316			case FW_PORT_CAP32_SPEED_25G:
2317				return (IFM_25G_SR);
2318			case FW_PORT_CAP32_SPEED_40G:
2319				return (IFM_40G_SR4);
2320			case FW_PORT_CAP32_SPEED_50G:
2321				return (IFM_50G_SR2);
2322			case FW_PORT_CAP32_SPEED_100G:
2323				return (IFM_100G_SR4);
2324			}
2325			break;
2326		case FW_PORT_MOD_TYPE_ER:
2327			if (speed == FW_PORT_CAP32_SPEED_10G)
2328				return (IFM_10G_ER);
2329			break;
2330		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2331		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2332			switch (speed) {
2333			case FW_PORT_CAP32_SPEED_1G:
2334				return (IFM_1000_CX);
2335			case FW_PORT_CAP32_SPEED_10G:
2336				return (IFM_10G_TWINAX);
2337			case FW_PORT_CAP32_SPEED_25G:
2338				return (IFM_25G_CR);
2339			case FW_PORT_CAP32_SPEED_40G:
2340				return (IFM_40G_CR4);
2341			case FW_PORT_CAP32_SPEED_50G:
2342				return (IFM_50G_CR2);
2343			case FW_PORT_CAP32_SPEED_100G:
2344				return (IFM_100G_CR4);
2345			}
2346			break;
2347		case FW_PORT_MOD_TYPE_LRM:
2348			if (speed == FW_PORT_CAP32_SPEED_10G)
2349				return (IFM_10G_LRM);
2350			break;
2351		case FW_PORT_MOD_TYPE_NA:
2352			MPASS(0);	/* Not pluggable? */
2353			/* fall through */
2354		case FW_PORT_MOD_TYPE_ERROR:
2355		case FW_PORT_MOD_TYPE_UNKNOWN:
2356		case FW_PORT_MOD_TYPE_NOTSUPPORTED:
2357			break;
2358		case FW_PORT_MOD_TYPE_NONE:
2359			return (IFM_NONE);
2360		}
2361		break;
2362	case FW_PORT_TYPE_NONE:
2363		return (IFM_NONE);
2364	}
2365
2366	return (IFM_UNKNOWN);
2367}
2368
2369void
2370cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2371{
2372	struct vi_info *vi = ifp->if_softc;
2373	struct port_info *pi = vi->pi;
2374	struct adapter *sc = pi->adapter;
2375	struct link_config *lc = &pi->link_cfg;
2376
2377	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
2378		return;
2379	PORT_LOCK(pi);
2380
2381	if (pi->up_vis == 0) {
2382		/*
2383		 * If all the interfaces are administratively down the firmware
2384		 * does not report transceiver changes.  Refresh port info here
2385		 * so that ifconfig displays accurate ifmedia at all times.
2386		 * This is the only reason we have a synchronized op in this
2387		 * function.  Just PORT_LOCK would have been enough otherwise.
2388		 */
2389		t4_update_port_info(pi);
2390		build_medialist(pi);
2391	}
2392
2393	/* ifm_status */
2394	ifmr->ifm_status = IFM_AVALID;
2395	if (lc->link_ok == false)
2396		goto done;
2397	ifmr->ifm_status |= IFM_ACTIVE;
2398
2399	/* ifm_active */
2400	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2401	ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2402	if (lc->fc & PAUSE_RX)
2403		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2404	if (lc->fc & PAUSE_TX)
2405		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2406	ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
2407done:
2408	PORT_UNLOCK(pi);
2409	end_synchronized_op(sc, 0);
2410}
2411
2412static int
2413vcxgbe_probe(device_t dev)
2414{
2415	char buf[128];
2416	struct vi_info *vi = device_get_softc(dev);
2417
2418	snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2419	    vi - vi->pi->vi);
2420	device_set_desc_copy(dev, buf);
2421
2422	return (BUS_PROBE_DEFAULT);
2423}
2424
2425static int
2426alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2427{
2428	int func, index, rc;
2429	uint32_t param, val;
2430
2431	ASSERT_SYNCHRONIZED_OP(sc);
2432
2433	index = vi - pi->vi;
2434	MPASS(index > 0);	/* This function deals with _extra_ VIs only */
2435	KASSERT(index < nitems(vi_mac_funcs),
2436	    ("%s: VI %s doesn't have a MAC func", __func__,
2437	    device_get_nameunit(vi->dev)));
2438	func = vi_mac_funcs[index];
2439	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2440	    vi->hw_addr, &vi->rss_size, func, 0);
2441	if (rc < 0) {
2442		device_printf(vi->dev, "failed to allocate virtual interface %d"
2443		    " for port %d: %d\n", index, pi->port_id, -rc);
2444		return (-rc);
2445	}
2446	vi->viid = rc;
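	/* The low 7 bits of the viid give the SMT index (scaled by 2 on T4/T5). */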
2447	if (chip_id(sc) <= CHELSIO_T5)
2448		vi->smt_idx = (rc & 0x7f) << 1;
2449	else
2450		vi->smt_idx = (rc & 0x7f);
2451
2452	if (vi->rss_size == 1) {
2453		/*
2454		 * This VI didn't get a slice of the RSS table.  Reduce the
2455		 * number of VIs being created (hw.cxgbe.num_vis) or modify the
2456		 * configuration file (nvi, rssnvi for this PF) if this is a
2457		 * problem.
2458		 */
2459		device_printf(vi->dev, "RSS table not available.\n");
2460		vi->rss_base = 0xffff;
2461
2462		return (0);
2463	}
2464
2465	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2466	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2467	    V_FW_PARAMS_PARAM_YZ(vi->viid);
2468	rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2469	if (rc)
2470		vi->rss_base = 0xffff;
2471	else {
2472		MPASS((val >> 16) == vi->rss_size);
2473		vi->rss_base = val & 0xffff;
2474	}
2475
2476	return (0);
2477}
2478
2479static int
2480vcxgbe_attach(device_t dev)
2481{
2482	struct vi_info *vi;
2483	struct port_info *pi;
2484	struct adapter *sc;
2485	int rc;
2486
2487	vi = device_get_softc(dev);
2488	pi = vi->pi;
2489	sc = pi->adapter;
2490
2491	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2492	if (rc)
2493		return (rc);
2494	rc = alloc_extra_vi(sc, pi, vi);
2495	end_synchronized_op(sc, 0);
2496	if (rc)
2497		return (rc);
2498
2499	rc = cxgbe_vi_attach(dev, vi);
2500	if (rc) {
2501		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2502		return (rc);
2503	}
2504	return (0);
2505}
2506
2507static int
2508vcxgbe_detach(device_t dev)
2509{
2510	struct vi_info *vi;
2511	struct adapter *sc;
2512
2513	vi = device_get_softc(dev);
2514	sc = vi->pi->adapter;
2515
2516	doom_vi(sc, vi);
2517
2518	cxgbe_vi_detach(vi);
2519	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2520
2521	end_synchronized_op(sc, 0);
2522
2523	return (0);
2524}
2525
2526void
2527t4_fatal_err(struct adapter *sc)
2528{
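	/* Stop the SGE and mask all interrupt causes; the adapter is wedged. */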
2529	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
2530	t4_intr_disable(sc);
2531	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
2532	    device_get_nameunit(sc->dev));
2533	if (t4_panic_on_fatal_err)
2534		panic("panic requested on fatal error");
2535}
2536
2537void
2538t4_add_adapter(struct adapter *sc)
2539{
2540	sx_xlock(&t4_list_lock);
2541	SLIST_INSERT_HEAD(&t4_list, sc, link);
2542	sx_xunlock(&t4_list_lock);
2543}
2544
2545int
2546t4_map_bars_0_and_4(struct adapter *sc)
2547{
2548	sc->regs_rid = PCIR_BAR(0);
2549	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2550	    &sc->regs_rid, RF_ACTIVE);
2551	if (sc->regs_res == NULL) {
2552		device_printf(sc->dev, "cannot map registers.\n");
2553		return (ENXIO);
2554	}
2555	sc->bt = rman_get_bustag(sc->regs_res);
2556	sc->bh = rman_get_bushandle(sc->regs_res);
2557	sc->mmio_len = rman_get_size(sc->regs_res);
2558	setbit(&sc->doorbells, DOORBELL_KDB);
2559
2560	sc->msix_rid = PCIR_BAR(4);
2561	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2562	    &sc->msix_rid, RF_ACTIVE);
2563	if (sc->msix_res == NULL) {
2564		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2565		return (ENXIO);
2566	}
2567
2568	return (0);
2569}
2570
2571int
2572t4_map_bar_2(struct adapter *sc)
2573{
2574
2575	/*
2576	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
2577	 * to map it if RDMA is disabled.
2578	 */
2579	if (is_t4(sc) && sc->rdmacaps == 0)
2580		return (0);
2581
2582	sc->udbs_rid = PCIR_BAR(2);
2583	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2584	    &sc->udbs_rid, RF_ACTIVE);
2585	if (sc->udbs_res == NULL) {
2586		device_printf(sc->dev, "cannot map doorbell BAR.\n");
2587		return (ENXIO);
2588	}
2589	sc->udbs_base = rman_get_virtual(sc->udbs_res);
2590
2591	if (chip_id(sc) >= CHELSIO_T5) {
2592		setbit(&sc->doorbells, DOORBELL_UDB);
2593#if defined(__i386__) || defined(__amd64__)
2594		if (t5_write_combine) {
2595			int rc, mode;
2596
2597			/*
2598			 * Enable write combining on BAR2.  This is the
2599			 * userspace doorbell BAR and is split into 128B
2600			 * (UDBS_SEG_SIZE) doorbell regions, each associated
2601			 * with an egress queue.  The first 64B has the doorbell
2602			 * and the second 64B can be used to submit a tx work
2603			 * request with an implicit doorbell.
2604			 */
2605
2606			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2607			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2608			if (rc == 0) {
2609				clrbit(&sc->doorbells, DOORBELL_UDB);
2610				setbit(&sc->doorbells, DOORBELL_WCWR);
2611				setbit(&sc->doorbells, DOORBELL_UDBWC);
2612			} else {
2613				device_printf(sc->dev,
2614				    "couldn't enable write combining: %d\n",
2615				    rc);
2616			}
2617
2618			mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2619			t4_write_reg(sc, A_SGE_STAT_CFG,
2620			    V_STATSOURCE_T5(7) | mode);
2621		}
2622#endif
2623	}
2624	sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;
2625
2626	return (0);
2627}
2628
2629struct memwin_init {
2630	uint32_t base;
2631	uint32_t aperture;
2632};
2633
2634static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2635	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
2636	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
2637	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2638};
2639
2640static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2641	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
2642	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
2643	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2644};
2645
2646static void
2647setup_memwin(struct adapter *sc)
2648{
2649	const struct memwin_init *mw_init;
2650	struct memwin *mw;
2651	int i;
2652	uint32_t bar0;
2653
2654	if (is_t4(sc)) {
2655		/*
2656		 * Read low 32b of bar0 indirectly via the hardware backdoor
2657		 * mechanism.  Works from within PCI passthrough environments
2658		 * too, where rman_get_start() can return a different value.  We
2659		 * need to program the T4 memory window decoders with the actual
2660		 * addresses that will be coming across the PCIe link.
2661		 */
2662		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
2663		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
2664
2665		mw_init = &t4_memwin[0];
2666	} else {
2667		/* T5+ use the relative offset inside the PCIe BAR */
2668		bar0 = 0;
2669
2670		mw_init = &t5_memwin[0];
2671	}
2672
2673	for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
2674		rw_init(&mw->mw_lock, "memory window access");
2675		mw->mw_base = mw_init->base;
2676		mw->mw_aperture = mw_init->aperture;
2677		mw->mw_curpos = 0;
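		/* WINDOW encodes the aperture as log2(size) - 10 (1KB units). */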
2678		t4_write_reg(sc,
2679		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
2680		    (mw->mw_base + bar0) | V_BIR(0) |
2681		    V_WINDOW(ilog2(mw->mw_aperture) - 10));
2682		rw_wlock(&mw->mw_lock);
2683		position_memwin(sc, i, 0);
2684		rw_wunlock(&mw->mw_lock);
2685	}
2686
2687	/* flush */
2688	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
2689}
2690
2691/*
2692 * Positions the memory window at the given address in the card's address space.
2693 * There are some alignment requirements and the actual position may be at an
2694 * address prior to the requested address.  mw->mw_curpos always has the actual
2695 * position of the window.
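 * For example, on T4 a request for address 0x12345 positions the window at
 * 0x12340 (16B alignment); on T5+ it would be positioned at 0x12300 (128B).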
2696 */
2697static void
2698position_memwin(struct adapter *sc, int idx, uint32_t addr)
2699{
2700	struct memwin *mw;
2701	uint32_t pf;
2702	uint32_t reg;
2703
2704	MPASS(idx >= 0 && idx < NUM_MEMWIN);
2705	mw = &sc->memwin[idx];
2706	rw_assert(&mw->mw_lock, RA_WLOCKED);
2707
2708	if (is_t4(sc)) {
2709		pf = 0;
2710		mw->mw_curpos = addr & ~0xf;	/* start must be 16B aligned */
2711	} else {
2712		pf = V_PFNUM(sc->pf);
2713		mw->mw_curpos = addr & ~0x7f;	/* start must be 128B aligned */
2714	}
2715	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
2716	t4_write_reg(sc, reg, mw->mw_curpos | pf);
2717	t4_read_reg(sc, reg);	/* flush */
2718}
2719
2720int
2721rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2722    int len, int rw)
2723{
2724	struct memwin *mw;
2725	uint32_t mw_end, v;
2726
2727	MPASS(idx >= 0 && idx < NUM_MEMWIN);
2728
2729	/* Memory can only be accessed in naturally aligned 4 byte units */
2730	if (addr & 3 || len & 3 || len <= 0)
2731		return (EINVAL);
2732
2733	mw = &sc->memwin[idx];
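	/*
	 * Access the memory in chunks that fall within the window's current
	 * aperture; reposition the window (which requires upgrading to the
	 * write lock) whenever the target address lies outside it.
	 */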
2734	while (len > 0) {
2735		rw_rlock(&mw->mw_lock);
2736		mw_end = mw->mw_curpos + mw->mw_aperture;
2737		if (addr >= mw_end || addr < mw->mw_curpos) {
2738			/* Will need to reposition the window */
2739			if (!rw_try_upgrade(&mw->mw_lock)) {
2740				rw_runlock(&mw->mw_lock);
2741				rw_wlock(&mw->mw_lock);
2742			}
2743			rw_assert(&mw->mw_lock, RA_WLOCKED);
2744			position_memwin(sc, idx, addr);
2745			rw_downgrade(&mw->mw_lock);
2746			mw_end = mw->mw_curpos + mw->mw_aperture;
2747		}
2748		rw_assert(&mw->mw_lock, RA_RLOCKED);
2749		while (addr < mw_end && len > 0) {
2750			if (rw == 0) {
2751				v = t4_read_reg(sc, mw->mw_base + addr -
2752				    mw->mw_curpos);
2753				*val++ = le32toh(v);
2754			} else {
2755				v = *val++;
2756				t4_write_reg(sc, mw->mw_base + addr -
2757				    mw->mw_curpos, htole32(v));
2758			}
2759			addr += 4;
2760			len -= 4;
2761		}
2762		rw_runlock(&mw->mw_lock);
2763	}
2764
2765	return (0);
2766}
2767
2768int
2769alloc_atid_tab(struct tid_info *t, int flags)
2770{
2771	int i;
2772
2773	MPASS(t->natids > 0);
2774	MPASS(t->atid_tab == NULL);
2775
2776	t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
2777	    M_ZERO | flags);
2778	if (t->atid_tab == NULL)
2779		return (ENOMEM);
2780	mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
2781	t->afree = t->atid_tab;
2782	t->atids_in_use = 0;
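	/* Link all entries into a free list; afree points at the head. */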
2783	for (i = 1; i < t->natids; i++)
2784		t->atid_tab[i - 1].next = &t->atid_tab[i];
2785	t->atid_tab[t->natids - 1].next = NULL;
2786
2787	return (0);
2788}
2789
2790void
2791free_atid_tab(struct tid_info *t)
2792{
2793
2794	KASSERT(t->atids_in_use == 0,
2795	    ("%s: %d atids still in use.", __func__, t->atids_in_use));
2796
2797	if (mtx_initialized(&t->atid_lock))
2798		mtx_destroy(&t->atid_lock);
2799	free(t->atid_tab, M_CXGBE);
2800	t->atid_tab = NULL;
2801}
2802
2803int
2804alloc_atid(struct adapter *sc, void *ctx)
2805{
2806	struct tid_info *t = &sc->tids;
2807	int atid = -1;
2808
2809	mtx_lock(&t->atid_lock);
2810	if (t->afree) {
2811		union aopen_entry *p = t->afree;
2812
2813		atid = p - t->atid_tab;
2814		MPASS(atid <= M_TID_TID);
2815		t->afree = p->next;
2816		p->data = ctx;
2817		t->atids_in_use++;
2818	}
2819	mtx_unlock(&t->atid_lock);
2820	return (atid);
2821}
2822
2823void *
2824lookup_atid(struct adapter *sc, int atid)
2825{
2826	struct tid_info *t = &sc->tids;
2827
2828	return (t->atid_tab[atid].data);
2829}
2830
2831void
2832free_atid(struct adapter *sc, int atid)
2833{
2834	struct tid_info *t = &sc->tids;
2835	union aopen_entry *p = &t->atid_tab[atid];
2836
2837	mtx_lock(&t->atid_lock);
2838	p->next = t->afree;
2839	t->afree = p;
2840	t->atids_in_use--;
2841	mtx_unlock(&t->atid_lock);
2842}
2843
2844static void
2845queue_tid_release(struct adapter *sc, int tid)
2846{
2847
2848	CXGBE_UNIMPLEMENTED("deferred tid release");
2849}
2850
2851void
2852release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
2853{
2854	struct wrqe *wr;
2855	struct cpl_tid_release *req;
2856
2857	wr = alloc_wrqe(sizeof(*req), ctrlq);
2858	if (wr == NULL) {
2859		queue_tid_release(sc, tid);	/* defer */
2860		return;
2861	}
2862	req = wrtod(wr);
2863
2864	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
2865
2866	t4_wrq_tx(sc, wr);
2867}
2868
2869static int
2870t4_range_cmp(const void *a, const void *b)
2871{
2872	return ((const struct t4_range *)a)->start -
2873	       ((const struct t4_range *)b)->start;
2874}
2875
2876/*
2877 * Verify that the memory range specified by the addr/len pair is valid within
2878 * the card's address space.
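 * The enabled memories (EDC0/EDC1/MC0/MC1) may be adjacent in that address
 * space, so the ranges are sorted and merged before the containment check;
 * a valid request may therefore straddle two physically distinct memories.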
2879 */
2880static int
2881validate_mem_range(struct adapter *sc, uint32_t addr, uint32_t len)
2882{
2883	struct t4_range mem_ranges[4], *r, *next;
2884	uint32_t em, addr_len;
2885	int i, n, remaining;
2886
2887	/* Memory can only be accessed in naturally aligned 4 byte units */
2888	if (addr & 3 || len & 3 || len == 0)
2889		return (EINVAL);
2890
2891	/* Enabled memories */
2892	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2893
2894	r = &mem_ranges[0];
2895	n = 0;
2896	bzero(r, sizeof(mem_ranges));
2897	if (em & F_EDRAM0_ENABLE) {
2898		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2899		r->size = G_EDRAM0_SIZE(addr_len) << 20;
2900		if (r->size > 0) {
2901			r->start = G_EDRAM0_BASE(addr_len) << 20;
2902			if (addr >= r->start &&
2903			    addr + len <= r->start + r->size)
2904				return (0);
2905			r++;
2906			n++;
2907		}
2908	}
2909	if (em & F_EDRAM1_ENABLE) {
2910		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2911		r->size = G_EDRAM1_SIZE(addr_len) << 20;
2912		if (r->size > 0) {
2913			r->start = G_EDRAM1_BASE(addr_len) << 20;
2914			if (addr >= r->start &&
2915			    addr + len <= r->start + r->size)
2916				return (0);
2917			r++;
2918			n++;
2919		}
2920	}
2921	if (em & F_EXT_MEM_ENABLE) {
2922		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2923		r->size = G_EXT_MEM_SIZE(addr_len) << 20;
2924		if (r->size > 0) {
2925			r->start = G_EXT_MEM_BASE(addr_len) << 20;
2926			if (addr >= r->start &&
2927			    addr + len <= r->start + r->size)
2928				return (0);
2929			r++;
2930			n++;
2931		}
2932	}
2933	if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
2934		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2935		r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
2936		if (r->size > 0) {
2937			r->start = G_EXT_MEM1_BASE(addr_len) << 20;
2938			if (addr >= r->start &&
2939			    addr + len <= r->start + r->size)
2940				return (0);
2941			r++;
2942			n++;
2943		}
2944	}
2945	MPASS(n <= nitems(mem_ranges));
2946
2947	if (n > 1) {
2948		/* Sort and merge the ranges. */
2949		qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
2950
2951		/* Start from index 0 and examine the next n - 1 entries. */
2952		r = &mem_ranges[0];
2953		for (remaining = n - 1; remaining > 0; remaining--, r++) {
2954
2955			MPASS(r->size > 0);	/* r is a valid entry. */
2956			next = r + 1;
2957			MPASS(next->size > 0);	/* and so is the next one. */
2958
2959			while (r->start + r->size >= next->start) {
2960				/* Merge the next one into the current entry. */
2961				r->size = max(r->start + r->size,
2962				    next->start + next->size) - r->start;
2963				n--;	/* One fewer entry in total. */
2964				if (--remaining == 0)
2965					goto done;	/* short circuit */
2966				next++;
2967			}
2968			if (next != r + 1) {
2969				/*
2970				 * Some entries were merged into r and next
2971				 * points to the first valid entry that couldn't
2972				 * be merged.
2973				 */
2974				MPASS(next->size > 0);	/* must be valid */
2975				memcpy(r + 1, next, remaining * sizeof(*r));
2976#ifdef INVARIANTS
2977				/*
2978				 * This is so that the foo->size assertion in
2979				 * the next iteration of the loop does the
2980				 * right thing for entries that were pulled up
2981				 * and are no longer valid.
2982				 */
2983				MPASS(n < nitems(mem_ranges));
2984				bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
2985				    sizeof(struct t4_range));
2986#endif
2987			}
2988		}
2989done:
2990		/* Done merging the ranges. */
2991		MPASS(n > 0);
2992		r = &mem_ranges[0];
2993		for (i = 0; i < n; i++, r++) {
2994			if (addr >= r->start &&
2995			    addr + len <= r->start + r->size)
2996				return (0);
2997		}
2998	}
2999
3000	return (EFAULT);
3001}
3002
3003static int
3004fwmtype_to_hwmtype(int mtype)
3005{
3006
3007	switch (mtype) {
3008	case FW_MEMTYPE_EDC0:
3009		return (MEM_EDC0);
3010	case FW_MEMTYPE_EDC1:
3011		return (MEM_EDC1);
3012	case FW_MEMTYPE_EXTMEM:
3013		return (MEM_MC0);
3014	case FW_MEMTYPE_EXTMEM1:
3015		return (MEM_MC1);
3016	default:
3017		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
3018	}
3019}
3020
3021/*
3022 * Verify that the memory range specified by the memtype/offset/len pair is
3023 * valid and lies entirely within the memtype specified.  The global address of
3024 * the start of the range is returned in addr.
3025 */
3026static int
3027validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, uint32_t len,
3028    uint32_t *addr)
3029{
3030	uint32_t em, addr_len, maddr;
3031
3032	/* Memory can only be accessed in naturally aligned 4 byte units */
3033	if (off & 3 || len & 3 || len == 0)
3034		return (EINVAL);
3035
3036	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3037	switch (fwmtype_to_hwmtype(mtype)) {
3038	case MEM_EDC0:
3039		if (!(em & F_EDRAM0_ENABLE))
3040			return (EINVAL);
3041		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3042		maddr = G_EDRAM0_BASE(addr_len) << 20;
3043		break;
3044	case MEM_EDC1:
3045		if (!(em & F_EDRAM1_ENABLE))
3046			return (EINVAL);
3047		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3048		maddr = G_EDRAM1_BASE(addr_len) << 20;
3049		break;
3050	case MEM_MC:
3051		if (!(em & F_EXT_MEM_ENABLE))
3052			return (EINVAL);
3053		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3054		maddr = G_EXT_MEM_BASE(addr_len) << 20;
3055		break;
3056	case MEM_MC1:
3057		if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
3058			return (EINVAL);
3059		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3060		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
3061		break;
3062	default:
3063		return (EINVAL);
3064	}
3065
3066	*addr = maddr + off;	/* global address */
3067	return (validate_mem_range(sc, *addr, len));
3068}
3069
3070static int
3071fixup_devlog_params(struct adapter *sc)
3072{
3073	struct devlog_params *dparams = &sc->params.devlog;
3074	int rc;
3075
3076	rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
3077	    dparams->size, &dparams->addr);
3078
3079	return (rc);
3080}
3081
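/*
 * Total vector count: the always-allocated extras, plus one per rx queue
 * (NIC and TOE) on each port's main VI, plus one per rx queue on each extra
 * VI.  NIC and netmap rx queues on an extra VI share vectors, hence the max.
 */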
3082static void
3083update_nirq(struct intrs_and_queues *iaq, int nports)
3084{
3085	int extra = T4_EXTRA_INTR;
3086
3087	iaq->nirq = extra;
3088	iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq);
3089	iaq->nirq += nports * (iaq->num_vis - 1) *
3090	    max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3091	iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3092}
3093
3094/*
3095 * Adjust requirements to fit the number of interrupts available.
3096 */
3097static void
3098calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
3099    int navail)
3100{
3101	int old_nirq;
3102	const int nports = sc->params.nports;
3103
3104	MPASS(nports > 0);
3105	MPASS(navail > 0);
3106
3107	bzero(iaq, sizeof(*iaq));
3108	iaq->intr_type = itype;
3109	iaq->num_vis = t4_num_vis;
3110	iaq->ntxq = t4_ntxq;
3111	iaq->ntxq_vi = t4_ntxq_vi;
3112	iaq->nrxq = t4_nrxq;
3113	iaq->nrxq_vi = t4_nrxq_vi;
3114#ifdef TCP_OFFLOAD
3115	if (is_offload(sc)) {
3116		iaq->nofldtxq = t4_nofldtxq;
3117		iaq->nofldtxq_vi = t4_nofldtxq_vi;
3118		iaq->nofldrxq = t4_nofldrxq;
3119		iaq->nofldrxq_vi = t4_nofldrxq_vi;
3120	}
3121#endif
3122#ifdef DEV_NETMAP
3123	iaq->nnmtxq_vi = t4_nnmtxq_vi;
3124	iaq->nnmrxq_vi = t4_nnmrxq_vi;
3125#endif
3126
3127	update_nirq(iaq, nports);
3128	if (iaq->nirq <= navail &&
3129	    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3130		/*
3131		 * This is the normal case -- there are enough interrupts for
3132		 * everything.
3133		 */
3134		goto done;
3135	}
3136
3137	/*
3138	 * If extra VIs have been configured try reducing their count and see if
3139	 * that works.
3140	 */
3141	while (iaq->num_vis > 1) {
3142		iaq->num_vis--;
3143		update_nirq(iaq, nports);
3144		if (iaq->nirq <= navail &&
3145		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3146			device_printf(sc->dev, "virtual interfaces per port "
3147			    "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
3148			    "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
3149			    "itype %d, navail %u, nirq %d.\n",
3150			    iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
3151			    iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
3152			    itype, navail, iaq->nirq);
3153			goto done;
3154		}
3155	}
3156
3157	/*
3158	 * Extra VIs will not be created.  Log a message if they were requested.
3159	 */
3160	MPASS(iaq->num_vis == 1);
3161	iaq->ntxq_vi = iaq->nrxq_vi = 0;
3162	iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
3163	iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
3164	if (iaq->num_vis != t4_num_vis) {
3165		device_printf(sc->dev, "extra virtual interfaces disabled.  "
3166		    "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
3167		    "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
3168		    iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
3169		    iaq->nnmrxq_vi, itype, navail, iaq->nirq);
3170	}
3171
3172	/*
3173	 * Keep reducing the number of NIC rx queues to the next lower power of
3174	 * 2 (for even RSS distribution) and halving the TOE rx queues and see
3175	 * if that works.
3176	 */
3177	do {
3178		if (iaq->nrxq > 1) {
3179			do {
3180				iaq->nrxq--;
3181			} while (!powerof2(iaq->nrxq));
3182		}
3183		if (iaq->nofldrxq > 1)
3184			iaq->nofldrxq >>= 1;
3185
3186		old_nirq = iaq->nirq;
3187		update_nirq(iaq, nports);
3188		if (iaq->nirq <= navail &&
3189		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3190			device_printf(sc->dev, "running with reduced number of "
3191			    "rx queues because of shortage of interrupts.  "
3192			    "nrxq=%u, nofldrxq=%u.  "
3193			    "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
3194			    iaq->nofldrxq, itype, navail, iaq->nirq);
3195			goto done;
3196		}
3197	} while (old_nirq != iaq->nirq);
3198
3199	/* One interrupt for everything.  Ugh. */
3200	device_printf(sc->dev, "running with minimal number of queues.  "
3201	    "itype %d, navail %u.\n", itype, navail);
3202	iaq->nirq = 1;
3203	MPASS(iaq->nrxq == 1);
3204	iaq->ntxq = 1;
3205	if (iaq->nofldrxq > 1)
3206		iaq->nofldtxq = 1;
3207done:
3208	MPASS(iaq->num_vis > 0);
3209	if (iaq->num_vis > 1) {
3210		MPASS(iaq->nrxq_vi > 0);
3211		MPASS(iaq->ntxq_vi > 0);
3212	}
3213	MPASS(iaq->nirq > 0);
3214	MPASS(iaq->nrxq > 0);
3215	MPASS(iaq->ntxq > 0);
3216	if (itype == INTR_MSI) {
3217		MPASS(powerof2(iaq->nirq));
3218	}
3219}
3220
3221static int
3222cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3223{
3224	int rc, itype, navail, nalloc;
3225
3226	for (itype = INTR_MSIX; itype; itype >>= 1) {
3227
3228		if ((itype & t4_intr_types) == 0)
3229			continue;	/* not allowed */
3230
3231		if (itype == INTR_MSIX)
3232			navail = pci_msix_count(sc->dev);
3233		else if (itype == INTR_MSI)
3234			navail = pci_msi_count(sc->dev);
3235		else
3236			navail = 1;
3237restart:
3238		if (navail == 0)
3239			continue;
3240
3241		calculate_iaq(sc, iaq, itype, navail);
3242		nalloc = iaq->nirq;
3243		rc = 0;
3244		if (itype == INTR_MSIX)
3245			rc = pci_alloc_msix(sc->dev, &nalloc);
3246		else if (itype == INTR_MSI)
3247			rc = pci_alloc_msi(sc->dev, &nalloc);
3248
3249		if (rc == 0 && nalloc > 0) {
3250			if (nalloc == iaq->nirq)
3251				return (0);
3252
3253			/*
3254			 * Didn't get the number requested.  Use whatever number
3255			 * the kernel is willing to allocate.
3256			 */
3257			device_printf(sc->dev, "fewer vectors than requested, "
3258			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3259			    itype, iaq->nirq, nalloc);
3260			pci_release_msi(sc->dev);
3261			navail = nalloc;
3262			goto restart;
3263		}
3264
3265		device_printf(sc->dev,
3266		    "failed to allocate vectors: %d, type=%d, req=%d, rcvd=%d\n",
3267		    rc, itype, iaq->nirq, nalloc);
3268	}
3269
3270	device_printf(sc->dev,
3271	    "failed to find a usable interrupt type.  "
3272	    "allowed=%d, msi-x=%d, msi=%d, intx=1\n", t4_intr_types,
3273	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3274
3275	return (ENXIO);
3276}
3277
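/*
 * Pack the bundled firmware version (major.minor.micro.build) into a single
 * 32-bit word with the same layout as fw_hdr.fw_ver read off the card.
 */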
3278#define FW_VERSION(chip) ( \
3279    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
3280    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
3281    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
3282    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
3283#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
3284
3285/* Just enough of fw_hdr to cover all version info. */
3286struct fw_h {
3287	__u8	ver;
3288	__u8	chip;
3289	__be16	len512;
3290	__be32	fw_ver;
3291	__be32	tp_microcode_ver;
3292	__u8	intfver_nic;
3293	__u8	intfver_vnic;
3294	__u8	intfver_ofld;
3295	__u8	intfver_ri;
3296	__u8	intfver_iscsipdu;
3297	__u8	intfver_iscsi;
3298	__u8	intfver_fcoepdu;
3299	__u8	intfver_fcoe;
3300};
3301/* Spot check a couple of fields. */
3302CTASSERT(offsetof(struct fw_h, fw_ver) == offsetof(struct fw_hdr, fw_ver));
3303CTASSERT(offsetof(struct fw_h, intfver_nic) == offsetof(struct fw_hdr, intfver_nic));
3304CTASSERT(offsetof(struct fw_h, intfver_fcoe) == offsetof(struct fw_hdr, intfver_fcoe));
3305
3306struct fw_info {
3307	uint8_t chip;
3308	char *kld_name;
3309	char *fw_mod_name;
3310	struct fw_h fw_h;
3311} fw_info[] = {
3312	{
3313		.chip = CHELSIO_T4,
3314		.kld_name = "t4fw_cfg",
3315		.fw_mod_name = "t4fw",
3316		.fw_h = {
3317			.chip = FW_HDR_CHIP_T4,
3318			.fw_ver = htobe32(FW_VERSION(T4)),
3319			.intfver_nic = FW_INTFVER(T4, NIC),
3320			.intfver_vnic = FW_INTFVER(T4, VNIC),
3321			.intfver_ofld = FW_INTFVER(T4, OFLD),
3322			.intfver_ri = FW_INTFVER(T4, RI),
3323			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
3324			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
3325			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
3326			.intfver_fcoe = FW_INTFVER(T4, FCOE),
3327		},
3328	}, {
3329		.chip = CHELSIO_T5,
3330		.kld_name = "t5fw_cfg",
3331		.fw_mod_name = "t5fw",
3332		.fw_h = {
3333			.chip = FW_HDR_CHIP_T5,
3334			.fw_ver = htobe32(FW_VERSION(T5)),
3335			.intfver_nic = FW_INTFVER(T5, NIC),
3336			.intfver_vnic = FW_INTFVER(T5, VNIC),
3337			.intfver_ofld = FW_INTFVER(T5, OFLD),
3338			.intfver_ri = FW_INTFVER(T5, RI),
3339			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
3340			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
3341			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
3342			.intfver_fcoe = FW_INTFVER(T5, FCOE),
3343		},
3344	}, {
3345		.chip = CHELSIO_T6,
3346		.kld_name = "t6fw_cfg",
3347		.fw_mod_name = "t6fw",
3348		.fw_h = {
3349			.chip = FW_HDR_CHIP_T6,
3350			.fw_ver = htobe32(FW_VERSION(T6)),
3351			.intfver_nic = FW_INTFVER(T6, NIC),
3352			.intfver_vnic = FW_INTFVER(T6, VNIC),
3353			.intfver_ofld = FW_INTFVER(T6, OFLD),
3354			.intfver_ri = FW_INTFVER(T6, RI),
3355			.intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
3356			.intfver_iscsi = FW_INTFVER(T6, ISCSI),
3357			.intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
3358			.intfver_fcoe = FW_INTFVER(T6, FCOE),
3359		},
3360	}
3361};
3362
3363static struct fw_info *
3364find_fw_info(int chip)
3365{
3366	int i;
3367
3368	for (i = 0; i < nitems(fw_info); i++) {
3369		if (fw_info[i].chip == chip)
3370			return (&fw_info[i]);
3371	}
3372	return (NULL);
3373}
3374
3375/*
3376 * Is the given firmware API compatible with the one the driver was compiled
3377 * with?
3378 */
3379static int
3380fw_compatible(const struct fw_h *hdr1, const struct fw_h *hdr2)
3381{
3382
3383	/* short circuit if it's the exact same firmware version */
3384	if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3385		return (1);
3386
3387	/*
3388	 * XXX: Is this too conservative?  Perhaps I should limit this to the
3389	 * features that are supported in the driver.
3390	 */
3391#define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3392	if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3393	    SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3394	    SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3395		return (1);
3396#undef SAME_INTF
3397
3398	return (0);
3399}
3400
3401static int
3402load_fw_module(struct adapter *sc, const struct firmware **dcfg,
3403    const struct firmware **fw)
3404{
3405	struct fw_info *fw_info;
3406
3407	*dcfg = NULL;
3408	if (fw != NULL)
3409		*fw = NULL;
3410
3411	fw_info = find_fw_info(chip_id(sc));
3412	if (fw_info == NULL) {
3413		device_printf(sc->dev,
3414		    "unable to look up firmware information for chip %d.\n",
3415		    chip_id(sc));
3416		return (EINVAL);
3417	}
3418
3419	*dcfg = firmware_get(fw_info->kld_name);
3420	if (*dcfg != NULL) {
3421		if (fw != NULL)
3422			*fw = firmware_get(fw_info->fw_mod_name);
3423		return (0);
3424	}
3425
3426	return (ENOENT);
3427}
3428
3429static void
3430unload_fw_module(struct adapter *sc, const struct firmware *dcfg,
3431    const struct firmware *fw)
3432{
3433
3434	if (fw != NULL)
3435		firmware_put(fw, FIRMWARE_UNLOAD);
3436	if (dcfg != NULL)
3437		firmware_put(dcfg, FIRMWARE_UNLOAD);
3438}
3439
3440/*
3441 * Return values:
3442 * 0 means no firmware install attempted.
3443 * ERESTART means a firmware install was attempted and was successful.
3444 * +ve errno means a firmware install was attempted but failed.
3445 */
3446static int
3447install_kld_firmware(struct adapter *sc, struct fw_h *card_fw,
3448    const struct fw_h *drv_fw, const char *reason, int *already)
3449{
3450	const struct firmware *cfg, *fw;
3451	const uint32_t c = be32toh(card_fw->fw_ver);
3452	uint32_t d, k;
3453	int rc, fw_install;
3454	struct fw_h bundled_fw;
3455	bool load_attempted;
3456
3457	cfg = fw = NULL;
3458	load_attempted = false;
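	/*
	 * A negative hw.cxgbe.fw_install behaves like its absolute value but
	 * compares against the firmware in the KLD rather than the version
	 * compiled into the driver; that case is handled just below.
	 */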
3459	fw_install = t4_fw_install < 0 ? -t4_fw_install : t4_fw_install;
3460
3461	memcpy(&bundled_fw, drv_fw, sizeof(bundled_fw));
3462	if (t4_fw_install < 0) {
3463		rc = load_fw_module(sc, &cfg, &fw);
3464		if (rc != 0 || fw == NULL) {
3465			device_printf(sc->dev,
3466			    "failed to load firmware module: %d. cfg %p, fw %p;"
3467			    " will use compiled-in firmware version for "
3468			    "hw.cxgbe.fw_install checks.\n",
3469			    rc, cfg, fw);
3470		} else {
3471			memcpy(&bundled_fw, fw->data, sizeof(bundled_fw));
3472		}
3473		load_attempted = true;
3474	}
3475	d = be32toh(bundled_fw.fw_ver);
3476
3477	if (reason != NULL)
3478		goto install;
3479
3480	if ((sc->flags & FW_OK) == 0) {
3481
3482		if (c == 0xffffffff) {
3483			reason = "missing";
3484			goto install;
3485		}
3486
3487		rc = 0;
3488		goto done;
3489	}
3490
3491	if (!fw_compatible(card_fw, &bundled_fw)) {
3492		reason = "incompatible or unusable";
3493		goto install;
3494	}
3495
3496	if (d > c) {
3497		reason = "older than the version bundled with this driver";
3498		goto install;
3499	}
3500
3501	if (fw_install == 2 && d != c) {
3502		reason = "different than the version bundled with this driver";
3503		goto install;
3504	}
3505
3506	/* No reason to do anything to the firmware already on the card. */
3507	rc = 0;
3508	goto done;
3509
3510install:
3511	rc = 0;
3512	if ((*already)++)
3513		goto done;
3514
3515	if (fw_install == 0) {
3516		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3517		    "but the driver is prohibited from installing a firmware "
3518		    "on the card.\n",
3519		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3520		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3521
3522		goto done;
3523	}
3524
3525	/*
3526	 * We'll attempt to install a firmware.  Load the module first (if it
3527	 * hasn't been loaded already).
3528	 */
3529	if (!load_attempted) {
3530		rc = load_fw_module(sc, &cfg, &fw);
3531		if (rc != 0 || fw == NULL) {
3532			device_printf(sc->dev,
3533			    "failed to load firmware module: %d. cfg %p, fw %p\n",
3534			    rc, cfg, fw);
3535			/* carry on */
3536		}
3537	}
3538	if (fw == NULL) {
3539		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3540		    "but the driver cannot take corrective action because it "
3541		    "is unable to load the firmware module.\n",
3542		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3543		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3544		rc = sc->flags & FW_OK ? 0 : ENOENT;
3545		goto done;
3546	}
3547	k = be32toh(((const struct fw_hdr *)fw->data)->fw_ver);
3548	if (k != d) {
3549		MPASS(t4_fw_install > 0);
3550		device_printf(sc->dev,
3551		    "firmware in KLD (%u.%u.%u.%u) is not what the driver was "
3552		    "expecting (%u.%u.%u.%u) and will not be used.\n",
3553		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3554		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k),
3555		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3556		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3557		rc = sc->flags & FW_OK ? 0 : EINVAL;
3558		goto done;
3559	}
3560
3561	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3562	    "installing firmware %u.%u.%u.%u on card.\n",
3563	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3564	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3565	    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3566	    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3567
3568	rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3569	if (rc != 0) {
3570		device_printf(sc->dev, "failed to install firmware: %d\n", rc);
3571	} else {
3572		/* Installed successfully, update the cached header too. */
3573		rc = ERESTART;
3574		memcpy(card_fw, fw->data, sizeof(*card_fw));
3575	}
3576done:
3577	unload_fw_module(sc, cfg, fw);
3578
3579	return (rc);
3580}
3581
3582/*
3583 * Establish contact with the firmware and attempt to become the master driver.
3584 *
3585 * A firmware will be installed to the card if needed (if the driver is allowed
3586 * to do so).
3587 */
3588static int
3589contact_firmware(struct adapter *sc)
3590{
3591	int rc, already = 0;
3592	enum dev_state state;
3593	struct fw_info *fw_info;
3594	struct fw_hdr *card_fw;		/* fw on the card */
3595	const struct fw_h *drv_fw;
3596
3597	fw_info = find_fw_info(chip_id(sc));
3598	if (fw_info == NULL) {
3599		device_printf(sc->dev,
3600		    "unable to look up firmware information for chip %d.\n",
3601		    chip_id(sc));
3602		return (EINVAL);
3603	}
3604	drv_fw = &fw_info->fw_h;
3605
3606	/* Read the header of the firmware on the card */
3607	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3608restart:
3609	rc = -t4_get_fw_hdr(sc, card_fw);
3610	if (rc != 0) {
3611		device_printf(sc->dev,
3612		    "unable to read firmware header from card's flash: %d\n",
3613		    rc);
3614		goto done;
3615	}
3616
3617	rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw, NULL,
3618	    &already);
3619	if (rc == ERESTART)
3620		goto restart;
3621	if (rc != 0)
3622		goto done;
3623
3624	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
3625	if (rc < 0 || state == DEV_STATE_ERR) {
3626		rc = -rc;
3627		device_printf(sc->dev,
3628		    "failed to connect to the firmware: %d, %d.  "
3629		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3630#if 0
3631		if (install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
3632		    "not responding properly to HELLO", &already) == ERESTART)
3633			goto restart;
3634#endif
3635		goto done;
3636	}
3637	MPASS(be32toh(card_fw->flags) & FW_HDR_FLAGS_RESET_HALT);
3638	sc->flags |= FW_OK;	/* The firmware responded to the FW_HELLO. */
3639
3640	if (rc == sc->pf) {
3641		sc->flags |= MASTER_PF;
3642		rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
3643		    NULL, &already);
3644		if (rc == ERESTART)
3645			rc = 0;
3646		else if (rc != 0)
3647			goto done;
3648	} else if (state == DEV_STATE_UNINIT) {
3649		/*
3650		 * We didn't get to be the master so we definitely won't be
3651		 * configuring the chip.  It's a bug if someone else hasn't
3652		 * configured it already.
3653		 */
3654		device_printf(sc->dev, "couldn't be master(%d), "
3655		    "device not already initialized either(%d).  "
3656		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3657		rc = EPROTO;
3658		goto done;
3659	} else {
3660		/*
3661		 * Some other PF is the master and has configured the chip.
3662		 * This is allowed but untested.
3663		 */
3664		device_printf(sc->dev, "PF%d is master, device state %d.  "
3665		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3666		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", rc);
3667		sc->cfcsum = 0;
3668		rc = 0;
3669	}
3670done:
3671	if (rc != 0 && sc->flags & FW_OK) {
3672		t4_fw_bye(sc, sc->mbox);
3673		sc->flags &= ~FW_OK;
3674	}
3675	free(card_fw, M_CXGBE);
3676	return (rc);
3677}
3678
3679static int
3680copy_cfg_file_to_card(struct adapter *sc, char *cfg_file,
3681    uint32_t mtype, uint32_t moff)
3682{
3683	struct fw_info *fw_info;
3684	const struct firmware *dcfg, *rcfg = NULL;
3685	const uint32_t *cfdata;
3686	uint32_t cflen, addr;
3687	int rc;
3688
3689	load_fw_module(sc, &dcfg, NULL);
3690
3691	/* Card specific interpretation of "default". */
3692	if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3693		if (pci_get_device(sc->dev) == 0x440a)
3694			snprintf(cfg_file, sizeof(t4_cfg_file), UWIRE_CF);
3695		if (is_fpga(sc))
3696			snprintf(cfg_file, sizeof(t4_cfg_file), FPGA_CF);
3697	}
3698
3699	if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3700		if (dcfg == NULL) {
3701			device_printf(sc->dev,
3702			    "KLD with default config is not available.\n");
3703			rc = ENOENT;
3704			goto done;
3705		}
3706		cfdata = dcfg->data;
3707		cflen = dcfg->datasize & ~3;
3708	} else {
3709		char s[32];
3710
3711		fw_info = find_fw_info(chip_id(sc));
3712		if (fw_info == NULL) {
3713			device_printf(sc->dev,
3714			    "unable to look up firmware information for chip %d.\n",
3715			    chip_id(sc));
3716			rc = EINVAL;
3717			goto done;
3718		}
3719		snprintf(s, sizeof(s), "%s_%s", fw_info->kld_name, cfg_file);
3720
3721		rcfg = firmware_get(s);
3722		if (rcfg == NULL) {
3723			device_printf(sc->dev,
3724			    "unable to load module \"%s\" for configuration "
3725			    "profile \"%s\".\n", s, cfg_file);
3726			rc = ENOENT;
3727			goto done;
3728		}
3729		cfdata = rcfg->data;
3730		cflen = rcfg->datasize & ~3;
3731	}
3732
3733	if (cflen > FLASH_CFG_MAX_SIZE) {
3734		device_printf(sc->dev,
3735		    "config file too long (%d, max allowed is %d).\n",
3736		    cflen, FLASH_CFG_MAX_SIZE);
3737		rc = EINVAL;
3738		goto done;
3739	}
3740
3741	rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3742	if (rc != 0) {
3743		device_printf(sc->dev,
3744		    "%s: addr (%d/0x%x) or len %d is not valid: %d.\n",
3745		    __func__, mtype, moff, cflen, rc);
3746		rc = EINVAL;
3747		goto done;
3748	}
3749	write_via_memwin(sc, 2, addr, cfdata, cflen);
3750done:
3751	if (rcfg != NULL)
3752		firmware_put(rcfg, FIRMWARE_UNLOAD);
3753	unload_fw_module(sc, dcfg, NULL);
3754	return (rc);
3755}
3756
3757struct caps_allowed {
3758	uint16_t nbmcaps;
3759	uint16_t linkcaps;
3760	uint16_t switchcaps;
3761	uint16_t niccaps;
3762	uint16_t toecaps;
3763	uint16_t rdmacaps;
3764	uint16_t cryptocaps;
3765	uint16_t iscsicaps;
3766	uint16_t fcoecaps;
3767};
3768
3769#define FW_PARAM_DEV(param) \
3770	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
3771	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
3772#define FW_PARAM_PFVF(param) \
3773	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
3774	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
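/*
 * Illustrative sketch (not driver code): the macros above pack a parameter
 * mnemonic and index into the 32-bit query word that the firmware's PARAMS
 * command takes.  A typical single-parameter query, modeled on the calls
 * below, looks like:
 *
 *	uint32_t param, val;
 *
 *	param = FW_PARAM_DEV(CF);
 *	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
 *	if (rc == 0)
 *		(val now holds the device's CF parameter)
 */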
3775
3776/*
3777 * Provide a configuration profile to the firmware and have it initialize the
3778 * chip accordingly.  This may involve uploading a configuration file to the
3779 * card.
3780 */
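/*
 * The profile may come from one of three sources, handled below: the
 * firmware's built-in configuration (BUILTIN_CF), a config file already
 * present in the card's flash (FLASH_CF), or a config file from a KLD that
 * is first uploaded to card memory (any other profile name).
 */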
3781static int
3782apply_cfg_and_initialize(struct adapter *sc, char *cfg_file,
3783    const struct caps_allowed *caps_allowed)
3784{
3785	int rc;
3786	struct fw_caps_config_cmd caps;
3787	uint32_t mtype, moff, finicsum, cfcsum, param, val;
3788
3789	rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
3790	if (rc != 0) {
3791		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
3792		return (rc);
3793	}
3794
3795	bzero(&caps, sizeof(caps));
3796	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3797	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
3798	if (strncmp(cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) {
3799		mtype = 0;
3800		moff = 0;
3801		caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3802	} else if (strncmp(cfg_file, FLASH_CF, sizeof(t4_cfg_file)) == 0) {
3803		mtype = FW_MEMTYPE_FLASH;
3804		moff = t4_flash_cfg_addr(sc);
3805		caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3806		    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3807		    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
3808		    FW_LEN16(caps));
3809	} else {
3810		/*
3811		 * Ask the firmware where it wants us to upload the config file.
3812		 */
3813		param = FW_PARAM_DEV(CF);
3814		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3815		if (rc != 0) {
3816			/* No support for config file?  Shouldn't happen. */
3817			device_printf(sc->dev,
3818			    "failed to query config file location: %d.\n", rc);
3819			goto done;
3820		}
3821		mtype = G_FW_PARAMS_PARAM_Y(val);
3822		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3823		caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3824		    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3825		    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
3826		    FW_LEN16(caps));
3827
3828		rc = copy_cfg_file_to_card(sc, cfg_file, mtype, moff);
3829		if (rc != 0) {
3830			device_printf(sc->dev,
3831			    "failed to upload config file to card: %d.\n", rc);
3832			goto done;
3833		}
3834	}
3835	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3836	if (rc != 0) {
3837		device_printf(sc->dev, "failed to pre-process config file: %d "
3838		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
3839		goto done;
3840	}
3841
3842	finicsum = be32toh(caps.finicsum);
3843	cfcsum = be32toh(caps.cfcsum);	/* actual */
3844	if (finicsum != cfcsum) {
3845		device_printf(sc->dev,
3846		    "WARNING: config file checksum mismatch: %08x %08x\n",
3847		    finicsum, cfcsum);
3848	}
3849	sc->cfcsum = cfcsum;
3850	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", cfg_file);
3851
3852	/*
3853	 * Let the firmware know what features will (not) be used so it can tune
3854	 * things accordingly.
3855	 */
3856#define LIMIT_CAPS(x) do { \
3857	caps.x##caps &= htobe16(caps_allowed->x##caps); \
3858} while (0)
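/*
 * Both operands of the AND in LIMIT_CAPS are big-endian; the masking is
 * byte-order independent because AND operates on each byte separately.
 */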
3859	LIMIT_CAPS(nbm);
3860	LIMIT_CAPS(link);
3861	LIMIT_CAPS(switch);
3862	LIMIT_CAPS(nic);
3863	LIMIT_CAPS(toe);
3864	LIMIT_CAPS(rdma);
3865	LIMIT_CAPS(crypto);
3866	LIMIT_CAPS(iscsi);
3867	LIMIT_CAPS(fcoe);
3868#undef LIMIT_CAPS
3869	if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
3870		/*
3871		 * TOE and hashfilters are mutually exclusive.  It is a config
3872		 * file or firmware bug if both are reported as available.  Try
3873		 * to cope with the situation in non-debug builds by disabling
3874		 * TOE.
3875		 */
3876		MPASS(caps.toecaps == 0);
3877
3878		caps.toecaps = 0;
3879		caps.rdmacaps = 0;
3880		caps.iscsicaps = 0;
3881	}
3882
3883	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3884	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
3885	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3886	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
3887	if (rc != 0) {
3888		device_printf(sc->dev,
3889		    "failed to process config file: %d.\n", rc);
3890		goto done;
3891	}
3892
3893	t4_tweak_chip_settings(sc);
3894
3895	/* get basic stuff going */
3896	rc = -t4_fw_initialize(sc, sc->mbox);
3897	if (rc != 0) {
3898		device_printf(sc->dev, "fw_initialize failed: %d.\n", rc);
3899		goto done;
3900	}
3901done:
3902	return (rc);
3903}
3904
3905/*
3906 * Partition chip resources for use between various PFs, VFs, etc.
3907 */
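/*
 * The initial profile is taken from the t4_cfg_file tunable (the
 * hw.cxgbe.config_file loader tunable).  If it fails and
 * DF_DISABLE_CFG_RETRY is not set, the driver retries with the firmware's
 * built-in configuration and a reduced, NIC-only set of capabilities.
 */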
3908static int
3909partition_resources(struct adapter *sc)
3910{
3911	char cfg_file[sizeof(t4_cfg_file)];
3912	struct caps_allowed caps_allowed;
3913	int rc;
3914	bool fallback;
3915
3916	/* Only the master driver gets to configure the chip resources. */
3917	MPASS(sc->flags & MASTER_PF);
3918
3919#define COPY_CAPS(x) do { \
3920	caps_allowed.x##caps = t4_##x##caps_allowed; \
3921} while (0)
3922	bzero(&caps_allowed, sizeof(caps_allowed));
3923	COPY_CAPS(nbm);
3924	COPY_CAPS(link);
3925	COPY_CAPS(switch);
3926	COPY_CAPS(nic);
3927	COPY_CAPS(toe);
3928	COPY_CAPS(rdma);
3929	COPY_CAPS(crypto);
3930	COPY_CAPS(iscsi);
3931	COPY_CAPS(fcoe);
3932	fallback = !(sc->debug_flags & DF_DISABLE_CFG_RETRY);
3933	snprintf(cfg_file, sizeof(cfg_file), "%s", t4_cfg_file);
3934retry:
3935	rc = apply_cfg_and_initialize(sc, cfg_file, &caps_allowed);
3936	if (rc != 0 && fallback) {
3937		device_printf(sc->dev,
3938		    "failed (%d) to configure card with \"%s\" profile, "
3939		    "will fall back to a basic configuration and retry.\n",
3940		    rc, cfg_file);
3941		snprintf(cfg_file, sizeof(cfg_file), "%s", BUILTIN_CF);
3942		bzero(&caps_allowed, sizeof(caps_allowed));
3943		COPY_CAPS(nbm);
3944		COPY_CAPS(link);
3945		COPY_CAPS(switch);
3946		COPY_CAPS(nic);
3947		fallback = false;
3948		goto retry;
3949	}
3950#undef COPY_CAPS
3951	return (rc);
3952}
3953
3954/*
3955 * Retrieve parameters that are needed (or nice to have) very early.
3956 */
3957static int
3958get_params__pre_init(struct adapter *sc)
3959{
3960	int rc;
3961	uint32_t param[2], val[2];
3962
3963	t4_get_version_info(sc);
3964
3965	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
3966	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
3967	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
3968	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
3969	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
3970
3971	snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
3972	    G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
3973	    G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
3974	    G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
3975	    G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
3976
3977	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
3978	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
3979	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
3980	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
3981	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
3982
3983	snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
3984	    G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
3985	    G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
3986	    G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
3987	    G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
3988
3989	param[0] = FW_PARAM_DEV(PORTVEC);
3990	param[1] = FW_PARAM_DEV(CCLK);
3991	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3992	if (rc != 0) {
3993		device_printf(sc->dev,
3994		    "failed to query parameters (pre_init): %d.\n", rc);
3995		return (rc);
3996	}
3997
3998	sc->params.portvec = val[0];
3999	sc->params.nports = bitcount32(val[0]);
4000	sc->params.vpd.cclk = val[1];
4001
4002	/* Read device log parameters. */
4003	rc = -t4_init_devlog_params(sc, 1);
4004	if (rc == 0)
4005		fixup_devlog_params(sc);
4006	else {
4007		device_printf(sc->dev,
4008		    "failed to get devlog parameters: %d.\n", rc);
4009		rc = 0;	/* devlog isn't critical for device operation */
4010	}
4011
4012	return (rc);
4013}
4014
4015/*
4016 * Retrieve various parameters that are of interest to the driver.  The device
4017 * has been initialized by the firmware at this point.
4018 */
4019static int
4020get_params__post_init(struct adapter *sc)
4021{
4022	int rc;
4023	uint32_t param[7], val[7];
4024	struct fw_caps_config_cmd caps;
4025
4026	param[0] = FW_PARAM_PFVF(IQFLINT_START);
4027	param[1] = FW_PARAM_PFVF(EQ_START);
4028	param[2] = FW_PARAM_PFVF(FILTER_START);
4029	param[3] = FW_PARAM_PFVF(FILTER_END);
4030	param[4] = FW_PARAM_PFVF(L2T_START);
4031	param[5] = FW_PARAM_PFVF(L2T_END);
4032	param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4033	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
4034	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
4035	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
4036	if (rc != 0) {
4037		device_printf(sc->dev,
4038		    "failed to query parameters (post_init): %d.\n", rc);
4039		return (rc);
4040	}
4041
4042	sc->sge.iq_start = val[0];
4043	sc->sge.eq_start = val[1];
4044	if ((int)val[3] > (int)val[2]) {
4045		sc->tids.ftid_base = val[2];
4046		sc->tids.ftid_end = val[3];
4047		sc->tids.nftids = val[3] - val[2] + 1;
4048	}
4049	sc->vres.l2t.start = val[4];
4050	sc->vres.l2t.size = val[5] - val[4] + 1;
4051	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
4052	    ("%s: L2 table size (%u) larger than expected (%u)",
4053	    __func__, sc->vres.l2t.size, L2T_SIZE));
4054	sc->params.core_vdd = val[6];
4055
4056	if (chip_id(sc) >= CHELSIO_T6) {
4057
4058#ifdef INVARIANTS
4059		if (sc->params.fw_vers >=
4060		    (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4061		    V_FW_HDR_FW_VER_MICRO(1) | V_FW_HDR_FW_VER_BUILD(0))) {
4062			/*
4063			 * Note that the code to enable the region should run
4064			 * before t4_fw_initialize and not here.  This is just a
4065			 * reminder to add said code.
4066			 */
4067			device_printf(sc->dev,
4068			    "hpfilter region not enabled.\n");
4069		}
4070#endif
4071
4072		sc->tids.tid_base = t4_read_reg(sc,
4073		    A_LE_DB_ACTIVE_TABLE_START_INDEX);
4074
4075		param[0] = FW_PARAM_PFVF(HPFILTER_START);
4076		param[1] = FW_PARAM_PFVF(HPFILTER_END);
4077		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4078		if (rc != 0) {
4079			device_printf(sc->dev,
4080			   "failed to query hpfilter parameters: %d.\n", rc);
4081			return (rc);
4082		}
4083		if ((int)val[1] > (int)val[0]) {
4084			sc->tids.hpftid_base = val[0];
4085			sc->tids.hpftid_end = val[1];
4086			sc->tids.nhpftids = val[1] - val[0] + 1;
4087
4088			/*
4089			 * These should go off if the layout changes and the
4090			 * driver needs to catch up.
4091			 */
4092			MPASS(sc->tids.hpftid_base == 0);
4093			MPASS(sc->tids.tid_base == sc->tids.nhpftids);
4094		}
4095	}
4096
4097	/*
4098	 * MPSBGMAP is queried separately because only recent firmwares support
4099	 * it as a parameter and we don't want the compound query above to fail
4100	 * on older firmwares.
4101	 */
4102	param[0] = FW_PARAM_DEV(MPSBGMAP);
4103	val[0] = 0;
4104	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4105	if (rc == 0)
4106		sc->params.mps_bg_map = val[0];
4107	else
4108		sc->params.mps_bg_map = 0;
4109
4110	/*
4111	 * Determine whether the firmware supports the filter2 work request.
4112	 * This is queried separately for the same reason as MPSBGMAP above.
4113	 */
4114	param[0] = FW_PARAM_DEV(FILTER2_WR);
4115	val[0] = 0;
4116	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4117	if (rc == 0)
4118		sc->params.filter2_wr_support = val[0] != 0;
4119	else
4120		sc->params.filter2_wr_support = 0;
4121
4122	/*
4123	 * Find out whether we're allowed to use the ULPTX MEMWRITE DSGL.
4124	 * This is queried separately for the same reason as other params above.
4125	 */
4126	param[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4127	val[0] = 0;
4128	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4129	if (rc == 0)
4130		sc->params.ulptx_memwrite_dsgl = val[0] != 0;
4131	else
4132		sc->params.ulptx_memwrite_dsgl = false;
4133
4134	/* Get card capabilities. */
4135	bzero(&caps, sizeof(caps));
4136	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4137	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
4138	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4139	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4140	if (rc != 0) {
4141		device_printf(sc->dev,
4142		    "failed to get card capabilities: %d.\n", rc);
4143		return (rc);
4144	}
4145
4146#define READ_CAPS(x) do { \
4147	sc->x = be16toh(caps.x); \
4148} while (0)
4149	READ_CAPS(nbmcaps);
4150	READ_CAPS(linkcaps);
4151	READ_CAPS(switchcaps);
4152	READ_CAPS(niccaps);
4153	READ_CAPS(toecaps);
4154	READ_CAPS(rdmacaps);
4155	READ_CAPS(cryptocaps);
4156	READ_CAPS(iscsicaps);
4157	READ_CAPS(fcoecaps);
4158
4159	if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
4160		MPASS(chip_id(sc) > CHELSIO_T4);
4161		MPASS(sc->toecaps == 0);
4162		sc->toecaps = 0;
4163
4164		param[0] = FW_PARAM_DEV(NTID);
4165		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4166		if (rc != 0) {
4167			device_printf(sc->dev,
4168			    "failed to query HASHFILTER parameters: %d.\n", rc);
4169			return (rc);
4170		}
4171		sc->tids.ntids = val[0];
4172		if (sc->params.fw_vers <
4173		    (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4174		    V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4175			MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4176			sc->tids.ntids -= sc->tids.nhpftids;
4177		}
4178		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4179		sc->params.hash_filter = 1;
4180	}
4181	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
4182		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
4183		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
4184		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4185		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
4186		if (rc != 0) {
4187			device_printf(sc->dev,
4188			    "failed to query NIC parameters: %d.\n", rc);
4189			return (rc);
4190		}
4191		if ((int)val[1] > (int)val[0]) {
4192			sc->tids.etid_base = val[0];
4193			sc->tids.etid_end = val[1];
4194			sc->tids.netids = val[1] - val[0] + 1;
4195			sc->params.eo_wr_cred = val[2];
4196			sc->params.ethoffload = 1;
4197		}
4198	}
4199	if (sc->toecaps) {
4200		/* query offload-related parameters */
4201		param[0] = FW_PARAM_DEV(NTID);
4202		param[1] = FW_PARAM_PFVF(SERVER_START);
4203		param[2] = FW_PARAM_PFVF(SERVER_END);
4204		param[3] = FW_PARAM_PFVF(TDDP_START);
4205		param[4] = FW_PARAM_PFVF(TDDP_END);
4206		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4207		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4208		if (rc != 0) {
4209			device_printf(sc->dev,
4210			    "failed to query TOE parameters: %d.\n", rc);
4211			return (rc);
4212		}
4213		sc->tids.ntids = val[0];
4214		if (sc->params.fw_vers <
4215		    (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4216		    V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4217			MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4218			sc->tids.ntids -= sc->tids.nhpftids;
4219		}
4220		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4221		if ((int)val[2] > (int)val[1]) {
4222			sc->tids.stid_base = val[1];
4223			sc->tids.nstids = val[2] - val[1] + 1;
4224		}
4225		sc->vres.ddp.start = val[3];
4226		sc->vres.ddp.size = val[4] - val[3] + 1;
4227		sc->params.ofldq_wr_cred = val[5];
4228		sc->params.offload = 1;
4229	} else {
4230		/*
4231		 * The firmware attempts memfree TOE configuration for -SO cards
4232		 * and will report toecaps=0 if it runs out of resources (this
4233		 * depends on the config file).  It may not report 0 for other
4234		 * capabilities dependent on the TOE in this case.  Set them to
4235		 * 0 here so that the driver doesn't bother tracking resources
4236		 * that will never be used.
4237		 */
4238		sc->iscsicaps = 0;
4239		sc->rdmacaps = 0;
4240	}
4241	if (sc->rdmacaps) {
4242		param[0] = FW_PARAM_PFVF(STAG_START);
4243		param[1] = FW_PARAM_PFVF(STAG_END);
4244		param[2] = FW_PARAM_PFVF(RQ_START);
4245		param[3] = FW_PARAM_PFVF(RQ_END);
4246		param[4] = FW_PARAM_PFVF(PBL_START);
4247		param[5] = FW_PARAM_PFVF(PBL_END);
4248		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4249		if (rc != 0) {
4250			device_printf(sc->dev,
4251			    "failed to query RDMA parameters(1): %d.\n", rc);
4252			return (rc);
4253		}
4254		sc->vres.stag.start = val[0];
4255		sc->vres.stag.size = val[1] - val[0] + 1;
4256		sc->vres.rq.start = val[2];
4257		sc->vres.rq.size = val[3] - val[2] + 1;
4258		sc->vres.pbl.start = val[4];
4259		sc->vres.pbl.size = val[5] - val[4] + 1;
4260
4261		param[0] = FW_PARAM_PFVF(SQRQ_START);
4262		param[1] = FW_PARAM_PFVF(SQRQ_END);
4263		param[2] = FW_PARAM_PFVF(CQ_START);
4264		param[3] = FW_PARAM_PFVF(CQ_END);
4265		param[4] = FW_PARAM_PFVF(OCQ_START);
4266		param[5] = FW_PARAM_PFVF(OCQ_END);
4267		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4268		if (rc != 0) {
4269			device_printf(sc->dev,
4270			    "failed to query RDMA parameters(2): %d.\n", rc);
4271			return (rc);
4272		}
4273		sc->vres.qp.start = val[0];
4274		sc->vres.qp.size = val[1] - val[0] + 1;
4275		sc->vres.cq.start = val[2];
4276		sc->vres.cq.size = val[3] - val[2] + 1;
4277		sc->vres.ocq.start = val[4];
4278		sc->vres.ocq.size = val[5] - val[4] + 1;
4279
4280		param[0] = FW_PARAM_PFVF(SRQ_START);
4281		param[1] = FW_PARAM_PFVF(SRQ_END);
4282		param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4283		param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4284		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4285		if (rc != 0) {
4286			device_printf(sc->dev,
4287			    "failed to query RDMA parameters(3): %d.\n", rc);
4288			return (rc);
4289		}
4290		sc->vres.srq.start = val[0];
4291		sc->vres.srq.size = val[1] - val[0] + 1;
4292		sc->params.max_ordird_qp = val[2];
4293		sc->params.max_ird_adapter = val[3];
4294	}
4295	if (sc->iscsicaps) {
4296		param[0] = FW_PARAM_PFVF(ISCSI_START);
4297		param[1] = FW_PARAM_PFVF(ISCSI_END);
4298		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4299		if (rc != 0) {
4300			device_printf(sc->dev,
4301			    "failed to query iSCSI parameters: %d.\n", rc);
4302			return (rc);
4303		}
4304		sc->vres.iscsi.start = val[0];
4305		sc->vres.iscsi.size = val[1] - val[0] + 1;
4306	}
4307	if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4308		param[0] = FW_PARAM_PFVF(TLS_START);
4309		param[1] = FW_PARAM_PFVF(TLS_END);
4310		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4311		if (rc != 0) {
4312			device_printf(sc->dev,
4313			    "failed to query TLS parameters: %d.\n", rc);
4314			return (rc);
4315		}
4316		sc->vres.key.start = val[0];
4317		sc->vres.key.size = val[1] - val[0] + 1;
4318	}
4319
4320	t4_init_sge_params(sc);
4321
4322	/*
4323	 * We've got the params we wanted to query via the firmware.  Now grab
4324	 * some others directly from the chip.
4325	 */
4326	rc = t4_read_chip_settings(sc);
4327
4328	return (rc);
4329}
4330
4331static int
4332set_params__post_init(struct adapter *sc)
4333{
4334	uint32_t param, val;
4335#ifdef TCP_OFFLOAD
4336	int i, v, shift;
4337#endif
4338
4339	/* ask for encapsulated CPLs */
4340	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4341	val = 1;
4342	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4343
4344	/* Enable 32b port caps if the firmware supports it. */
4345	param = FW_PARAM_PFVF(PORT_CAPS32);
4346	val = 1;
4347	if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
4348		sc->params.port_caps32 = 1;
4349
4350#ifdef TCP_OFFLOAD
4351	/*
4352	 * Override the TOE timers with user provided tunables.  This is not the
4353	 * recommended way to change the timers (the firmware config file is) so
4354	 * these tunables are not documented.
4355	 *
4356	 * All the timer tunables are in microseconds.
4357	 */
4358	if (t4_toe_keepalive_idle != 0) {
4359		v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
4360		v &= M_KEEPALIVEIDLE;
4361		t4_set_reg_field(sc, A_TP_KEEP_IDLE,
4362		    V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
4363	}
4364	if (t4_toe_keepalive_interval != 0) {
4365		v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
4366		v &= M_KEEPALIVEINTVL;
4367		t4_set_reg_field(sc, A_TP_KEEP_INTVL,
4368		    V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
4369	}
4370	if (t4_toe_keepalive_count != 0) {
4371		v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
4372		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4373		    V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
4374		    V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
4375		    V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
4376	}
4377	if (t4_toe_rexmt_min != 0) {
4378		v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
4379		v &= M_RXTMIN;
4380		t4_set_reg_field(sc, A_TP_RXT_MIN,
4381		    V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
4382	}
4383	if (t4_toe_rexmt_max != 0) {
4384		v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
4385		v &= M_RXTMAX;
4386		t4_set_reg_field(sc, A_TP_RXT_MAX,
4387		    V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
4388	}
4389	if (t4_toe_rexmt_count != 0) {
4390		v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
4391		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4392		    V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
4393		    V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
4394		    V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
4395	}
4396	for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
4397		if (t4_toe_rexmt_backoff[i] != -1) {
4398			v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
4399			shift = (i & 3) << 3;
4400			t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
4401			    M_TIMERBACKOFFINDEX0 << shift, v << shift);
4402		}
4403	}
4404#endif
4405	return (0);
4406}
4407
4408#undef FW_PARAM_PFVF
4409#undef FW_PARAM_DEV
4410
4411static void
4412t4_set_desc(struct adapter *sc)
4413{
4414	char buf[128];
4415	struct adapter_params *p = &sc->params;
4416
4417	snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4418
4419	device_set_desc_copy(sc->dev, buf);
4420}
4421
4422static inline void
4423ifmedia_add4(struct ifmedia *ifm, int m)
4424{
4425
4426	ifmedia_add(ifm, m, 0, NULL);
4427	ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4428	ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4429	ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4430}
4431
4432/*
4433 * This is the selected media, which is not quite the same as the active media.
4434 * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4435 * and active are not the same, and "media: Ethernet selected" otherwise.
4436 */
4437static void
4438set_current_media(struct port_info *pi)
4439{
4440	struct link_config *lc;
4441	struct ifmedia *ifm;
4442	int mword;
4443	u_int speed;
4444
4445	PORT_LOCK_ASSERT_OWNED(pi);
4446
4447	/* Leave current media alone if it's already set to IFM_NONE. */
4448	ifm = &pi->media;
4449	if (ifm->ifm_cur != NULL &&
4450	    IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
4451		return;
4452
4453	lc = &pi->link_cfg;
4454	if (lc->requested_aneg != AUTONEG_DISABLE &&
4455	    lc->supported & FW_PORT_CAP32_ANEG) {
4456		ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
4457		return;
4458	}
4459	mword = IFM_ETHER | IFM_FDX;
4460	if (lc->requested_fc & PAUSE_TX)
4461		mword |= IFM_ETH_TXPAUSE;
4462	if (lc->requested_fc & PAUSE_RX)
4463		mword |= IFM_ETH_RXPAUSE;
4464	if (lc->requested_speed == 0)
4465		speed = port_top_speed(pi) * 1000;	/* Gbps -> Mbps */
4466	else
4467		speed = lc->requested_speed;
4468	mword |= port_mword(pi, speed_to_fwcap(speed));
4469	ifmedia_set(ifm, mword);
4470}
4471
4472/*
4473 * Returns true if the ifmedia list for the port cannot change.
4474 */
4475static bool
4476fixed_ifmedia(struct port_info *pi)
4477{
4478
4479	return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
4480	    pi->port_type == FW_PORT_TYPE_BT_XFI ||
4481	    pi->port_type == FW_PORT_TYPE_BT_XAUI ||
4482	    pi->port_type == FW_PORT_TYPE_KX4 ||
4483	    pi->port_type == FW_PORT_TYPE_KX ||
4484	    pi->port_type == FW_PORT_TYPE_KR ||
4485	    pi->port_type == FW_PORT_TYPE_BP_AP ||
4486	    pi->port_type == FW_PORT_TYPE_BP4_AP ||
4487	    pi->port_type == FW_PORT_TYPE_BP40_BA ||
4488	    pi->port_type == FW_PORT_TYPE_KR4_100G ||
4489	    pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
4490	    pi->port_type == FW_PORT_TYPE_KR_XLAUI);
4491}
4492
4493static void
4494build_medialist(struct port_info *pi)
4495{
4496	uint32_t ss, speed;
4497	int unknown, mword, bit;
4498	struct link_config *lc;
4499	struct ifmedia *ifm;
4500
4501	PORT_LOCK_ASSERT_OWNED(pi);
4502
4503	if (pi->flags & FIXED_IFMEDIA)
4504		return;
4505
4506	/*
4507	 * Rebuild the ifmedia list.
4508	 */
4509	ifm = &pi->media;
4510	ifmedia_removeall(ifm);
4511	lc = &pi->link_cfg;
4512	ss = G_FW_PORT_CAP32_SPEED(lc->supported); /* Supported Speeds */
4513	if (__predict_false(ss == 0)) {	/* not supposed to happen. */
4514		MPASS(ss != 0);
4515no_media:
4516		MPASS(LIST_EMPTY(&ifm->ifm_list));
4517		ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
4518		ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
4519		return;
4520	}
4521
4522	unknown = 0;
4523	for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
4524		speed = 1 << bit;
4525		MPASS(speed & M_FW_PORT_CAP32_SPEED);
4526		if (ss & speed) {
4527			mword = port_mword(pi, speed);
4528			if (mword == IFM_NONE) {
4529				goto no_media;
4530			} else if (mword == IFM_UNKNOWN)
4531				unknown++;
4532			else
4533				ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
4534		}
4535	}
4536	if (unknown > 0) /* Add one unknown for all unknown media types. */
4537		ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
4538	if (lc->supported & FW_PORT_CAP32_ANEG)
4539		ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
4540
4541	set_current_media(pi);
4542}
4543
4544/*
4545 * Initialize the requested fields in the link config based on driver tunables.
4546 */
4547static void
4548init_link_config(struct port_info *pi)
4549{
4550	struct link_config *lc = &pi->link_cfg;
4551
4552	PORT_LOCK_ASSERT_OWNED(pi);
4553
4554	lc->requested_speed = 0;
4555
4556	if (t4_autoneg == 0)
4557		lc->requested_aneg = AUTONEG_DISABLE;
4558	else if (t4_autoneg == 1)
4559		lc->requested_aneg = AUTONEG_ENABLE;
4560	else
4561		lc->requested_aneg = AUTONEG_AUTO;
4562
4563	lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
4564	    PAUSE_AUTONEG);
4565
4566	if (t4_fec == -1 || t4_fec & FEC_AUTO)
4567		lc->requested_fec = FEC_AUTO;
4568	else {
4569		lc->requested_fec = FEC_NONE;
4570		if (t4_fec & FEC_RS)
4571			lc->requested_fec |= FEC_RS;
4572		if (t4_fec & FEC_BASER_RS)
4573			lc->requested_fec |= FEC_BASER_RS;
4574	}
4575}
4576
4577/*
4578 * Makes sure that all requested settings comply with what's supported by the
4579 * port.  Returns the number of settings that were invalid and had to be fixed.
4580 */
4581static int
4582fixup_link_config(struct port_info *pi)
4583{
4584	int n = 0;
4585	struct link_config *lc = &pi->link_cfg;
4586	uint32_t fwspeed;
4587
4588	PORT_LOCK_ASSERT_OWNED(pi);
4589
4590	/* Speed (when not autonegotiating) */
4591	if (lc->requested_speed != 0) {
4592		fwspeed = speed_to_fwcap(lc->requested_speed);
4593		if ((fwspeed & lc->supported) == 0) {
4594			n++;
4595			lc->requested_speed = 0;
4596		}
4597	}
4598
4599	/* Link autonegotiation */
4600	MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
4601	    lc->requested_aneg == AUTONEG_DISABLE ||
4602	    lc->requested_aneg == AUTONEG_AUTO);
4603	if (lc->requested_aneg == AUTONEG_ENABLE &&
4604	    !(lc->supported & FW_PORT_CAP32_ANEG)) {
4605		n++;
4606		lc->requested_aneg = AUTONEG_AUTO;
4607	}
4608
4609	/* Flow control */
4610	MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
4611	if (lc->requested_fc & PAUSE_TX &&
4612	    !(lc->supported & FW_PORT_CAP32_FC_TX)) {
4613		n++;
4614		lc->requested_fc &= ~PAUSE_TX;
4615	}
4616	if (lc->requested_fc & PAUSE_RX &&
4617	    !(lc->supported & FW_PORT_CAP32_FC_RX)) {
4618		n++;
4619		lc->requested_fc &= ~PAUSE_RX;
4620	}
4621	if (!(lc->requested_fc & PAUSE_AUTONEG) &&
4622	    !(lc->supported & FW_PORT_CAP32_FORCE_PAUSE)) {
4623		n++;
4624		lc->requested_fc |= PAUSE_AUTONEG;
4625	}
4626
4627	/* FEC */
4628	if ((lc->requested_fec & FEC_RS &&
4629	    !(lc->supported & FW_PORT_CAP32_FEC_RS)) ||
4630	    (lc->requested_fec & FEC_BASER_RS &&
4631	    !(lc->supported & FW_PORT_CAP32_FEC_BASER_RS))) {
4632		n++;
4633		lc->requested_fec = FEC_AUTO;
4634	}
4635
4636	return (n);
4637}
4638
4639/*
4640 * Apply the requested L1 settings, which are expected to be valid, to the
4641 * hardware.
4642 */
4643static int
4644apply_link_config(struct port_info *pi)
4645{
4646	struct adapter *sc = pi->adapter;
4647	struct link_config *lc = &pi->link_cfg;
4648	int rc;
4649
4650#ifdef INVARIANTS
4651	ASSERT_SYNCHRONIZED_OP(sc);
4652	PORT_LOCK_ASSERT_OWNED(pi);
4653
4654	if (lc->requested_aneg == AUTONEG_ENABLE)
4655		MPASS(lc->supported & FW_PORT_CAP32_ANEG);
4656	if (!(lc->requested_fc & PAUSE_AUTONEG))
4657		MPASS(lc->supported & FW_PORT_CAP32_FORCE_PAUSE);
4658	if (lc->requested_fc & PAUSE_TX)
4659		MPASS(lc->supported & FW_PORT_CAP32_FC_TX);
4660	if (lc->requested_fc & PAUSE_RX)
4661		MPASS(lc->supported & FW_PORT_CAP32_FC_RX);
4662	if (lc->requested_fec & FEC_RS)
4663		MPASS(lc->supported & FW_PORT_CAP32_FEC_RS);
4664	if (lc->requested_fec & FEC_BASER_RS)
4665		MPASS(lc->supported & FW_PORT_CAP32_FEC_BASER_RS);
4666#endif
4667	rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
4668	if (rc != 0) {
4669		/* Don't complain if the VF driver gets back an EPERM. */
4670		if (!(sc->flags & IS_VF) || rc != FW_EPERM)
4671			device_printf(pi->dev, "l1cfg failed: %d\n", rc);
4672	} else {
4673		/*
4674		 * An L1_CFG will almost always result in a link-change event if
4675		 * the link is up, and the driver will refresh the actual
4676		 * fec/fc/etc. when the notification is processed.  If the link
4677		 * is down then the actual settings are meaningless.
4678		 *
4679		 * This takes care of the case where a change in the L1 settings
4680		 * may not result in a notification.
4681		 */
4682		if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
4683			lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
4684	}
4685	return (rc);
4686}
4687
4688#define FW_MAC_EXACT_CHUNK	7
4689
4690/*
4691 * Program the port's XGMAC based on parameters in ifnet.  The caller also
4692 * indicates which parameters should be programmed (the rest are left alone).
4693 */
4694int
4695update_mac_settings(struct ifnet *ifp, int flags)
4696{
4697	int rc = 0;
4698	struct vi_info *vi = ifp->if_softc;
4699	struct port_info *pi = vi->pi;
4700	struct adapter *sc = pi->adapter;
4701	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
4702
4703	ASSERT_SYNCHRONIZED_OP(sc);
4704	KASSERT(flags, ("%s: not told what to update.", __func__));
4705
4706	if (flags & XGMAC_MTU)
4707		mtu = ifp->if_mtu;
4708
4709	if (flags & XGMAC_PROMISC)
4710		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
4711
4712	if (flags & XGMAC_ALLMULTI)
4713		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
4714
4715	if (flags & XGMAC_VLANEX)
4716		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
4717
4718	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
4719		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
4720		    allmulti, 1, vlanex, false);
4721		if (rc) {
4722			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
4723			    rc);
4724			return (rc);
4725		}
4726	}
4727
4728	if (flags & XGMAC_UCADDR) {
4729		uint8_t ucaddr[ETHER_ADDR_LEN];
4730
4731		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
4732		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
4733		    ucaddr, true, true);
4734		if (rc < 0) {
4735			rc = -rc;
4736			if_printf(ifp, "change_mac failed: %d\n", rc);
4737			return (rc);
4738		} else {
4739			vi->xact_addr_filt = rc;
4740			rc = 0;
4741		}
4742	}
4743
4744	if (flags & XGMAC_MCADDRS) {
4745		const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
4746		int del = 1;
4747		uint64_t hash = 0;
4748		struct ifmultiaddr *ifma;
4749		int i = 0, j;
4750
4751		if_maddr_rlock(ifp);
4752		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4753			if (ifma->ifma_addr->sa_family != AF_LINK)
4754				continue;
4755			mcaddr[i] =
4756			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
4757			MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
4758			i++;
4759
4760			if (i == FW_MAC_EXACT_CHUNK) {
4761				rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
4762				    del, i, mcaddr, NULL, &hash, 0);
4763				if (rc < 0) {
4764					rc = -rc;
4765					for (j = 0; j < i; j++) {
4766						if_printf(ifp,
4767						    "failed to add mc address"
4768						    " %02x:%02x:%02x:"
4769						    "%02x:%02x:%02x rc=%d\n",
4770						    mcaddr[j][0], mcaddr[j][1],
4771						    mcaddr[j][2], mcaddr[j][3],
4772						    mcaddr[j][4], mcaddr[j][5],
4773						    rc);
4774					}
4775					goto mcfail;
4776				}
4777				del = 0;
4778				i = 0;
4779			}
4780		}
4781		if (i > 0) {
4782			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
4783			    mcaddr, NULL, &hash, 0);
4784			if (rc < 0) {
4785				rc = -rc;
4786				for (j = 0; j < i; j++) {
4787					if_printf(ifp,
4788					    "failed to add mc address"
4789					    " %02x:%02x:%02x:"
4790					    "%02x:%02x:%02x rc=%d\n",
4791					    mcaddr[j][0], mcaddr[j][1],
4792					    mcaddr[j][2], mcaddr[j][3],
4793					    mcaddr[j][4], mcaddr[j][5],
4794					    rc);
4795				}
4796				goto mcfail;
4797			}
4798		}
4799
4800		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
4801		if (rc != 0)
4802			if_printf(ifp, "failed to set mc address hash: %d", rc);
4803mcfail:
4804		if_maddr_runlock(ifp);
4805	}
4806
4807	return (rc);
4808}
4809
4810/*
4811 * {begin|end}_synchronized_op must be called from the same thread.
4812 */
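/*
 * Illustrative usage (a sketch, not actual driver code; "t4xmpl" is a
 * made-up wait message):
 *
 *	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4xmpl");
 *	if (rc != 0)
 *		return (rc);	(busy, interrupted, or the VI is doomed)
 *	(... reconfigure the adapter or VI ...)
 *	end_synchronized_op(sc, 0);
 */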
4813int
4814begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
4815    char *wmesg)
4816{
4817	int rc, pri;
4818
4819#ifdef WITNESS
4820	/* the caller thinks it's ok to sleep, but is it really? */
4821	if (flags & SLEEP_OK)
4822		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
4823		    "begin_synchronized_op");
4824#endif
4825
4826	if (INTR_OK)
4827		pri = PCATCH;
4828	else
4829		pri = 0;
4830
4831	ADAPTER_LOCK(sc);
4832	for (;;) {
4833
4834		if (vi && IS_DOOMED(vi)) {
4835			rc = ENXIO;
4836			goto done;
4837		}
4838
4839		if (!IS_BUSY(sc)) {
4840			rc = 0;
4841			break;
4842		}
4843
4844		if (!(flags & SLEEP_OK)) {
4845			rc = EBUSY;
4846			goto done;
4847		}
4848
4849		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
4850			rc = EINTR;
4851			goto done;
4852		}
4853	}
4854
4855	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
4856	SET_BUSY(sc);
4857#ifdef INVARIANTS
4858	sc->last_op = wmesg;
4859	sc->last_op_thr = curthread;
4860	sc->last_op_flags = flags;
4861#endif
4862
4863done:
4864	if (!(flags & HOLD_LOCK) || rc)
4865		ADAPTER_UNLOCK(sc);
4866
4867	return (rc);
4868}
4869
4870/*
4871 * Tell if_ioctl and if_init that the VI is going away.  This is a
4872 * special variant of begin_synchronized_op and must be paired with a
4873 * call to end_synchronized_op.
4874 */
4875void
4876doom_vi(struct adapter *sc, struct vi_info *vi)
4877{
4878
4879	ADAPTER_LOCK(sc);
4880	SET_DOOMED(vi);
4881	wakeup(&sc->flags);
4882	while (IS_BUSY(sc))
4883		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
4884	SET_BUSY(sc);
4885#ifdef INVARIANTS
4886	sc->last_op = "t4detach";
4887	sc->last_op_thr = curthread;
4888	sc->last_op_flags = 0;
4889#endif
4890	ADAPTER_UNLOCK(sc);
4891}
4892
4893/*
4894 * {begin|end}_synchronized_op must be called from the same thread.
4895 */
4896void
4897end_synchronized_op(struct adapter *sc, int flags)
4898{
4899
4900	if (flags & LOCK_HELD)
4901		ADAPTER_LOCK_ASSERT_OWNED(sc);
4902	else
4903		ADAPTER_LOCK(sc);
4904
4905	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
4906	CLR_BUSY(sc);
4907	wakeup(&sc->flags);
4908	ADAPTER_UNLOCK(sc);
4909}
4910
4911static int
4912cxgbe_init_synchronized(struct vi_info *vi)
4913{
4914	struct port_info *pi = vi->pi;
4915	struct adapter *sc = pi->adapter;
4916	struct ifnet *ifp = vi->ifp;
4917	int rc = 0, i;
4918	struct sge_txq *txq;
4919
4920	ASSERT_SYNCHRONIZED_OP(sc);
4921
4922	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
4923		return (0);	/* already running */
4924
4925	if (!(sc->flags & FULL_INIT_DONE) &&
4926	    ((rc = adapter_full_init(sc)) != 0))
4927		return (rc);	/* error message displayed already */
4928
4929	if (!(vi->flags & VI_INIT_DONE) &&
4930	    ((rc = vi_full_init(vi)) != 0))
4931		return (rc); /* error message displayed already */
4932
4933	rc = update_mac_settings(ifp, XGMAC_ALL);
4934	if (rc)
4935		goto done;	/* error message displayed already */
4936
4937	PORT_LOCK(pi);
4938	if (pi->up_vis == 0) {
4939		t4_update_port_info(pi);
4940		fixup_link_config(pi);
4941		build_medialist(pi);
4942		apply_link_config(pi);
4943	}
4944
4945	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
4946	if (rc != 0) {
4947		if_printf(ifp, "enable_vi failed: %d\n", rc);
4948		PORT_UNLOCK(pi);
4949		goto done;
4950	}
4951
4952	/*
4953	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
4954	 * if this changes.
4955	 */
4956
4957	for_each_txq(vi, i, txq) {
4958		TXQ_LOCK(txq);
4959		txq->eq.flags |= EQ_ENABLED;
4960		TXQ_UNLOCK(txq);
4961	}
4962
4963	/*
4964	 * The first iq of the first port to come up is used for tracing.
4965	 */
4966	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
4967		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
4968		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
4969		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
4970		    V_QUEUENUMBER(sc->traceq));
4971		pi->flags |= HAS_TRACEQ;
4972	}
4973
4974	/* all ok */
4975	pi->up_vis++;
4976	ifp->if_drv_flags |= IFF_DRV_RUNNING;
4977
4978	if (pi->nvi > 1 || sc->flags & IS_VF)
4979		callout_reset(&vi->tick, hz, vi_tick, vi);
4980	else
4981		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
4982	PORT_UNLOCK(pi);
4983done:
4984	if (rc != 0)
4985		cxgbe_uninit_synchronized(vi);
4986
4987	return (rc);
4988}
4989
4990/*
4991 * Idempotent.
4992 */
4993static int
4994cxgbe_uninit_synchronized(struct vi_info *vi)
4995{
4996	struct port_info *pi = vi->pi;
4997	struct adapter *sc = pi->adapter;
4998	struct ifnet *ifp = vi->ifp;
4999	int rc, i;
5000	struct sge_txq *txq;
5001
5002	ASSERT_SYNCHRONIZED_OP(sc);
5003
5004	if (!(vi->flags & VI_INIT_DONE)) {
5005		if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5006			KASSERT(0, ("uninited VI is running"));
5007			if_printf(ifp, "uninited VI with running ifnet.  "
5008			    "vi->flags 0x%016lx, if_flags 0x%08x, "
5009			    "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
5010			    ifp->if_drv_flags);
5011		}
5012		return (0);
5013	}
5014
5015	/*
5016	 * Disable the VI so that all its data in either direction is discarded
5017	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
5018	 * tick) intact as the TP can deliver negative advice or data that it's
5019	 * holding in its RAM (for an offloaded connection) even after the VI is
5020	 * disabled.
5021	 */
5022	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
5023	if (rc) {
5024		if_printf(ifp, "disable_vi failed: %d\n", rc);
5025		return (rc);
5026	}
5027
5028	for_each_txq(vi, i, txq) {
5029		TXQ_LOCK(txq);
5030		txq->eq.flags &= ~EQ_ENABLED;
5031		TXQ_UNLOCK(txq);
5032	}
5033
5034	PORT_LOCK(pi);
5035	if (pi->nvi > 1 || sc->flags & IS_VF)
5036		callout_stop(&vi->tick);
5037	else
5038		callout_stop(&pi->tick);
5039	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5040		PORT_UNLOCK(pi);
5041		return (0);
5042	}
5043	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
5044	pi->up_vis--;
5045	if (pi->up_vis > 0) {
5046		PORT_UNLOCK(pi);
5047		return (0);
5048	}
5049
5050	pi->link_cfg.link_ok = false;
5051	pi->link_cfg.speed = 0;
5052	pi->link_cfg.link_down_rc = 255;
5053	t4_os_link_changed(pi);
5054	PORT_UNLOCK(pi);
5055
5056	return (0);
5057}
5058
5059/*
5060 * It is ok for this function to fail midway and return right away.  t4_detach
5061 * will walk the entire sc->irq list and clean up whatever is valid.
5062 */
5063int
5064t4_setup_intr_handlers(struct adapter *sc)
5065{
5066	int rc, rid, p, q, v;
5067	char s[8];
5068	struct irq *irq;
5069	struct port_info *pi;
5070	struct vi_info *vi;
5071	struct sge *sge = &sc->sge;
5072	struct sge_rxq *rxq;
5073#ifdef TCP_OFFLOAD
5074	struct sge_ofld_rxq *ofld_rxq;
5075#endif
5076#ifdef DEV_NETMAP
5077	struct sge_nm_rxq *nm_rxq;
5078#endif
5079#ifdef RSS
5080	int nbuckets = rss_getnumbuckets();
5081#endif
5082
5083	/*
5084	 * Set up interrupts.
5085	 */
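	/*
	 * Vector layout when not forwarding to the fwq: the error interrupt
	 * (PFs only), then the firmware event queue, then one vector per rx
	 * queue.  Queue vectors are named "<port><vi><queue>", e.g. "0a1"
	 * for port 0, first VI, rxq 1 ('A' + VI index for offload queues).
	 */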
5086	irq = &sc->irq[0];
5087	rid = sc->intr_type == INTR_INTX ? 0 : 1;
5088	if (forwarding_intr_to_fwq(sc))
5089		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
5090
5091	/* Multiple interrupts. */
5092	if (sc->flags & IS_VF)
5093		KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
5094		    ("%s: too few intr.", __func__));
5095	else
5096		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
5097		    ("%s: too few intr.", __func__));
5098
5099	/* The first one is always the error interrupt on PFs. */
5100	if (!(sc->flags & IS_VF)) {
5101		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
5102		if (rc != 0)
5103			return (rc);
5104		irq++;
5105		rid++;
5106	}
5107
5108	/* The second one is always the firmware event queue (first on VFs) */
5109	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
5110	if (rc != 0)
5111		return (rc);
5112	irq++;
5113	rid++;
5114
5115	for_each_port(sc, p) {
5116		pi = sc->port[p];
5117		for_each_vi(pi, v, vi) {
5118			vi->first_intr = rid - 1;
5119
5120			if (vi->nnmrxq > 0) {
5121				int n = max(vi->nrxq, vi->nnmrxq);
5122
5123				rxq = &sge->rxq[vi->first_rxq];
5124#ifdef DEV_NETMAP
5125				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
5126#endif
5127				for (q = 0; q < n; q++) {
5128					snprintf(s, sizeof(s), "%x%c%x", p,
5129					    'a' + v, q);
5130					if (q < vi->nrxq)
5131						irq->rxq = rxq++;
5132#ifdef DEV_NETMAP
5133					if (q < vi->nnmrxq)
5134						irq->nm_rxq = nm_rxq++;
5135
5136					if (irq->nm_rxq != NULL &&
5137					    irq->rxq == NULL) {
5138						/* Netmap rx only */
5139						rc = t4_alloc_irq(sc, irq, rid,
5140						    t4_nm_intr, irq->nm_rxq, s);
5141					}
5142					if (irq->nm_rxq != NULL &&
5143					    irq->rxq != NULL) {
5144						/* NIC and Netmap rx */
5145						rc = t4_alloc_irq(sc, irq, rid,
5146						    t4_vi_intr, irq, s);
5147					}
5148#endif
5149					if (irq->rxq != NULL &&
5150					    irq->nm_rxq == NULL) {
5151						/* NIC rx only */
5152						rc = t4_alloc_irq(sc, irq, rid,
5153						    t4_intr, irq->rxq, s);
5154					}
5155					if (rc != 0)
5156						return (rc);
5157#ifdef RSS
5158					if (q < vi->nrxq) {
5159						bus_bind_intr(sc->dev, irq->res,
5160						    rss_getcpu(q % nbuckets));
5161					}
5162#endif
5163					irq++;
5164					rid++;
5165					vi->nintr++;
5166				}
5167			} else {
5168				for_each_rxq(vi, q, rxq) {
5169					snprintf(s, sizeof(s), "%x%c%x", p,
5170					    'a' + v, q);
5171					rc = t4_alloc_irq(sc, irq, rid,
5172					    t4_intr, rxq, s);
5173					if (rc != 0)
5174						return (rc);
5175#ifdef RSS
5176					bus_bind_intr(sc->dev, irq->res,
5177					    rss_getcpu(q % nbuckets));
5178#endif
5179					irq++;
5180					rid++;
5181					vi->nintr++;
5182				}
5183			}
5184#ifdef TCP_OFFLOAD
5185			for_each_ofld_rxq(vi, q, ofld_rxq) {
5186				snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
5187				rc = t4_alloc_irq(sc, irq, rid, t4_intr,
5188				    ofld_rxq, s);
5189				if (rc != 0)
5190					return (rc);
5191				irq++;
5192				rid++;
5193				vi->nintr++;
5194			}
5195#endif
5196		}
5197	}
5198	MPASS(irq == &sc->irq[sc->intr_count]);
5199
5200	return (0);
5201}
5202
5203int
5204adapter_full_init(struct adapter *sc)
5205{
5206	int rc, i;
5207#ifdef RSS
5208	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5209	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5210#endif
5211
5212	ASSERT_SYNCHRONIZED_OP(sc);
5213	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5214	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
5215	    ("%s: FULL_INIT_DONE already", __func__));
5216
5217	/*
5218	 * Queues that belong to the adapter (not to any particular port).
5219	 */
5220	rc = t4_setup_adapter_queues(sc);
5221	if (rc != 0)
5222		goto done;
5223
5224	for (i = 0; i < nitems(sc->tq); i++) {
5225		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
5226		    taskqueue_thread_enqueue, &sc->tq[i]);
5227		if (sc->tq[i] == NULL) {
5228			device_printf(sc->dev,
5229			    "failed to allocate task queue %d\n", i);
5230			rc = ENOMEM;
5231			goto done;
5232		}
5233		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
5234		    device_get_nameunit(sc->dev), i);
5235	}
5236#ifdef RSS
5237	MPASS(RSS_KEYSIZE == 40);
5238	rss_getkey((void *)&raw_rss_key[0]);
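	/*
	 * The loop below reverses the word order of the kernel's RSS key and
	 * converts each word to big-endian before handing the key to
	 * t4_write_rss_key.
	 */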
5239	for (i = 0; i < nitems(rss_key); i++) {
5240		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
5241	}
5242	t4_write_rss_key(sc, &rss_key[0], -1, 1);
5243#endif
5244
5245	if (!(sc->flags & IS_VF))
5246		t4_intr_enable(sc);
5247	sc->flags |= FULL_INIT_DONE;
5248done:
5249	if (rc != 0)
5250		adapter_full_uninit(sc);
5251
5252	return (rc);
5253}
5254
5255int
5256adapter_full_uninit(struct adapter *sc)
5257{
5258	int i;
5259
5260	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5261
5262	t4_teardown_adapter_queues(sc);
5263
5264	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5265		taskqueue_free(sc->tq[i]);
5266		sc->tq[i] = NULL;
5267	}
5268
5269	sc->flags &= ~FULL_INIT_DONE;
5270
5271	return (0);
5272}
5273
5274#ifdef RSS
5275#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
5276    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
5277    RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
5278    RSS_HASHTYPE_RSS_UDP_IPV6)
5279
5280/* Translates kernel hash types to hardware. */
5281static int
5282hashconfig_to_hashen(int hashconfig)
5283{
5284	int hashen = 0;
5285
5286	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5287		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5288	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5289		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5290	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5291		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5292		    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5293	}
5294	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5295		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5296		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5297	}
5298	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5299		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5300	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5301		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5302
5303	return (hashen);
5304}
5305
5306/* Translates hardware hash types to kernel. */
5307static int
5308hashen_to_hashconfig(int hashen)
5309{
5310	int hashconfig = 0;
5311
5312	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
5313		/*
5314		 * If UDP hashing was enabled it must have been enabled for
5315		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
5316		 * enabling any 4-tuple hash is a nonsense configuration.
5317		 */
5318		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5319		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
5320
5321		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5322			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
5323		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5324			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
5325	}
5326	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5327		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
5328	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5329		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
5330	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
5331		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
5332	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
5333		hashconfig |= RSS_HASHTYPE_RSS_IPV6;
5334
5335	return (hashconfig);
5336}
5337#endif
5338
5339int
5340vi_full_init(struct vi_info *vi)
5341{
5342	struct adapter *sc = vi->pi->adapter;
5343	struct ifnet *ifp = vi->ifp;
5344	uint16_t *rss;
5345	struct sge_rxq *rxq;
5346	int rc, i, j, hashen;
5347#ifdef RSS
5348	int nbuckets = rss_getnumbuckets();
5349	int hashconfig = rss_gethashconfig();
5350	int extra;
5351#endif
5352
5353	ASSERT_SYNCHRONIZED_OP(sc);
5354	KASSERT((vi->flags & VI_INIT_DONE) == 0,
5355	    ("%s: VI_INIT_DONE already", __func__));
5356
5357	sysctl_ctx_init(&vi->ctx);
5358	vi->flags |= VI_SYSCTL_CTX;
5359
5360	/*
5361	 * Allocate tx/rx/fl queues for this VI.
5362	 */
5363	rc = t4_setup_vi_queues(vi);
5364	if (rc != 0)
5365		goto done;	/* error message displayed already */
5366
5367	/*
5368	 * Set up RSS for this VI.  Save a copy of the RSS table for later use.
5369	 */
5370	if (vi->nrxq > vi->rss_size) {
5371		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
5372		    "some queues will never receive traffic.\n", vi->nrxq,
5373		    vi->rss_size);
5374	} else if (vi->rss_size % vi->nrxq) {
5375		if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
5376		    "expect uneven traffic distribution.\n", vi->nrxq,
5377		    vi->rss_size);
5378	}
5379#ifdef RSS
5380	if (vi->nrxq != nbuckets) {
5381		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
5382		    "performance will be impacted.\n", vi->nrxq, nbuckets);
5383	}
5384#endif
5385	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
5386	for (i = 0; i < vi->rss_size;) {
5387#ifdef RSS
5388		j = rss_get_indirection_to_bucket(i);
5389		j %= vi->nrxq;
5390		rxq = &sc->sge.rxq[vi->first_rxq + j];
5391		rss[i++] = rxq->iq.abs_id;
5392#else
5393		for_each_rxq(vi, j, rxq) {
5394			rss[i++] = rxq->iq.abs_id;
5395			if (i == vi->rss_size)
5396				break;
5397		}
5398#endif
5399	}
5400
5401	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
5402	    vi->rss_size);
5403	if (rc != 0) {
5404		free(rss, M_CXGBE);
5405		if_printf(ifp, "rss_config failed: %d\n", rc);
5406		goto done;
5407	}
5408
5409#ifdef RSS
5410	hashen = hashconfig_to_hashen(hashconfig);
5411
5412	/*
5413	 * We may have had to enable some hashes even though the global config
5414	 * wants them disabled.  This is a potential problem that must be
5415	 * reported to the user.
5416	 */
5417	extra = hashen_to_hashconfig(hashen) ^ hashconfig;
5418
5419	/*
5420	 * If we consider only the supported hash types, then the enabled hashes
5421	 * are a superset of the requested hashes.  In other words, there cannot
5422	 * be any supported hash that was requested but not enabled, but there
5423	 * can be hashes that were not requested but had to be enabled.
5424	 */
5425	extra &= SUPPORTED_RSS_HASHTYPES;
5426	MPASS((extra & hashconfig) == 0);
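	/*
	 * Worked example: if the kernel requested only
	 * RSS_HASHTYPE_RSS_UDP_IPV4, hashconfig_to_hashen enabled UDPEN and
	 * IP4FOURTUPEN, and hashen_to_hashconfig maps IP4FOURTUPEN back to
	 * TCP_IPV4 as well, so RSS_HASHTYPE_RSS_TCP_IPV4 ends up in "extra"
	 * as a hash that was forced on without being requested.
	 */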
5427
5428	if (extra) {
5429		if_printf(ifp,
5430		    "global RSS config (0x%x) cannot be accommodated.\n",
5431		    hashconfig);
5432	}
5433	if (extra & RSS_HASHTYPE_RSS_IPV4)
5434		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
5435	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
5436		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
5437	if (extra & RSS_HASHTYPE_RSS_IPV6)
5438		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
5439	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
5440		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
5441	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
5442		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
5443	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
5444		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
5445#else
5446	hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
5447	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
5448	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5449	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
5450#endif
5451	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0);
5452	if (rc != 0) {
5453		free(rss, M_CXGBE);
5454		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
5455		goto done;
5456	}
5457
5458	vi->rss = rss;
5459	vi->flags |= VI_INIT_DONE;
5460done:
5461	if (rc != 0)
5462		vi_full_uninit(vi);
5463
5464	return (rc);
5465}
5466
5467/*
5468 * Idempotent.
5469 */
5470int
5471vi_full_uninit(struct vi_info *vi)
5472{
5473	struct port_info *pi = vi->pi;
5474	struct adapter *sc = pi->adapter;
5475	int i;
5476	struct sge_rxq *rxq;
5477	struct sge_txq *txq;
5478#ifdef TCP_OFFLOAD
5479	struct sge_ofld_rxq *ofld_rxq;
5480	struct sge_wrq *ofld_txq;
5481#endif
5482
5483	if (vi->flags & VI_INIT_DONE) {
5484
5485		/* Need to quiesce queues.  */
5486
5487		/* XXX: Only for the first VI? */
5488		if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
5489			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
5490
5491		for_each_txq(vi, i, txq) {
5492			quiesce_txq(sc, txq);
5493		}
5494
5495#ifdef TCP_OFFLOAD
5496		for_each_ofld_txq(vi, i, ofld_txq) {
5497			quiesce_wrq(sc, ofld_txq);
5498		}
5499#endif
5500
5501		for_each_rxq(vi, i, rxq) {
5502			quiesce_iq(sc, &rxq->iq);
5503			quiesce_fl(sc, &rxq->fl);
5504		}
5505
5506#ifdef TCP_OFFLOAD
5507		for_each_ofld_rxq(vi, i, ofld_rxq) {
5508			quiesce_iq(sc, &ofld_rxq->iq);
5509			quiesce_fl(sc, &ofld_rxq->fl);
5510		}
5511#endif
5512		free(vi->rss, M_CXGBE);
5513		free(vi->nm_rss, M_CXGBE);
5514	}
5515
5516	t4_teardown_vi_queues(vi);
5517	vi->flags &= ~VI_INIT_DONE;
5518
5519	return (0);
5520}
5521
5522static void
5523quiesce_txq(struct adapter *sc, struct sge_txq *txq)
5524{
5525	struct sge_eq *eq = &txq->eq;
5526	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
5527
5528	(void) sc;	/* unused */
5529
5530#ifdef INVARIANTS
5531	TXQ_LOCK(txq);
5532	MPASS((eq->flags & EQ_ENABLED) == 0);
5533	TXQ_UNLOCK(txq);
5534#endif
5535
5536	/* Wait for the mp_ring to empty. */
5537	while (!mp_ring_is_idle(txq->r)) {
5538		mp_ring_check_drainage(txq->r, 0);
5539		pause("rquiesce", 1);
5540	}
5541
5542	/* Then wait for the hardware to finish. */
5543	while (spg->cidx != htobe16(eq->pidx))
5544		pause("equiesce", 1);
5545
5546	/* Finally, wait for the driver to reclaim all descriptors. */
5547	while (eq->cidx != eq->pidx)
5548		pause("dquiesce", 1);
5549}
5550
5551static void
5552quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
5553{
5554
5555	/* XXXTX */
5556}
5557
5558static void
5559quiesce_iq(struct adapter *sc, struct sge_iq *iq)
5560{
5561	(void) sc;	/* unused */
5562
5563	/* Synchronize with the interrupt handler */
5564	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
5565		pause("iqfree", 1);
5566}
5567
5568static void
5569quiesce_fl(struct adapter *sc, struct sge_fl *fl)
5570{
5571	mtx_lock(&sc->sfl_lock);
5572	FL_LOCK(fl);
5573	fl->flags |= FL_DOOMED;
5574	FL_UNLOCK(fl);
5575	callout_stop(&sc->sfl_callout);
5576	mtx_unlock(&sc->sfl_lock);
5577
5578	KASSERT((fl->flags & FL_STARVING) == 0,
5579	    ("%s: still starving", __func__));
5580}
5581
5582static int
5583t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
5584    driver_intr_t *handler, void *arg, char *name)
5585{
5586	int rc;
5587
5588	irq->rid = rid;
5589	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
5590	    RF_SHAREABLE | RF_ACTIVE);
5591	if (irq->res == NULL) {
5592		device_printf(sc->dev,
5593		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
5594		return (ENOMEM);
5595	}
5596
5597	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
5598	    NULL, handler, arg, &irq->tag);
5599	if (rc != 0) {
5600		device_printf(sc->dev,
5601		    "failed to setup interrupt for rid %d, name %s: %d\n",
5602		    rid, name, rc);
5603	} else if (name)
5604		bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
5605
5606	return (rc);
5607}
5608
5609static int
5610t4_free_irq(struct adapter *sc, struct irq *irq)
5611{
5612	if (irq->tag)
5613		bus_teardown_intr(sc->dev, irq->res, irq->tag);
5614	if (irq->res)
5615		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
5616
5617	bzero(irq, sizeof(*irq));
5618
5619	return (0);
5620}
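
/*
 * Illustrative usage sketch (hypothetical rid and handler): the two
 * helpers above are designed as a strict pair.  t4_free_irq() checks
 * each field before releasing it and then zeroes the struct, so it is
 * safe to call on a partially set up irq, which keeps error handling
 * after a failed t4_alloc_irq() trivial.
 */
static int __unused
example_irq_setup(struct adapter *sc, struct irq *irq, driver_intr_t *isr)
{
	int rc;

	rc = t4_alloc_irq(sc, irq, 1, isr, sc, "example");
	if (rc != 0)
		t4_free_irq(sc, irq);	/* safe even on partial setup */
	return (rc);
}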
5621
5622static void
5623get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
5624{
5625
5626	regs->version = chip_id(sc) | chip_rev(sc) << 10;
5627	t4_get_regs(sc, buf, regs->len);
5628}
5629
5630#define	A_PL_INDIR_CMD	0x1f8
5631
5632#define	S_PL_AUTOINC	31
5633#define	M_PL_AUTOINC	0x1U
5634#define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
5635#define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
5636
5637#define	S_PL_VFID	20
5638#define	M_PL_VFID	0xffU
5639#define	V_PL_VFID(x)	((x) << S_PL_VFID)
5640#define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
5641
5642#define	S_PL_ADDR	0
5643#define	M_PL_ADDR	0xfffffU
5644#define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
5645#define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
5646
5647#define	A_PL_INDIR_DATA	0x1fc
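
/*
 * The S_/M_/V_/G_ macros above follow the convention used throughout
 * the chip register headers: S_ is a field's bit offset, M_ its
 * right-justified mask, V_ shifts a value into position, and G_
 * extracts it again.  A compile-time sanity check of the round trip
 * for the VFID field (V_PL_VFID(7) == 7 << 20 == 0x00700000):
 */
CTASSERT(G_PL_VFID(V_PL_VFID(0x7)) == 0x7);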
5648
5649static uint64_t
5650read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
5651{
5652	u32 stats[2];
5653
5654	mtx_assert(&sc->reg_lock, MA_OWNED);
5655	if (sc->flags & IS_VF) {
5656		stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
5657		stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
5658	} else {
5659		t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5660		    V_PL_VFID(G_FW_VIID_VIN(viid)) |
5661		    V_PL_ADDR(VF_MPS_REG(reg)));
5662		stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
5663		stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
5664	}
5665	return (((uint64_t)stats[1]) << 32 | stats[0]);
5666}
5667
5668static void
5669t4_get_vi_stats(struct adapter *sc, unsigned int viid,
5670    struct fw_vi_stats_vf *stats)
5671{
5672
5673#define GET_STAT(name) \
5674	read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
5675
5676	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
5677	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
5678	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
5679	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
5680	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
5681	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
5682	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
5683	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
5684	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
5685	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
5686	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
5687	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
5688	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
5689	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
5690	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
5691	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
5692
5693#undef GET_STAT
5694}
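
/*
 * For reference, GET_STAT above expands by token pasting; e.g.
 * GET_STAT(TX_VF_BCAST_BYTES) becomes
 *
 *	read_vf_stat(sc, viid, A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)
 *
 * so each assignment pulls one 64-bit MPS counter (low word, then high
 * word via autoincrement) through the indirect window.
 */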
5695
5696static void
5697t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
5698{
5699	int reg;
5700
5701	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5702	    V_PL_VFID(G_FW_VIID_VIN(viid)) |
5703	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
5704	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
5705	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
5706		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
5707}
5708
5709static void
5710vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
5711{
5712	struct timeval tv;
5713	const struct timeval interval = {0, 250000};	/* 250ms */
5714
5715	if (!(vi->flags & VI_INIT_DONE))
5716		return;
5717
5718	getmicrotime(&tv);
5719	timevalsub(&tv, &interval);
5720	if (timevalcmp(&tv, &vi->last_refreshed, <))
5721		return;
5722
5723	mtx_lock(&sc->reg_lock);
5724	t4_get_vi_stats(sc, vi->viid, &vi->stats);
5725	getmicrotime(&vi->last_refreshed);
5726	mtx_unlock(&sc->reg_lock);
5727}
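
/*
 * Illustrative sketch: the timeval arithmetic above is a simple "at
 * most once per interval" throttle.  Generalized, with a hypothetical
 * caller-supplied last-run timestamp:
 */
static int __unused
throttle_expired(struct timeval *last, const struct timeval *interval)
{
	struct timeval tv;

	getmicrotime(&tv);
	timevalsub(&tv, interval);
	if (timevalcmp(&tv, last, <))
		return (0);	/* less than one interval since *last */
	getmicrotime(last);
	return (1);
}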
5728
5729static void
5730cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
5731{
5732	u_int i, v, tnl_cong_drops, bg_map;
5733	struct timeval tv;
5734	const struct timeval interval = {0, 250000};	/* 250ms */
5735
5736	getmicrotime(&tv);
5737	timevalsub(&tv, &interval);
5738	if (timevalcmp(&tv, &pi->last_refreshed, <))
5739		return;
5740
5741	tnl_cong_drops = 0;
5742	t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
5743	bg_map = pi->mps_bg_map;
5744	while (bg_map) {
5745		i = ffs(bg_map) - 1;
5746		mtx_lock(&sc->reg_lock);
5747		t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
5748		    A_TP_MIB_TNL_CNG_DROP_0 + i);
5749		mtx_unlock(&sc->reg_lock);
5750		tnl_cong_drops += v;
5751		bg_map &= ~(1 << i);
5752	}
5753	pi->tnl_cong_drops = tnl_cong_drops;
5754	getmicrotime(&pi->last_refreshed);
5755}
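
/*
 * Worked example for the loop above: it visits the set bits of bg_map
 * with ffs(), clearing each bit as it goes.  With bg_map = 0x5 the body
 * runs for i = 0 and then i = 2, so only the TP MIB drop counters for
 * the buffer groups this port actually uses are read.
 */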
5756
5757static void
5758cxgbe_tick(void *arg)
5759{
5760	struct port_info *pi = arg;
5761	struct adapter *sc = pi->adapter;
5762
5763	PORT_LOCK_ASSERT_OWNED(pi);
5764	cxgbe_refresh_stats(sc, pi);
5765
5766	callout_schedule(&pi->tick, hz);
5767}
5768
5769void
5770vi_tick(void *arg)
5771{
5772	struct vi_info *vi = arg;
5773	struct adapter *sc = vi->pi->adapter;
5774
5775	vi_refresh_stats(sc, vi);
5776
5777	callout_schedule(&vi->tick, hz);
5778}
5779
5780/*
5781 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
5782 */
5783static char *caps_decoder[] = {
5784	"\20\001IPMI\002NCSI",				/* 0: NBM */
5785	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
5786	"\20\001INGRESS\002EGRESS",			/* 2: switch */
5787	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
5788	    "\006HASHFILTER\007ETHOFLD",
5789	"\20\001TOE",					/* 4: TOE */
5790	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
5791	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
5792	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
5793	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
5794	    "\007T10DIF"
5795	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
5796	"\20\001LOOKASIDE\002TLSKEYS",			/* 7: Crypto */
5797	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
5798		    "\004PO_INITIATOR\005PO_TARGET",
5799};
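
/*
 * These strings use the kernel printf(9) "%b" bit-field format: a
 * leading base byte (\20 = base 16) followed by <bit number, name>
 * pairs, with bit numbers starting at 1.  For example, decoding the
 * value 0x3 with caps_decoder[0] ("\20\001IPMI\002NCSI") produces
 *
 *	3<IPMI,NCSI>
 */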
5800
5801void
5802t4_sysctls(struct adapter *sc)
5803{
5804	struct sysctl_ctx_list *ctx;
5805	struct sysctl_oid *oid;
5806	struct sysctl_oid_list *children, *c0;
5807	static char *doorbells = "\20\1UDB\2WCWR\3UDBWC\4KDB";
5808
5809	ctx = device_get_sysctl_ctx(sc->dev);
5810
5811	/*
5812	 * dev.t4nex.X.
5813	 */
5814	oid = device_get_sysctl_tree(sc->dev);
5815	c0 = children = SYSCTL_CHILDREN(oid);
5816
5817	sc->sc_do_rxcopy = 1;
5818	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
5819	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
5820
5821	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
5822	    sc->params.nports, "# of ports");
5823
5824	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
5825	    CTLTYPE_STRING | CTLFLAG_RD, doorbells, (uintptr_t)&sc->doorbells,
5826	    sysctl_bitfield_8b, "A", "available doorbells");
5827
5828	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
5829	    sc->params.vpd.cclk, "core clock frequency (in kHz)");
5830
5831	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
5832	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
5833	    sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
5834	    "interrupt holdoff timer values (us)");
5835
5836	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
5837	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
5838	    sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
5839	    "interrupt holdoff packet counter values");
5840
5841	t4_sge_sysctls(sc, ctx, children);
5842
5843	sc->lro_timeout = 100;
5844	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
5845	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
5846
5847	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
5848	    &sc->debug_flags, 0, "flags to enable runtime debugging");
5849
5850	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
5851	    CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
5852
5853	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
5854	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
5855
5856	if (sc->flags & IS_VF)
5857		return;
5858
5859	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
5860	    NULL, chip_rev(sc), "chip hardware revision");
5861
5862	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
5863	    CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
5864
5865	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
5866	    CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
5867
5868	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
5869	    CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
5870
5871	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
5872	    CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
5873
5874	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
5875	    CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
5876
5877	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
5878	    sc->er_version, 0, "expansion ROM version");
5879
5880	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
5881	    sc->bs_version, 0, "bootstrap firmware version");
5882
5883	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
5884	    NULL, sc->params.scfg_vers, "serial config version");
5885
5886	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
5887	    NULL, sc->params.vpd_vers, "VPD version");
5888
5889	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
5890	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
5891
5892	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
5893	    sc->cfcsum, "config file checksum");
5894
5895#define SYSCTL_CAP(name, n, text) \
5896	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
5897	    CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], (uintptr_t)&sc->name, \
5898	    sysctl_bitfield_16b, "A", "available " text " capabilities")
5899
5900	SYSCTL_CAP(nbmcaps, 0, "NBM");
5901	SYSCTL_CAP(linkcaps, 1, "link");
5902	SYSCTL_CAP(switchcaps, 2, "switch");
5903	SYSCTL_CAP(niccaps, 3, "NIC");
5904	SYSCTL_CAP(toecaps, 4, "TCP offload");
5905	SYSCTL_CAP(rdmacaps, 5, "RDMA");
5906	SYSCTL_CAP(iscsicaps, 6, "iSCSI");
5907	SYSCTL_CAP(cryptocaps, 7, "crypto");
5908	SYSCTL_CAP(fcoecaps, 8, "FCoE");
5909#undef SYSCTL_CAP
5910
5911	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
5912	    NULL, sc->tids.nftids, "number of filters");
5913
5914	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
5915	    CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
5916	    "chip temperature (in Celsius)");
5917
5918	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg", CTLTYPE_STRING |
5919	    CTLFLAG_RD, sc, 0, sysctl_loadavg, "A",
5920	    "microprocessor load averages (debug firmwares only)");
5921
5922	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD,
5923	    &sc->params.core_vdd, 0, "core Vdd (in mV)");
5924
5925	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
5926	    CTLTYPE_STRING | CTLFLAG_RD, sc, LOCAL_CPUS,
5927	    sysctl_cpus, "A", "local CPUs");
5928
5929	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
5930	    CTLTYPE_STRING | CTLFLAG_RD, sc, INTR_CPUS,
5931	    sysctl_cpus, "A", "preferred CPUs for interrupts");
5932
5933	/*
5934	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
5935	 */
5936	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
5937	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
5938	    "logs and miscellaneous information");
5939	children = SYSCTL_CHILDREN(oid);
5940
5941	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
5942	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5943	    sysctl_cctrl, "A", "congestion control");
5944
5945	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
5946	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5947	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
5948
5949	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
5950	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
5951	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
5952
5953	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
5954	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
5955	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
5956
5957	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
5958	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
5959	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
5960
5961	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
5962	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
5963	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
5964
5965	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
5966	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
5967	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
5968
5969	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
5970	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5971	    chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
5972	    "A", "CIM logic analyzer");
5973
5974	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
5975	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5976	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
5977
5978	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
5979	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
5980	    sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
5981
5982	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
5983	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
5984	    sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
5985
5986	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
5987	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
5988	    sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
5989
5990	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
5991	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
5992	    sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
5993
5994	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
5995	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
5996	    sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
5997
5998	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
5999	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
6000	    sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
6001
6002	if (chip_id(sc) > CHELSIO_T4) {
6003		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
6004		    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
6005		    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
6006
6007		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
6008		    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
6009		    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
6010	}
6011
6012	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
6013	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6014	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
6015
6016	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
6017	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6018	    sysctl_cim_qcfg, "A", "CIM queue configuration");
6019
6020	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
6021	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6022	    sysctl_cpl_stats, "A", "CPL statistics");
6023
6024	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
6025	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6026	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
6027
6028	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
6029	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6030	    sysctl_devlog, "A", "firmware's device log");
6031
6032	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
6033	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6034	    sysctl_fcoe_stats, "A", "FCoE statistics");
6035
6036	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
6037	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6038	    sysctl_hw_sched, "A", "hardware scheduler");
6039
6040	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
6041	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6042	    sysctl_l2t, "A", "hardware L2 table");
6043
6044	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
6045	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6046	    sysctl_smt, "A", "hardware source MAC table");
6047
6048#ifdef INET6
6049	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "clip",
6050	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6051	    sysctl_clip, "A", "active CLIP table entries");
6052#endif
6053
6054	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
6055	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6056	    sysctl_lb_stats, "A", "loopback statistics");
6057
6058	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
6059	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6060	    sysctl_meminfo, "A", "memory regions");
6061
6062	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
6063	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6064	    chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
6065	    "A", "MPS TCAM entries");
6066
6067	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
6068	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6069	    sysctl_path_mtus, "A", "path MTUs");
6070
6071	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
6072	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6073	    sysctl_pm_stats, "A", "PM statistics");
6074
6075	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
6076	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6077	    sysctl_rdma_stats, "A", "RDMA statistics");
6078
6079	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
6080	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6081	    sysctl_tcp_stats, "A", "TCP statistics");
6082
6083	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
6084	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6085	    sysctl_tids, "A", "TID information");
6086
6087	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
6088	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6089	    sysctl_tp_err_stats, "A", "TP error statistics");
6090
6091	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
6092	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
6093	    "TP logic analyzer event capture mask");
6094
6095	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
6096	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6097	    sysctl_tp_la, "A", "TP logic analyzer");
6098
6099	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
6100	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6101	    sysctl_tx_rate, "A", "Tx rate");
6102
6103	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
6104	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6105	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
6106
6107	if (chip_id(sc) >= CHELSIO_T5) {
6108		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
6109		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6110		    sysctl_wcwr_stats, "A", "write combined work requests");
6111	}
6112
6113#ifdef TCP_OFFLOAD
6114	if (is_offload(sc)) {
6115		int i;
6116		char s[4];
6117
6118		/*
6119		 * dev.t4nex.X.toe.
6120		 */
6121		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
6122		    NULL, "TOE parameters");
6123		children = SYSCTL_CHILDREN(oid);
6124
6125		sc->tt.cong_algorithm = -1;
6126		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
6127		    CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
6128		    "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
6129		    "3 = highspeed)");
6130
6131		sc->tt.sndbuf = 256 * 1024;
6132		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
6133		    &sc->tt.sndbuf, 0, "max hardware send buffer size");
6134
6135		sc->tt.ddp = 0;
6136		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
6137		    &sc->tt.ddp, 0, "DDP allowed");
6138
6139		sc->tt.rx_coalesce = 1;
6140		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
6141		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
6142
6143		sc->tt.tls = 0;
6144		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
6145		    &sc->tt.tls, 0, "Inline TLS allowed");
6146
6147		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
6148		    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports,
6149		    "I", "TCP ports that use inline TLS+TOE RX");
6150
6151		sc->tt.tx_align = 1;
6152		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
6153		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
6154
6155		sc->tt.tx_zcopy = 0;
6156		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
6157		    CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
6158		    "Enable zero-copy aio_write(2)");
6159
6160		sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
6161		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
6162		    "cop_managed_offloading", CTLFLAG_RW,
6163		    &sc->tt.cop_managed_offloading, 0,
6164		    "COP (Connection Offload Policy) controls all TOE offload");
6165
6166		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
6167		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
6168		    "TP timer tick (us)");
6169
6170		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
6171		    CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
6172		    "TCP timestamp tick (us)");
6173
6174		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
6175		    CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
6176		    "DACK tick (us)");
6177
6178		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
6179		    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
6180		    "IU", "DACK timer (us)");
6181
6182		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
6183		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
6184		    sysctl_tp_timer, "LU", "Minimum retransmit interval (us)");
6185
6186		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
6187		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
6188		    sysctl_tp_timer, "LU", "Maximum retransmit interval (us)");
6189
6190		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
6191		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
6192		    sysctl_tp_timer, "LU", "Persist timer min (us)");
6193
6194		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
6195		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
6196		    sysctl_tp_timer, "LU", "Persist timer max (us)");
6197
6198		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
6199		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
6200		    sysctl_tp_timer, "LU", "Keepalive idle timer (us)");
6201
6202		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
6203		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
6204		    sysctl_tp_timer, "LU", "Keepalive interval timer (us)");
6205
6206		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
6207		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
6208		    sysctl_tp_timer, "LU", "Initial SRTT (us)");
6209
6210		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
6211		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
6212		    sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
6213
6214		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
6215		    CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX,
6216		    sysctl_tp_shift_cnt, "IU",
6217		    "Number of SYN retransmissions before abort");
6218
6219		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
6220		    CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2,
6221		    sysctl_tp_shift_cnt, "IU",
6222		    "Number of retransmissions before abort");
6223
6224		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
6225		    CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2,
6226		    sysctl_tp_shift_cnt, "IU",
6227		    "Number of keepalive probes before abort");
6228
6229		oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
6230		    CTLFLAG_RD, NULL, "TOE retransmit backoffs");
6231		children = SYSCTL_CHILDREN(oid);
6232		for (i = 0; i < 16; i++) {
6233			snprintf(s, sizeof(s), "%u", i);
6234			SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
6235			    CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff,
6236			    "IU", "TOE retransmit backoff");
6237		}
6238	}
6239#endif
6240}
6241
6242void
6243vi_sysctls(struct vi_info *vi)
6244{
6245	struct sysctl_ctx_list *ctx;
6246	struct sysctl_oid *oid;
6247	struct sysctl_oid_list *children;
6248
6249	ctx = device_get_sysctl_ctx(vi->dev);
6250
6251	/*
6252	 * dev.v?(cxgbe|cxl).X.
6253	 */
6254	oid = device_get_sysctl_tree(vi->dev);
6255	children = SYSCTL_CHILDREN(oid);
6256
6257	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6258	    vi->viid, "VI identifier");
6259	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6260	    &vi->nrxq, 0, "# of rx queues");
6261	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6262	    &vi->ntxq, 0, "# of tx queues");
6263	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6264	    &vi->first_rxq, 0, "index of first rx queue");
6265	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6266	    &vi->first_txq, 0, "index of first tx queue");
6267	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6268	    vi->rss_base, "start of RSS indirection table");
6269	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6270	    vi->rss_size, "size of RSS indirection table");
6271
6272	if (IS_MAIN_VI(vi)) {
6273		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6274		    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
6275		    "Reserve queue 0 for non-flowid packets");
6276	}
6277
6278#ifdef TCP_OFFLOAD
6279	if (vi->nofldrxq != 0) {
6280		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6281		    &vi->nofldrxq, 0,
6282		    "# of rx queues for offloaded TCP connections");
6283		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
6284		    &vi->nofldtxq, 0,
6285		    "# of tx queues for offloaded TCP connections");
6286		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
6287		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
6288		    "index of first TOE rx queue");
6289		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
6290		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
6291		    "index of first TOE tx queue");
6292		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
6293		    CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6294		    sysctl_holdoff_tmr_idx_ofld, "I",
6295		    "holdoff timer index for TOE queues");
6296		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
6297		    CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6298		    sysctl_holdoff_pktc_idx_ofld, "I",
6299		    "holdoff packet counter index for TOE queues");
6300	}
6301#endif
6302#ifdef DEV_NETMAP
6303	if (vi->nnmrxq != 0) {
6304		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
6305		    &vi->nnmrxq, 0, "# of netmap rx queues");
6306		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
6307		    &vi->nnmtxq, 0, "# of netmap tx queues");
6308		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
6309		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
6310		    "index of first netmap rx queue");
6311		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
6312		    CTLFLAG_RD, &vi->first_nm_txq, 0,
6313		    "index of first netmap tx queue");
6314	}
6315#endif
6316
6317	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
6318	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
6319	    "holdoff timer index");
6320	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
6321	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
6322	    "holdoff packet counter index");
6323
6324	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
6325	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
6326	    "rx queue size");
6327	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
6328	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
6329	    "tx queue size");
6330}
6331
6332static void
6333cxgbe_sysctls(struct port_info *pi)
6334{
6335	struct sysctl_ctx_list *ctx;
6336	struct sysctl_oid *oid;
6337	struct sysctl_oid_list *children, *children2;
6338	struct adapter *sc = pi->adapter;
6339	int i;
6340	char name[16];
6341	static char *tc_flags = "\20\1USER\2SYNC\3ASYNC\4ERR";
6342
6343	ctx = device_get_sysctl_ctx(pi->dev);
6344
6345	/*
6346	 * dev.cxgbe.X.
6347	 */
6348	oid = device_get_sysctl_tree(pi->dev);
6349	children = SYSCTL_CHILDREN(oid);
6350
6351	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
6352	   CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
6353	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
6354		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6355		    CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
6356		    "PHY temperature (in Celsius)");
6357		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
6358		    CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
6359		    "PHY firmware version");
6360	}
6361
6362	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
6363	    CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A",
6364	    "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
6365	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
6366	    CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A",
6367	    "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
6368	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
6369	    CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I",
6370	    "autonegotiation (-1 = not supported)");
6371
6372	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
6373	    port_top_speed(pi), "max speed (in Gbps)");
6374	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
6375	    pi->mps_bg_map, "MPS buffer group map");
6376	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
6377	    NULL, pi->rx_e_chan_map, "TP rx e-channel map");
6378
6379	if (sc->flags & IS_VF)
6380		return;
6381
6382	/*
6383	 * dev.(cxgbe|cxl).X.tc.
6384	 */
6385	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
6386	    "Tx scheduler traffic classes (cl_rl)");
6387	children2 = SYSCTL_CHILDREN(oid);
6388	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
6389	    CTLFLAG_RW, &pi->sched_params->pktsize, 0,
6390	    "pktsize for per-flow cl-rl (0 means up to the driver)");
6391	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
6392	    CTLFLAG_RW, &pi->sched_params->burstsize, 0,
6393	    "burstsize for per-flow cl-rl (0 means up to the driver)");
6394	for (i = 0; i < sc->chip_params->nsched_cls; i++) {
6395		struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
6396
6397		snprintf(name, sizeof(name), "%d", i);
6398		children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
6399		    SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
6400		    "traffic class"));
6401		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
6402		    CTLTYPE_STRING | CTLFLAG_RD, tc_flags, (uintptr_t)&tc->flags,
6403		    sysctl_bitfield_8b, "A", "flags");
6404		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
6405		    CTLFLAG_RD, &tc->refcount, 0, "references to this class");
6406		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
6407		    CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
6408		    sysctl_tc_params, "A", "traffic class parameters");
6409	}
6410
6411	/*
6412	 * dev.cxgbe.X.stats.
6413	 */
6414	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
6415	    NULL, "port statistics");
6416	children = SYSCTL_CHILDREN(oid);
6417	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
6418	    &pi->tx_parse_error, 0,
6419	    "# of tx packets with invalid length or # of segments");
6420
6421#define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
6422	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
6423	    CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
6424	    sysctl_handle_t4_reg64, "QU", desc)
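
/*
 * For reference: each SYSCTL_ADD_T4_REG64() below registers a read-only
 * 64-bit node backed by sysctl_handle_t4_reg64(), which reads the given
 * MPS register pair on every access, so these counters are always live
 * rather than snapshots.
 */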
6425
6426	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
6427	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
6428	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
6429	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
6430	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
6431	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
6432	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
6433	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
6434	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
6435	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
6436	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
6437	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
6438	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
6439	    "# of tx frames in this range",
6440	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
6441	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
6442	    "# of tx frames in this range",
6443	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
6444	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
6445	    "# of tx frames in this range",
6446	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
6447	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
6448	    "# of tx frames in this range",
6449	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
6450	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
6451	    "# of tx frames in this range",
6452	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
6453	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
6454	    "# of tx frames in this range",
6455	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
6456	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
6457	    "# of tx frames in this range",
6458	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
6459	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
6460	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
6461	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
6462	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
6463	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
6464	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
6465	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
6466	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
6467	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
6468	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
6469	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
6470	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
6471	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
6472	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
6473	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
6474	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
6475	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
6476	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
6477	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
6478	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
6479
6480	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
6481	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
6482	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
6483	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
6484	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
6485	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
6486	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
6487	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
6488	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
6489	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
6490	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
6491	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
6492	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
6493	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
6494	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
6495	    "# of frames received with bad FCS",
6496	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
6497	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
6498	    "# of frames received with length error",
6499	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
6500	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
6501	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
6502	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
6503	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
6504	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
6505	    "# of rx frames in this range",
6506	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
6507	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
6508	    "# of rx frames in this range",
6509	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
6510	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
6511	    "# of rx frames in this range",
6512	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
6513	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
6514	    "# of rx frames in this range",
6515	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
6516	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
6517	    "# of rx frames in this range",
6518	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
6519	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
6520	    "# of rx frames in this range",
6521	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
6522	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
6523	    "# of rx frames in this range",
6524	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
6525	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
6526	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
6527	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
6528	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
6529	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
6530	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
6531	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
6532	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
6533	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
6534	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
6535	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
6536	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
6537	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
6538	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
6539	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
6540	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
6541	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
6542	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
6543
6544#undef SYSCTL_ADD_T4_REG64
6545
6546#define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
6547	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
6548	    &pi->stats.name, desc)
6549
6550	/* We get these from port_stats and they may be stale by up to 1s */
6551	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
6552	    "# drops due to buffer-group 0 overflows");
6553	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
6554	    "# drops due to buffer-group 1 overflows");
6555	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
6556	    "# drops due to buffer-group 2 overflows");
6557	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
6558	    "# drops due to buffer-group 3 overflows");
6559	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
6560	    "# of buffer-group 0 truncated packets");
6561	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
6562	    "# of buffer-group 1 truncated packets");
6563	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
6564	    "# of buffer-group 2 truncated packets");
6565	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
6566	    "# of buffer-group 3 truncated packets");
6567
6568#undef SYSCTL_ADD_T4_PORTSTAT
6569
6570	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records",
6571	    CTLFLAG_RD, &pi->tx_tls_records,
6572	    "# of TLS records transmitted");
6573	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets",
6574	    CTLFLAG_RD, &pi->tx_tls_octets,
6575	    "# of payload octets in transmitted TLS records");
6576	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records",
6577	    CTLFLAG_RD, &pi->rx_tls_records,
6578	    "# of TLS records received");
6579	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets",
6580	    CTLFLAG_RD, &pi->rx_tls_octets,
6581	    "# of payload octets in received TLS records");
6582}
6583
6584static int
6585sysctl_int_array(SYSCTL_HANDLER_ARGS)
6586{
6587	int rc, *i, space = 0;
6588	struct sbuf sb;
6589
6590	sbuf_new_for_sysctl(&sb, NULL, 64, req);
6591	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
6592		if (space)
6593			sbuf_printf(&sb, " ");
6594		sbuf_printf(&sb, "%d", *i);
6595		space = 1;
6596	}
6597	rc = sbuf_finish(&sb);
6598	sbuf_delete(&sb);
6599	return (rc);
6600}
6601
6602static int
6603sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
6604{
6605	int rc;
6606	struct sbuf *sb;
6607
6608	rc = sysctl_wire_old_buffer(req, 0);
6609	if (rc != 0)
6610		return (rc);
6611
6612	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6613	if (sb == NULL)
6614		return (ENOMEM);
6615
6616	sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
6617	rc = sbuf_finish(sb);
6618	sbuf_delete(sb);
6619
6620	return (rc);
6621}
6622
6623static int
6624sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
6625{
6626	int rc;
6627	struct sbuf *sb;
6628
6629	rc = sysctl_wire_old_buffer(req, 0);
6630	if (rc != 0)
6631		return (rc);
6632
6633	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6634	if (sb == NULL)
6635		return (ENOMEM);
6636
6637	sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
6638	rc = sbuf_finish(sb);
6639	sbuf_delete(sb);
6640
6641	return (rc);
6642}
6643
6644static int
6645sysctl_btphy(SYSCTL_HANDLER_ARGS)
6646{
6647	struct port_info *pi = arg1;
6648	int op = arg2;
6649	struct adapter *sc = pi->adapter;
6650	u_int v;
6651	int rc;
6652
6653	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
6654	if (rc)
6655		return (rc);
6656	/* XXX: magic numbers */
6657	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
6658	    &v);
6659	end_synchronized_op(sc, 0);
6660	if (rc)
6661		return (rc);
6662	if (op == 0)
6663		v /= 256;
6664
6665	rc = sysctl_handle_int(oidp, &v, 0, req);
6666	return (rc);
6667}
6668
6669static int
6670sysctl_noflowq(SYSCTL_HANDLER_ARGS)
6671{
6672	struct vi_info *vi = arg1;
6673	int rc, val;
6674
6675	val = vi->rsrv_noflowq;
6676	rc = sysctl_handle_int(oidp, &val, 0, req);
6677	if (rc != 0 || req->newptr == NULL)
6678		return (rc);
6679
6680	if ((val >= 1) && (vi->ntxq > 1))
6681		vi->rsrv_noflowq = 1;
6682	else
6683		vi->rsrv_noflowq = 0;
6684
6685	return (rc);
6686}
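
/*
 * Illustrative sketch: sysctl_noflowq() above follows the canonical
 * read-modify-write handler shape used throughout this file -- publish
 * the current value, let sysctl_handle_int() process a possible new
 * value, and bail early on error or on a read-only request
 * (req->newptr == NULL):
 */
static int __unused
sysctl_example_int(SYSCTL_HANDLER_ARGS)
{
	int rc, val;

	val = *(int *)arg1;			/* current value */
	rc = sysctl_handle_int(oidp, &val, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);			/* error or plain read */
	*(int *)arg1 = val;			/* validated write-back */
	return (0);
}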
6687
6688static int
6689sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
6690{
6691	struct vi_info *vi = arg1;
6692	struct adapter *sc = vi->pi->adapter;
6693	int idx, rc, i;
6694	struct sge_rxq *rxq;
6695	uint8_t v;
6696
6697	idx = vi->tmr_idx;
6698
6699	rc = sysctl_handle_int(oidp, &idx, 0, req);
6700	if (rc != 0 || req->newptr == NULL)
6701		return (rc);
6702
6703	if (idx < 0 || idx >= SGE_NTIMERS)
6704		return (EINVAL);
6705
6706	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6707	    "t4tmr");
6708	if (rc)
6709		return (rc);
6710
6711	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
6712	for_each_rxq(vi, i, rxq) {
6713#ifdef atomic_store_rel_8
6714		atomic_store_rel_8(&rxq->iq.intr_params, v);
6715#else
6716		rxq->iq.intr_params = v;
6717#endif
6718	}
6719	vi->tmr_idx = idx;
6720
6721	end_synchronized_op(sc, LOCK_HELD);
6722	return (0);
6723}
6724
6725static int
6726sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
6727{
6728	struct vi_info *vi = arg1;
6729	struct adapter *sc = vi->pi->adapter;
6730	int idx, rc;
6731
6732	idx = vi->pktc_idx;
6733
6734	rc = sysctl_handle_int(oidp, &idx, 0, req);
6735	if (rc != 0 || req->newptr == NULL)
6736		return (rc);
6737
6738	if (idx < -1 || idx >= SGE_NCOUNTERS)
6739		return (EINVAL);
6740
6741	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6742	    "t4pktc");
6743	if (rc)
6744		return (rc);
6745
6746	if (vi->flags & VI_INIT_DONE)
6747		rc = EBUSY; /* cannot be changed once the queues are created */
6748	else
6749		vi->pktc_idx = idx;
6750
6751	end_synchronized_op(sc, LOCK_HELD);
6752	return (rc);
6753}
6754
6755static int
6756sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
6757{
6758	struct vi_info *vi = arg1;
6759	struct adapter *sc = vi->pi->adapter;
6760	int qsize, rc;
6761
6762	qsize = vi->qsize_rxq;
6763
6764	rc = sysctl_handle_int(oidp, &qsize, 0, req);
6765	if (rc != 0 || req->newptr == NULL)
6766		return (rc);
6767
6768	if (qsize < 128 || (qsize & 7))
6769		return (EINVAL);
6770
6771	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6772	    "t4rxqs");
6773	if (rc)
6774		return (rc);
6775
6776	if (vi->flags & VI_INIT_DONE)
6777		rc = EBUSY; /* cannot be changed once the queues are created */
6778	else
6779		vi->qsize_rxq = qsize;
6780
6781	end_synchronized_op(sc, LOCK_HELD);
6782	return (rc);
6783}
6784
6785static int
6786sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
6787{
6788	struct vi_info *vi = arg1;
6789	struct adapter *sc = vi->pi->adapter;
6790	int qsize, rc;
6791
6792	qsize = vi->qsize_txq;
6793
6794	rc = sysctl_handle_int(oidp, &qsize, 0, req);
6795	if (rc != 0 || req->newptr == NULL)
6796		return (rc);
6797
6798	if (qsize < 128 || qsize > 65536)
6799		return (EINVAL);
6800
6801	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6802	    "t4txqs");
6803	if (rc)
6804		return (rc);
6805
6806	if (vi->flags & VI_INIT_DONE)
6807		rc = EBUSY; /* cannot be changed once the queues are created */
6808	else
6809		vi->qsize_txq = qsize;
6810
6811	end_synchronized_op(sc, LOCK_HELD);
6812	return (rc);
6813}
6814
6815static int
6816sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
6817{
6818	struct port_info *pi = arg1;
6819	struct adapter *sc = pi->adapter;
6820	struct link_config *lc = &pi->link_cfg;
6821	int rc;
6822
6823	if (req->newptr == NULL) {
6824		struct sbuf *sb;
6825		static char *bits = "\20\1RX\2TX\3AUTO";
6826
6827		rc = sysctl_wire_old_buffer(req, 0);
6828		if (rc != 0)
6829			return (rc);
6830
6831		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6832		if (sb == NULL)
6833			return (ENOMEM);
6834
6835		if (lc->link_ok) {
6836			sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
6837			    (lc->requested_fc & PAUSE_AUTONEG), bits);
6838		} else {
6839			sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
6840			    PAUSE_RX | PAUSE_AUTONEG), bits);
6841		}
6842		rc = sbuf_finish(sb);
6843		sbuf_delete(sb);
6844	} else {
6845		char s[2];
6846		int n;
6847
6848		s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
6849		    PAUSE_AUTONEG));
6850		s[1] = 0;
6851
6852		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6853		if (rc != 0)
6854			return (rc);
6855
6856		if (s[1] != 0)
6857			return (EINVAL);
6858		if (s[0] < '0' || s[0] > '9')
6859			return (EINVAL);	/* not a number */
6860		n = s[0] - '0';
6861		if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
6862			return (EINVAL);	/* some other bit is set too */
6863
6864		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6865		    "t4PAUSE");
6866		if (rc)
6867			return (rc);
6868		PORT_LOCK(pi);
6869		lc->requested_fc = n;
6870		fixup_link_config(pi);
6871		if (pi->up_vis > 0)
6872			rc = apply_link_config(pi);
6873		set_current_media(pi);
6874		PORT_UNLOCK(pi);
6875		end_synchronized_op(sc, 0);
6876	}
6877
6878	return (rc);
6879}
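
/*
 * Hypothetical usage from userland: a read decodes the current flags
 * with "%b", and a write expects a single digit whose bits are
 * PAUSE_RX (1), PAUSE_TX (2), and PAUSE_AUTONEG (4), e.g.
 *
 *	# sysctl dev.cxgbe.0.pause_settings		-> 3<RX,TX>
 *	# sysctl dev.cxgbe.0.pause_settings=3		(request rx+tx)
 */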
6880
6881static int
6882sysctl_fec(SYSCTL_HANDLER_ARGS)
6883{
6884	struct port_info *pi = arg1;
6885	struct adapter *sc = pi->adapter;
6886	struct link_config *lc = &pi->link_cfg;
6887	int rc;
6888	int8_t old;
6889
6890	if (req->newptr == NULL) {
6891		struct sbuf *sb;
6892		static char *bits = "\20\1RS\2BASE-R\3RSVD1\4RSVD2\5RSVD3\6AUTO";
6893
6894		rc = sysctl_wire_old_buffer(req, 0);
6895		if (rc != 0)
6896			return (rc);
6897
6898		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6899		if (sb == NULL)
6900			return (ENOMEM);
6901
6902		/*
6903		 * Display the requested_fec when the link is down -- the actual
6904		 * FEC makes sense only when the link is up.
6905		 */
6906		if (lc->link_ok) {
6907			sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
6908			    (lc->requested_fec & FEC_AUTO), bits);
6909		} else {
6910			sbuf_printf(sb, "%b", lc->requested_fec, bits);
6911		}
6912		rc = sbuf_finish(sb);
6913		sbuf_delete(sb);
6914	} else {
6915		char s[3];
6916		int n;
6917
6918		snprintf(s, sizeof(s), "%d",
6919		    lc->requested_fec == FEC_AUTO ? -1 :
6920		    lc->requested_fec & M_FW_PORT_CAP32_FEC);
6921
6922		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6923		if (rc != 0)
6924			return (rc);
6925
6926		n = strtol(&s[0], NULL, 0);
6927		if (n < 0 || n & FEC_AUTO)
6928			n = FEC_AUTO;
6929		else {
6930			if (n & ~M_FW_PORT_CAP32_FEC)
6931				return (EINVAL); /* some other bit is set too */
6932			if (!powerof2(n))
6933				return (EINVAL); /* one bit can be set at most */
6934		}
6935
6936		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6937		    "t4fec");
6938		if (rc)
6939			return (rc);
6940		PORT_LOCK(pi);
6941		old = lc->requested_fec;
6942		if (n == FEC_AUTO)
6943			lc->requested_fec = FEC_AUTO;
6944		else if (n == 0)
6945			lc->requested_fec = FEC_NONE;
6946		else {
6947			if ((lc->supported | V_FW_PORT_CAP32_FEC(n)) !=
6948			    lc->supported) {
6949				rc = ENOTSUP;
6950				goto done;
6951			}
6952			lc->requested_fec = n;
6953		}
6954		fixup_link_config(pi);
6955		if (pi->up_vis > 0) {
6956			rc = apply_link_config(pi);
6957			if (rc != 0) {
6958				lc->requested_fec = old;
6959				if (rc == FW_EPROTO)
6960					rc = ENOTSUP;
6961			}
6962		}
6963done:
6964		PORT_UNLOCK(pi);
6965		end_synchronized_op(sc, 0);
6966	}
6967
6968	return (rc);
6969}
6970
6971static int
6972sysctl_autoneg(SYSCTL_HANDLER_ARGS)
6973{
6974	struct port_info *pi = arg1;
6975	struct adapter *sc = pi->adapter;
6976	struct link_config *lc = &pi->link_cfg;
6977	int rc, val;
6978
6979	if (lc->supported & FW_PORT_CAP32_ANEG)
6980		val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
6981	else
6982		val = -1;
6983	rc = sysctl_handle_int(oidp, &val, 0, req);
6984	if (rc != 0 || req->newptr == NULL)
6985		return (rc);
6986	if (val == 0)
6987		val = AUTONEG_DISABLE;
6988	else if (val == 1)
6989		val = AUTONEG_ENABLE;
6990	else
6991		val = AUTONEG_AUTO;
6992
6993	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6994	    "t4aneg");
6995	if (rc)
6996		return (rc);
6997	PORT_LOCK(pi);
6998	if (val == AUTONEG_ENABLE && !(lc->supported & FW_PORT_CAP32_ANEG)) {
6999		rc = ENOTSUP;
7000		goto done;
7001	}
7002	lc->requested_aneg = val;
7003	fixup_link_config(pi);
7004	if (pi->up_vis > 0)
7005		rc = apply_link_config(pi);
7006	set_current_media(pi);
7007done:
7008	PORT_UNLOCK(pi);
7009	end_synchronized_op(sc, 0);
7010	return (rc);
7011}
7012
7013static int
7014sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
7015{
7016	struct adapter *sc = arg1;
7017	int reg = arg2;
7018	uint64_t val;
7019
7020	val = t4_read_reg64(sc, reg);
7021
7022	return (sysctl_handle_64(oidp, &val, 0, req));
7023}
7024
7025static int
7026sysctl_temperature(SYSCTL_HANDLER_ARGS)
7027{
7028	struct adapter *sc = arg1;
7029	int rc, t;
7030	uint32_t param, val;
7031
7032	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
7033	if (rc)
7034		return (rc);
7035	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7036	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7037	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
7038	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7039	end_synchronized_op(sc, 0);
7040	if (rc)
7041		return (rc);
7042
7043	/* unknown is returned as 0 but we display -1 in that case */
7044	t = val == 0 ? -1 : val;
7045
7046	rc = sysctl_handle_int(oidp, &t, 0, req);
7047	return (rc);
7048}
7049
7050static int
7051sysctl_loadavg(SYSCTL_HANDLER_ARGS)
7052{
7053	struct adapter *sc = arg1;
7054	struct sbuf *sb;
7055	int rc;
7056	uint32_t param, val;
7057
7058	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
7059	if (rc)
7060		return (rc);
7061	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7062	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
7063	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7064	end_synchronized_op(sc, 0);
7065	if (rc)
7066		return (rc);
7067
7068	rc = sysctl_wire_old_buffer(req, 0);
7069	if (rc != 0)
7070		return (rc);
7071
7072	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7073	if (sb == NULL)
7074		return (ENOMEM);
7075
7076	if (val == 0xffffffff) {
7077		/* Only debug and custom firmwares report load averages. */
7078		sbuf_printf(sb, "not available");
7079	} else {
7080		sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
7081		    (val >> 16) & 0xff);
7082	}
7083	rc = sbuf_finish(sb);
7084	sbuf_delete(sb);
7085
7086	return (rc);
7087}
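
/*
 * Worked example for the decode above: the firmware packs its three
 * load averages into one 32-bit word, one byte each.  val = 0x00190a05
 * prints as "5 10 25": 5 from bits 7:0, 10 from bits 15:8, and 25 from
 * bits 23:16.
 */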
7088
7089static int
7090sysctl_cctrl(SYSCTL_HANDLER_ARGS)
7091{
7092	struct adapter *sc = arg1;
7093	struct sbuf *sb;
7094	int rc, i;
7095	uint16_t incr[NMTUS][NCCTRL_WIN];
7096	static const char *dec_fac[] = {
7097		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
7098		"0.9375"
7099	};
7100
7101	rc = sysctl_wire_old_buffer(req, 0);
7102	if (rc != 0)
7103		return (rc);
7104
7105	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7106	if (sb == NULL)
7107		return (ENOMEM);
7108
7109	t4_read_cong_tbl(sc, incr);
7110
7111	for (i = 0; i < NCCTRL_WIN; ++i) {
7112		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
7113		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
7114		    incr[5][i], incr[6][i], incr[7][i]);
7115		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
7116		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
7117		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
7118		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
7119	}
7120
7121	rc = sbuf_finish(sb);
7122	sbuf_delete(sb);
7123
7124	return (rc);
7125}
7126
7127static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
7128	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
7129	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
7130	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
7131};
7132
7133static int
7134sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
7135{
7136	struct adapter *sc = arg1;
7137	struct sbuf *sb;
7138	int rc, i, n, qid = arg2;
7139	uint32_t *buf, *p;
7140	char *qtype;
7141	u_int cim_num_obq = sc->chip_params->cim_num_obq;
7142
7143	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
7144	    ("%s: bad qid %d\n", __func__, qid));
7145
7146	if (qid < CIM_NUM_IBQ) {
7147		/* inbound queue */
7148		qtype = "IBQ";
7149		n = 4 * CIM_IBQ_SIZE;
7150		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7151		rc = t4_read_cim_ibq(sc, qid, buf, n);
7152	} else {
7153		/* outbound queue */
7154		qtype = "OBQ";
7155		qid -= CIM_NUM_IBQ;
7156		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
7157		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7158		rc = t4_read_cim_obq(sc, qid, buf, n);
7159	}
7160
7161	if (rc < 0) {
7162		rc = -rc;
7163		goto done;
7164	}
7165	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */
7166
7167	rc = sysctl_wire_old_buffer(req, 0);
7168	if (rc != 0)
7169		goto done;
7170
7171	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7172	if (sb == NULL) {
7173		rc = ENOMEM;
7174		goto done;
7175	}
7176
7177	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
7178	for (i = 0, p = buf; i < n; i += 16, p += 4)
7179		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
7180		    p[2], p[3]);
7181
7182	rc = sbuf_finish(sb);
7183	sbuf_delete(sb);
7184done:
7185	free(buf, M_CXGBE);
7186	return (rc);
7187}
7188
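/*
 * CIM logic analyzer dump for T4/T5.  Entries are 8 words wide; if the LA
 * was capturing the uP PC only (F_UPDBGLACAPTPCONLY) each entry holds three
 * packed samples, hence the three output lines per entry in that case.
 */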
7189static int
7190sysctl_cim_la(SYSCTL_HANDLER_ARGS)
7191{
7192	struct adapter *sc = arg1;
7193	u_int cfg;
7194	struct sbuf *sb;
7195	uint32_t *buf, *p;
7196	int rc;
7197
7198	MPASS(chip_id(sc) <= CHELSIO_T5);
7199
7200	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
7201	if (rc != 0)
7202		return (rc);
7203
7204	rc = sysctl_wire_old_buffer(req, 0);
7205	if (rc != 0)
7206		return (rc);
7207
7208	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7209	if (sb == NULL)
7210		return (ENOMEM);
7211
7212	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
7213	    M_ZERO | M_WAITOK);
7214
7215	rc = -t4_cim_read_la(sc, buf, NULL);
7216	if (rc != 0)
7217		goto done;
7218
7219	sbuf_printf(sb, "Status   Data      PC%s",
7220	    cfg & F_UPDBGLACAPTPCONLY ? "" :
7221	    "     LS0Stat  LS0Addr             LS0Data");
7222
7223	for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
7224		if (cfg & F_UPDBGLACAPTPCONLY) {
7225			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
7226			    p[6], p[7]);
7227			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
7228			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
7229			    p[4] & 0xff, p[5] >> 8);
7230			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
7231			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7232			    p[1] & 0xf, p[2] >> 4);
7233		} else {
7234			sbuf_printf(sb,
7235			    "\n  %02x   %x%07x %x%07x %08x %08x "
7236			    "%08x%08x%08x%08x",
7237			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7238			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
7239			    p[6], p[7]);
7240		}
7241	}
7242
7243	rc = sbuf_finish(sb);
7244	sbuf_delete(sb);
7245done:
7246	free(buf, M_CXGBE);
7247	return (rc);
7248}
7249
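/*
 * T6 variant of the CIM logic analyzer dump: entries are 10 words wide and
 * include the instruction word, with the same PC-only packing as above.
 */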
7250static int
7251sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
7252{
7253	struct adapter *sc = arg1;
7254	u_int cfg;
7255	struct sbuf *sb;
7256	uint32_t *buf, *p;
7257	int rc;
7258
7259	MPASS(chip_id(sc) > CHELSIO_T5);
7260
7261	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
7262	if (rc != 0)
7263		return (rc);
7264
7265	rc = sysctl_wire_old_buffer(req, 0);
7266	if (rc != 0)
7267		return (rc);
7268
7269	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7270	if (sb == NULL)
7271		return (ENOMEM);
7272
7273	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
7274	    M_ZERO | M_WAITOK);
7275
7276	rc = -t4_cim_read_la(sc, buf, NULL);
7277	if (rc != 0)
7278		goto done;
7279
7280	sbuf_printf(sb, "Status   Inst    Data      PC%s",
7281	    cfg & F_UPDBGLACAPTPCONLY ? "" :
7282	    "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
7283
7284	for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
7285		if (cfg & F_UPDBGLACAPTPCONLY) {
7286			sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
7287			    p[3] & 0xff, p[2], p[1], p[0]);
7288			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
7289			    (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
7290			    p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
7291			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
7292			    (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
7293			    p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
7294			    p[6] >> 16);
7295		} else {
7296			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
7297			    "%08x %08x %08x %08x %08x %08x",
7298			    (p[9] >> 16) & 0xff,
7299			    p[9] & 0xffff, p[8] >> 16,
7300			    p[8] & 0xffff, p[7] >> 16,
7301			    p[7] & 0xffff, p[6] >> 16,
7302			    p[2], p[1], p[0], p[5], p[4], p[3]);
7303		}
7304	}
7305
7306	rc = sbuf_finish(sb);
7307	sbuf_delete(sb);
7308done:
7309	free(buf, M_CXGBE);
7310	return (rc);
7311}
7312
7313static int
7314sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
7315{
7316	struct adapter *sc = arg1;
7317	u_int i;
7318	struct sbuf *sb;
7319	uint32_t *buf, *p;
7320	int rc;
7321
7322	rc = sysctl_wire_old_buffer(req, 0);
7323	if (rc != 0)
7324		return (rc);
7325
7326	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7327	if (sb == NULL)
7328		return (ENOMEM);
7329
7330	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
7331	    M_ZERO | M_WAITOK);
7332
7333	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
7334	p = buf;
7335
7336	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7337		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
7338		    p[1], p[0]);
7339	}
7340
7341	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
7342	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7343		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
7344		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
7345		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
7346		    (p[1] >> 2) | ((p[2] & 3) << 30),
7347		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
7348		    p[0] & 1);
7349	}
7350
7351	rc = sbuf_finish(sb);
7352	sbuf_delete(sb);
7353	free(buf, M_CXGBE);
7354	return (rc);
7355}
7356
7357static int
7358sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
7359{
7360	struct adapter *sc = arg1;
7361	u_int i;
7362	struct sbuf *sb;
7363	uint32_t *buf, *p;
7364	int rc;
7365
7366	rc = sysctl_wire_old_buffer(req, 0);
7367	if (rc != 0)
7368		return (rc);
7369
7370	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7371	if (sb == NULL)
7372		return (ENOMEM);
7373
7374	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
7375	    M_ZERO | M_WAITOK);
7376
7377	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
7378	p = buf;
7379
7380	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
7381	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7382		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
7383		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
7384		    p[4], p[3], p[2], p[1], p[0]);
7385	}
7386
7387	sbuf_printf(sb, "\n\nCntl ID               Data");
7388	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7389		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
7390		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
7391	}
7392
7393	rc = sbuf_finish(sb);
7394	sbuf_delete(sb);
7395	free(buf, M_CXGBE);
7396	return (rc);
7397}
7398
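/*
 * Display the configuration of all CIM queues: base, size, and (for IBQs)
 * threshold from t4_read_cimq_cfg(), along with pointers and SOP/EOP counts
 * decoded from the queue status registers.  T5 and later read the shadow
 * copies of the pointer registers.
 */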
7399static int
7400sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
7401{
7402	struct adapter *sc = arg1;
7403	struct sbuf *sb;
7404	int rc, i;
7405	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
7406	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
7407	uint16_t thres[CIM_NUM_IBQ];
7408	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
7409	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
7410	u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
7411
7412	cim_num_obq = sc->chip_params->cim_num_obq;
7413	if (is_t4(sc)) {
7414		ibq_rdaddr = A_UP_IBQ_0_RDADDR;
7415		obq_rdaddr = A_UP_OBQ_0_REALADDR;
7416	} else {
7417		ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
7418		obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
7419	}
7420	nq = CIM_NUM_IBQ + cim_num_obq;
7421
7422	rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
7423	if (rc == 0)
7424		rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
7425	if (rc != 0)
7426		return (rc);
7427
7428	t4_read_cimq_cfg(sc, base, size, thres);
7429
7430	rc = sysctl_wire_old_buffer(req, 0);
7431	if (rc != 0)
7432		return (rc);
7433
7434	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7435	if (sb == NULL)
7436		return (ENOMEM);
7437
7438	sbuf_printf(sb,
7439	    "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
7440
7441	for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
7442		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
7443		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
7444		    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
7445		    G_QUEREMFLITS(p[2]) * 16);
7446	for ( ; i < nq; i++, p += 4, wr += 2)
7447		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
7448		    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
7449		    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
7450		    G_QUEREMFLITS(p[2]) * 16);
7451
7452	rc = sbuf_finish(sb);
7453	sbuf_delete(sb);
7454
7455	return (rc);
7456}
7457
7458static int
7459sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
7460{
7461	struct adapter *sc = arg1;
7462	struct sbuf *sb;
7463	int rc;
7464	struct tp_cpl_stats stats;
7465
7466	rc = sysctl_wire_old_buffer(req, 0);
7467	if (rc != 0)
7468		return (rc);
7469
7470	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7471	if (sb == NULL)
7472		return (ENOMEM);
7473
7474	mtx_lock(&sc->reg_lock);
7475	t4_tp_get_cpl_stats(sc, &stats, 0);
7476	mtx_unlock(&sc->reg_lock);
7477
7478	if (sc->chip_params->nchan > 2) {
7479		sbuf_printf(sb, "                 channel 0  channel 1"
7480		    "  channel 2  channel 3");
7481		sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
7482		    stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
7483		sbuf_printf(sb, "\nCPL responses:   %10u %10u %10u %10u",
7484		    stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
7485	} else {
7486		sbuf_printf(sb, "                 channel 0  channel 1");
7487		sbuf_printf(sb, "\nCPL requests:   %10u %10u",
7488		    stats.req[0], stats.req[1]);
7489		sbuf_printf(sb, "\nCPL responses:   %10u %10u",
7490		    stats.rsp[0], stats.rsp[1]);
7491	}
7492
7493	rc = sbuf_finish(sb);
7494	sbuf_delete(sb);
7495
7496	return (rc);
7497}
7498
7499static int
7500sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
7501{
7502	struct adapter *sc = arg1;
7503	struct sbuf *sb;
7504	int rc;
7505	struct tp_usm_stats stats;
7506
7507	rc = sysctl_wire_old_buffer(req, 0);
7508	if (rc != 0)
7509		return (rc);
7510
7511	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7512	if (sb == NULL)
7513		return (ENOMEM);
7514
7515	t4_get_usm_stats(sc, &stats, 1);
7516
7517	sbuf_printf(sb, "Frames: %u\n", stats.frames);
7518	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
7519	sbuf_printf(sb, "Drops:  %u", stats.drops);
7520
7521	rc = sbuf_finish(sb);
7522	sbuf_delete(sb);
7523
7524	return (rc);
7525}
7526
7527static const char * const devlog_level_strings[] = {
7528	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
7529	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
7530	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
7531	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
7532	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
7533	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
7534};
7535
7536static const char * const devlog_facility_strings[] = {
7537	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
7538	[FW_DEVLOG_FACILITY_CF]		= "CF",
7539	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
7540	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
7541	[FW_DEVLOG_FACILITY_RES]	= "RES",
7542	[FW_DEVLOG_FACILITY_HW]		= "HW",
7543	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
7544	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
7545	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
7546	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
7547	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
7548	[FW_DEVLOG_FACILITY_VI]		= "VI",
7549	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
7550	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
7551	[FW_DEVLOG_FACILITY_TM]		= "TM",
7552	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
7553	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
7554	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
7555	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
7556	[FW_DEVLOG_FACILITY_RI]		= "RI",
7557	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
7558	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
7559	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
7560	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE",
7561	[FW_DEVLOG_FACILITY_CHNET]	= "CHNET",
7562};
7563
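/*
 * The firmware's device log is a ring of fw_devlog_e entries in adapter
 * memory.  Locate the oldest entry (minimum timestamp) and then display the
 * log from there, wrapping around at the end of the buffer.
 */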
7564static int
7565sysctl_devlog(SYSCTL_HANDLER_ARGS)
7566{
7567	struct adapter *sc = arg1;
7568	struct devlog_params *dparams = &sc->params.devlog;
7569	struct fw_devlog_e *buf, *e;
7570	int i, j, rc, nentries, first = 0;
7571	struct sbuf *sb;
7572	uint64_t ftstamp = UINT64_MAX;
7573
7574	if (dparams->addr == 0)
7575		return (ENXIO);
7576
7577	buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
7578	if (buf == NULL)
7579		return (ENOMEM);
7580
7581	rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
7582	if (rc != 0)
7583		goto done;
7584
7585	nentries = dparams->size / sizeof(struct fw_devlog_e);
7586	for (i = 0; i < nentries; i++) {
7587		e = &buf[i];
7588
7589		if (e->timestamp == 0)
7590			break;	/* end */
7591
7592		e->timestamp = be64toh(e->timestamp);
7593		e->seqno = be32toh(e->seqno);
7594		for (j = 0; j < 8; j++)
7595			e->params[j] = be32toh(e->params[j]);
7596
7597		if (e->timestamp < ftstamp) {
7598			ftstamp = e->timestamp;
7599			first = i;
7600		}
7601	}
7602
7603	if (buf[first].timestamp == 0)
7604		goto done;	/* nothing in the log */
7605
7606	rc = sysctl_wire_old_buffer(req, 0);
7607	if (rc != 0)
7608		goto done;
7609
7610	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7611	if (sb == NULL) {
7612		rc = ENOMEM;
7613		goto done;
7614	}
7615	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
7616	    "Seq#", "Tstamp", "Level", "Facility", "Message");
7617
7618	i = first;
7619	do {
7620		e = &buf[i];
7621		if (e->timestamp == 0)
7622			break;	/* end */
7623
7624		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
7625		    e->seqno, e->timestamp,
7626		    (e->level < nitems(devlog_level_strings) ?
7627			devlog_level_strings[e->level] : "UNKNOWN"),
7628		    (e->facility < nitems(devlog_facility_strings) ?
7629			devlog_facility_strings[e->facility] : "UNKNOWN"));
7630		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
7631		    e->params[2], e->params[3], e->params[4],
7632		    e->params[5], e->params[6], e->params[7]);
7633
7634		if (++i == nentries)
7635			i = 0;
7636	} while (i != first);
7637
7638	rc = sbuf_finish(sb);
7639	sbuf_delete(sb);
7640done:
7641	free(buf, M_CXGBE);
7642	return (rc);
7643}
7644
7645static int
7646sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
7647{
7648	struct adapter *sc = arg1;
7649	struct sbuf *sb;
7650	int rc;
7651	struct tp_fcoe_stats stats[MAX_NCHAN];
7652	int i, nchan = sc->chip_params->nchan;
7653
7654	rc = sysctl_wire_old_buffer(req, 0);
7655	if (rc != 0)
7656		return (rc);
7657
7658	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7659	if (sb == NULL)
7660		return (ENOMEM);
7661
7662	for (i = 0; i < nchan; i++)
7663		t4_get_fcoe_stats(sc, i, &stats[i], 1);
7664
7665	if (nchan > 2) {
7666		sbuf_printf(sb, "                   channel 0        channel 1"
7667		    "        channel 2        channel 3");
7668		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
7669		    stats[0].octets_ddp, stats[1].octets_ddp,
7670		    stats[2].octets_ddp, stats[3].octets_ddp);
7671		sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
7672		    stats[0].frames_ddp, stats[1].frames_ddp,
7673		    stats[2].frames_ddp, stats[3].frames_ddp);
7674		sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
7675		    stats[0].frames_drop, stats[1].frames_drop,
7676		    stats[2].frames_drop, stats[3].frames_drop);
7677	} else {
7678		sbuf_printf(sb, "                   channel 0        channel 1");
7679		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
7680		    stats[0].octets_ddp, stats[1].octets_ddp);
7681		sbuf_printf(sb, "\nframesDDP:  %16u %16u",
7682		    stats[0].frames_ddp, stats[1].frames_ddp);
7683		sbuf_printf(sb, "\nframesDrop: %16u %16u",
7684		    stats[0].frames_drop, stats[1].frames_drop);
7685	}
7686
7687	rc = sbuf_finish(sb);
7688	sbuf_delete(sb);
7689
7690	return (rc);
7691}
7692
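/*
 * Display the hardware TX schedulers: each scheduler's mode (class or flow
 * pacing), its channel binding from TP_TX_MOD_QUEUE_REQ_MAP, and its rate
 * and inter-packet gaps, with "disabled" standing in for zero values.
 */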
7693static int
7694sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
7695{
7696	struct adapter *sc = arg1;
7697	struct sbuf *sb;
7698	int rc, i;
7699	unsigned int map, kbps, ipg, mode;
7700	unsigned int pace_tab[NTX_SCHED];
7701
7702	rc = sysctl_wire_old_buffer(req, 0);
7703	if (rc != 0)
7704		return (rc);
7705
7706	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7707	if (sb == NULL)
7708		return (ENOMEM);
7709
7710	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
7711	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
7712	t4_read_pace_tbl(sc, pace_tab);
7713
7714	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
7715	    "Class IPG (0.1 ns)   Flow IPG (us)");
7716
7717	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
7718		t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
7719		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
7720		    (mode & (1 << i)) ? "flow" : "class", map & 3);
7721		if (kbps)
7722			sbuf_printf(sb, "%9u     ", kbps);
7723		else
7724			sbuf_printf(sb, " disabled     ");
7725
7726		if (ipg)
7727			sbuf_printf(sb, "%13u        ", ipg);
7728		else
7729			sbuf_printf(sb, "     disabled        ");
7730
7731		if (pace_tab[i])
7732			sbuf_printf(sb, "%10u", pace_tab[i]);
7733		else
7734			sbuf_printf(sb, "  disabled");
7735	}
7736
7737	rc = sbuf_finish(sb);
7738	sbuf_delete(sb);
7739
7740	return (rc);
7741}
7742
7743static int
7744sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
7745{
7746	struct adapter *sc = arg1;
7747	struct sbuf *sb;
7748	int rc, i, j;
7749	uint64_t *p0, *p1;
7750	struct lb_port_stats s[2];
7751	static const char *stat_name[] = {
7752		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
7753		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
7754		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
7755		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
7756		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
7757		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
7758		"BG2FramesTrunc:", "BG3FramesTrunc:"
7759	};
7760
7761	rc = sysctl_wire_old_buffer(req, 0);
7762	if (rc != 0)
7763		return (rc);
7764
7765	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7766	if (sb == NULL)
7767		return (ENOMEM);
7768
7769	memset(s, 0, sizeof(s));
7770
7771	for (i = 0; i < sc->chip_params->nchan; i += 2) {
7772		t4_get_lb_stats(sc, i, &s[0]);
7773		t4_get_lb_stats(sc, i + 1, &s[1]);
7774
7775		p0 = &s[0].octets;
7776		p1 = &s[1].octets;
7777		sbuf_printf(sb, "%s                       Loopback %u"
7778		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
7779
7780		for (j = 0; j < nitems(stat_name); j++)
7781			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
7782				   *p0++, *p1++);
7783	}
7784
7785	rc = sbuf_finish(sb);
7786	sbuf_delete(sb);
7787
7788	return (rc);
7789}
7790
7791static int
7792sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
7793{
7794	int rc = 0;
7795	struct port_info *pi = arg1;
7796	struct link_config *lc = &pi->link_cfg;
7797	struct sbuf *sb;
7798
7799	rc = sysctl_wire_old_buffer(req, 0);
7800	if (rc != 0)
7801		return (rc);
7802	sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
7803	if (sb == NULL)
7804		return (ENOMEM);
7805
7806	if (lc->link_ok || lc->link_down_rc == 255)
7807		sbuf_printf(sb, "n/a");
7808	else
7809		sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
7810
7811	rc = sbuf_finish(sb);
7812	sbuf_delete(sb);
7813
7814	return (rc);
7815}
7816
7817struct mem_desc {
7818	unsigned int base;
7819	unsigned int limit;
7820	unsigned int idx;
7821};
7822
7823static int
7824mem_desc_cmp(const void *a, const void *b)
7825{
7826	return ((const struct mem_desc *)a)->base -
7827	       ((const struct mem_desc *)b)->base;
7828}
7829
7830static void
7831mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
7832    unsigned int to)
7833{
7834	unsigned int size;
7835
7836	if (from == to)
7837		return;
7838
7839	size = to - from + 1;
7840	if (size == 0)
7841		return;
7842
7843	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
7844	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
7845}
7846
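/*
 * Describe the adapter's memory layout: first the available ranges (EDC0,
 * EDC1, MC/MC0, MC1) decoded from the MA BAR registers, then the regions the
 * various hardware modules have claimed within them.  Both lists are sorted
 * by base address; mem[] entries whose idx is past the end of region[] are
 * holes or deliberately hidden regions.
 */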
7847static int
7848sysctl_meminfo(SYSCTL_HANDLER_ARGS)
7849{
7850	struct adapter *sc = arg1;
7851	struct sbuf *sb;
7852	int rc, i, n;
7853	uint32_t lo, hi, used, alloc;
7854	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
7855	static const char *region[] = {
7856		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
7857		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
7858		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
7859		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
7860		"RQUDP region:", "PBL region:", "TXPBL region:",
7861		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
7862		"On-chip queues:", "TLS keys:",
7863	};
7864	struct mem_desc avail[4];
7865	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
7866	struct mem_desc *md = mem;
7867
7868	rc = sysctl_wire_old_buffer(req, 0);
7869	if (rc != 0)
7870		return (rc);
7871
7872	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7873	if (sb == NULL)
7874		return (ENOMEM);
7875
7876	for (i = 0; i < nitems(mem); i++) {
7877		mem[i].limit = 0;
7878		mem[i].idx = i;
7879	}
7880
7881	/* Find and sort the populated memory ranges */
7882	i = 0;
7883	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
7884	if (lo & F_EDRAM0_ENABLE) {
7885		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
7886		avail[i].base = G_EDRAM0_BASE(hi) << 20;
7887		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
7888		avail[i].idx = 0;
7889		i++;
7890	}
7891	if (lo & F_EDRAM1_ENABLE) {
7892		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
7893		avail[i].base = G_EDRAM1_BASE(hi) << 20;
7894		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
7895		avail[i].idx = 1;
7896		i++;
7897	}
7898	if (lo & F_EXT_MEM_ENABLE) {
7899		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
7900		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
7901		avail[i].limit = avail[i].base +
7902		    (G_EXT_MEM_SIZE(hi) << 20);
7903		avail[i].idx = is_t5(sc) ? 3 : 2;	/* Call it MC0 for T5 */
7904		i++;
7905	}
7906	if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
7907		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
7908		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
7909		avail[i].limit = avail[i].base +
7910		    (G_EXT_MEM1_SIZE(hi) << 20);
7911		avail[i].idx = 4;
7912		i++;
7913	}
7914	if (!i) {				/* no memory available */
7915		sbuf_delete(sb);	/* don't leak the sbuf allocated above */
		return (0);
	}
7916	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
7917
7918	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
7919	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
7920	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
7921	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
7922	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
7923	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
7924	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
7925	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
7926	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
7927
7928	/* the next few have explicit upper bounds */
7929	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
7930	md->limit = md->base - 1 +
7931		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
7932		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
7933	md++;
7934
7935	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
7936	md->limit = md->base - 1 +
7937		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
7938		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
7939	md++;
7940
7941	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
7942		if (chip_id(sc) <= CHELSIO_T5)
7943			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
7944		else
7945			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
7946		md->limit = 0;
7947	} else {
7948		md->base = 0;
7949		md->idx = nitems(region);  /* hide it */
7950	}
7951	md++;
7952
7953#define ulp_region(reg) \
7954	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
7955	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
7956
7957	ulp_region(RX_ISCSI);
7958	ulp_region(RX_TDDP);
7959	ulp_region(TX_TPT);
7960	ulp_region(RX_STAG);
7961	ulp_region(RX_RQ);
7962	ulp_region(RX_RQUDP);
7963	ulp_region(RX_PBL);
7964	ulp_region(TX_PBL);
7965#undef ulp_region
7966
7967	md->base = 0;
7968	md->idx = nitems(region);
7969	if (!is_t4(sc)) {
7970		uint32_t size = 0;
7971		uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
7972		uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
7973
7974		if (is_t5(sc)) {
7975			if (sge_ctrl & F_VFIFO_ENABLE)
7976				size = G_DBVFIFO_SIZE(fifo_size);
7977		} else
7978			size = G_T6_DBVFIFO_SIZE(fifo_size);
7979
7980		if (size) {
7981			md->base = G_BASEADDR(t4_read_reg(sc,
7982			    A_SGE_DBVFIFO_BADDR));
7983			md->limit = md->base + (size << 2) - 1;
7984		}
7985	}
7986	md++;
7987
7988	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
7989	md->limit = 0;
7990	md++;
7991	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
7992	md->limit = 0;
7993	md++;
7994
7995	md->base = sc->vres.ocq.start;
7996	if (sc->vres.ocq.size)
7997		md->limit = md->base + sc->vres.ocq.size - 1;
7998	else
7999		md->idx = nitems(region);  /* hide it */
8000	md++;
8001
8002	md->base = sc->vres.key.start;
8003	if (sc->vres.key.size)
8004		md->limit = md->base + sc->vres.key.size - 1;
8005	else
8006		md->idx = nitems(region);  /* hide it */
8007	md++;
8008
8009	/* add any address-space holes; there can be up to 3 */
8010	for (n = 0; n < i - 1; n++)
8011		if (avail[n].limit < avail[n + 1].base)
8012			(md++)->base = avail[n].limit;
8013	if (avail[n].limit)
8014		(md++)->base = avail[n].limit;
8015
8016	n = md - mem;
8017	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
8018
8019	for (lo = 0; lo < i; lo++)
8020		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
8021				avail[lo].limit - 1);
8022
8023	sbuf_printf(sb, "\n");
8024	for (i = 0; i < n; i++) {
8025		if (mem[i].idx >= nitems(region))
8026			continue;                        /* skip holes */
8027		if (!mem[i].limit)
8028			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
8029		mem_region_show(sb, region[mem[i].idx], mem[i].base,
8030				mem[i].limit);
8031	}
8032
8033	sbuf_printf(sb, "\n");
8034	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
8035	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
8036	mem_region_show(sb, "uP RAM:", lo, hi);
8037
8038	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
8039	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
8040	mem_region_show(sb, "uP Extmem2:", lo, hi);
8041
8042	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
8043	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
8044		   G_PMRXMAXPAGE(lo),
8045		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
8046		   (lo & F_PMRXNUMCHN) ? 2 : 1);
8047
8048	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
8049	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
8050	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
8051		   G_PMTXMAXPAGE(lo),
8052		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
8053		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
8054	sbuf_printf(sb, "%u p-structs\n",
8055		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
8056
8057	for (i = 0; i < 4; i++) {
8058		if (chip_id(sc) > CHELSIO_T5)
8059			lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
8060		else
8061			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
8062		if (is_t5(sc)) {
8063			used = G_T5_USED(lo);
8064			alloc = G_T5_ALLOC(lo);
8065		} else {
8066			used = G_USED(lo);
8067			alloc = G_ALLOC(lo);
8068		}
8069		/* For T6 these are MAC buffer groups */
8070		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
8071		    i, used, alloc);
8072	}
8073	for (i = 0; i < sc->chip_params->nchan; i++) {
8074		if (chip_id(sc) > CHELSIO_T5)
8075			lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
8076		else
8077			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
8078		if (is_t5(sc)) {
8079			used = G_T5_USED(lo);
8080			alloc = G_T5_ALLOC(lo);
8081		} else {
8082			used = G_USED(lo);
8083			alloc = G_ALLOC(lo);
8084		}
8085		/* For T6 these are MAC buffer groups */
8086		sbuf_printf(sb,
8087		    "\nLoopback %d using %u pages out of %u allocated",
8088		    i, used, alloc);
8089	}
8090
8091	rc = sbuf_finish(sb);
8092	sbuf_delete(sb);
8093
8094	return (rc);
8095}
8096
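/*
 * Convert an MPS TCAM (x, y) pair into a value/mask, as this code interprets
 * the encoding: a bit takes part in the match if either x or y is set (so
 * mask = x | y and x == y == 0 means "don't care"), the value to match comes
 * from y, and an entry with x & y != 0 is invalid (callers skip those).  The
 * low 48 bits of y carry the Ethernet address.
 */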
8097static inline void
8098tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
8099{
8100	*mask = x | y;
8101	y = htobe64(y);
8102	memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
8103}
8104
8105static int
8106sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
8107{
8108	struct adapter *sc = arg1;
8109	struct sbuf *sb;
8110	int rc, i;
8111
8112	MPASS(chip_id(sc) <= CHELSIO_T5);
8113
8114	rc = sysctl_wire_old_buffer(req, 0);
8115	if (rc != 0)
8116		return (rc);
8117
8118	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8119	if (sb == NULL)
8120		return (ENOMEM);
8121
8122	sbuf_printf(sb,
8123	    "Idx  Ethernet address     Mask     Vld Ports PF"
8124	    "  VF              Replication             P0 P1 P2 P3  ML");
8125	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8126		uint64_t tcamx, tcamy, mask;
8127		uint32_t cls_lo, cls_hi;
8128		uint8_t addr[ETHER_ADDR_LEN];
8129
8130		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
8131		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
8132		if (tcamx & tcamy)
8133			continue;
8134		tcamxy2valmask(tcamx, tcamy, addr, &mask);
8135		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8136		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8137		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
8138			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
8139			   addr[3], addr[4], addr[5], (uintmax_t)mask,
8140			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
8141			   G_PORTMAP(cls_hi), G_PF(cls_lo),
8142			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
8143
8144		if (cls_lo & F_REPLICATE) {
8145			struct fw_ldst_cmd ldst_cmd;
8146
8147			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8148			ldst_cmd.op_to_addrspace =
8149			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8150				F_FW_CMD_REQUEST | F_FW_CMD_READ |
8151				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8152			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8153			ldst_cmd.u.mps.rplc.fid_idx =
8154			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8155				V_FW_LDST_CMD_IDX(i));
8156
8157			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8158			    "t4mps");
8159			if (rc)
8160				break;
8161			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8162			    sizeof(ldst_cmd), &ldst_cmd);
8163			end_synchronized_op(sc, 0);
8164
8165			if (rc != 0) {
8166				sbuf_printf(sb, "%36d", rc);
8167				rc = 0;
8168			} else {
8169				sbuf_printf(sb, " %08x %08x %08x %08x",
8170				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8171				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8172				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8173				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8174			}
8175		} else
8176			sbuf_printf(sb, "%36s", "");
8177
8178		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
8179		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
8180		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
8181	}
8182
8183	if (rc)
8184		(void) sbuf_finish(sb);
8185	else
8186		rc = sbuf_finish(sb);
8187	sbuf_delete(sb);
8188
8189	return (rc);
8190}
8191
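/*
 * T6 version of the MPS TCAM dump.  The TCAM is read indirectly through
 * A_MPS_CLS_TCAM_DATA2_CTL: the y copy is read first, then CTLXYBITSEL is
 * flipped to fetch the x copy, and inner vs. outer lookups (VNI vs. IVLAN)
 * are told apart by DATALKPTYPE.
 */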
8192static int
8193sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
8194{
8195	struct adapter *sc = arg1;
8196	struct sbuf *sb;
8197	int rc, i;
8198
8199	MPASS(chip_id(sc) > CHELSIO_T5);
8200
8201	rc = sysctl_wire_old_buffer(req, 0);
8202	if (rc != 0)
8203		return (rc);
8204
8205	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8206	if (sb == NULL)
8207		return (ENOMEM);
8208
8209	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
8210	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
8211	    "                           Replication"
8212	    "                                    P0 P1 P2 P3  ML\n");
8213
8214	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8215		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
8216		uint16_t ivlan;
8217		uint64_t tcamx, tcamy, val, mask;
8218		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
8219		uint8_t addr[ETHER_ADDR_LEN];
8220
8221		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
8222		if (i < 256)
8223			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
8224		else
8225			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
8226		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
8227		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
8228		tcamy = G_DMACH(val) << 32;
8229		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
8230		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
8231		lookup_type = G_DATALKPTYPE(data2);
8232		port_num = G_DATAPORTNUM(data2);
8233		if (lookup_type && lookup_type != M_DATALKPTYPE) {
8234			/* Inner header VNI */
8235			vniy = ((data2 & F_DATAVIDH2) << 23) |
8236				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
8237			dip_hit = data2 & F_DATADIPHIT;
8238			vlan_vld = 0;
8239		} else {
8240			vniy = 0;
8241			dip_hit = 0;
8242			vlan_vld = data2 & F_DATAVIDH2;
8243			ivlan = G_VIDL(val);
8244		}
8245
8246		ctl |= V_CTLXYBITSEL(1);
8247		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
8248		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
8249		tcamx = G_DMACH(val) << 32;
8250		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
8251		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
8252		if (lookup_type && lookup_type != M_DATALKPTYPE) {
8253			/* Inner header VNI mask */
8254			vnix = ((data2 & F_DATAVIDH2) << 23) |
8255			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
8256		} else
8257			vnix = 0;
8258
8259		if (tcamx & tcamy)
8260			continue;
8261		tcamxy2valmask(tcamx, tcamy, addr, &mask);
8262
8263		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8264		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8265
8266		if (lookup_type && lookup_type != M_DATALKPTYPE) {
8267			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
8268			    "%012jx %06x %06x    -    -   %3c"
8269			    "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
8270			    addr[1], addr[2], addr[3], addr[4], addr[5],
8271			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
8272			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
8273			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
8274			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
8275		} else {
8276			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
8277			    "%012jx    -       -   ", i, addr[0], addr[1],
8278			    addr[2], addr[3], addr[4], addr[5],
8279			    (uintmax_t)mask);
8280
8281			if (vlan_vld)
8282				sbuf_printf(sb, "%4u   Y     ", ivlan);
8283			else
8284				sbuf_printf(sb, "  -    N     ");
8285
8286			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
8287			    lookup_type ? 'I' : 'O', port_num,
8288			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
8289			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
8290			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
8291		}
8292
8294		if (cls_lo & F_T6_REPLICATE) {
8295			struct fw_ldst_cmd ldst_cmd;
8296
8297			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8298			ldst_cmd.op_to_addrspace =
8299			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8300				F_FW_CMD_REQUEST | F_FW_CMD_READ |
8301				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8302			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8303			ldst_cmd.u.mps.rplc.fid_idx =
8304			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8305				V_FW_LDST_CMD_IDX(i));
8306
8307			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8308			    "t6mps");
8309			if (rc)
8310				break;
8311			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8312			    sizeof(ldst_cmd), &ldst_cmd);
8313			end_synchronized_op(sc, 0);
8314
8315			if (rc != 0) {
8316				sbuf_printf(sb, "%72d", rc);
8317				rc = 0;
8318			} else {
8319				sbuf_printf(sb, " %08x %08x %08x %08x"
8320				    " %08x %08x %08x %08x",
8321				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
8322				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
8323				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
8324				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
8325				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8326				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8327				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8328				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8329			}
8330		} else
8331			sbuf_printf(sb, "%72s", "");
8332
8333		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
8334		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
8335		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
8336		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
8337	}
8338
8339	if (rc)
8340		(void) sbuf_finish(sb);
8341	else
8342		rc = sbuf_finish(sb);
8343	sbuf_delete(sb);
8344
8345	return (rc);
8346}
8347
8348static int
8349sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
8350{
8351	struct adapter *sc = arg1;
8352	struct sbuf *sb;
8353	int rc;
8354	uint16_t mtus[NMTUS];
8355
8356	rc = sysctl_wire_old_buffer(req, 0);
8357	if (rc != 0)
8358		return (rc);
8359
8360	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8361	if (sb == NULL)
8362		return (ENOMEM);
8363
8364	t4_read_mtu_tbl(sc, mtus, NULL);
8365
8366	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
8367	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
8368	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
8369	    mtus[14], mtus[15]);
8370
8371	rc = sbuf_finish(sb);
8372	sbuf_delete(sb);
8373
8374	return (rc);
8375}
8376
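/*
 * PM TX/RX statistics.  The first four counters in each direction are
 * printed as command/byte pairs; the trailing tx_stats[]/rx_stats[] entries
 * (with a NULL placeholder in between) are the FIFO wait and latency
 * counters that exist only on T6 and later.
 */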
8377static int
8378sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
8379{
8380	struct adapter *sc = arg1;
8381	struct sbuf *sb;
8382	int rc, i;
8383	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
8384	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
8385	static const char *tx_stats[MAX_PM_NSTATS] = {
8386		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
8387		"Tx FIFO wait", NULL, "Tx latency"
8388	};
8389	static const char *rx_stats[MAX_PM_NSTATS] = {
8390		"Read:", "Write bypass:", "Write mem:", "Flush:",
8391		"Rx FIFO wait", NULL, "Rx latency"
8392	};
8393
8394	rc = sysctl_wire_old_buffer(req, 0);
8395	if (rc != 0)
8396		return (rc);
8397
8398	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8399	if (sb == NULL)
8400		return (ENOMEM);
8401
8402	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
8403	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
8404
8405	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
8406	for (i = 0; i < 4; i++) {
8407		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8408		    tx_cyc[i]);
8409	}
8410
8411	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
8412	for (i = 0; i < 4; i++) {
8413		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8414		    rx_cyc[i]);
8415	}
8416
8417	if (chip_id(sc) > CHELSIO_T5) {
8418		sbuf_printf(sb,
8419		    "\n              Total wait      Total occupancy");
8420		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8421		    tx_cyc[i]);
8422		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8423		    rx_cyc[i]);
8424
8425		i += 2;
8426		MPASS(i < nitems(tx_stats));
8427
8428		sbuf_printf(sb,
8429		    "\n                   Reads           Total wait");
8430		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8431		    tx_cyc[i]);
8432		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8433		    rx_cyc[i]);
8434	}
8435
8436	rc = sbuf_finish(sb);
8437	sbuf_delete(sb);
8438
8439	return (rc);
8440}
8441
8442static int
8443sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
8444{
8445	struct adapter *sc = arg1;
8446	struct sbuf *sb;
8447	int rc;
8448	struct tp_rdma_stats stats;
8449
8450	rc = sysctl_wire_old_buffer(req, 0);
8451	if (rc != 0)
8452		return (rc);
8453
8454	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8455	if (sb == NULL)
8456		return (ENOMEM);
8457
8458	mtx_lock(&sc->reg_lock);
8459	t4_tp_get_rdma_stats(sc, &stats, 0);
8460	mtx_unlock(&sc->reg_lock);
8461
8462	sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
8463	sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
8464
8465	rc = sbuf_finish(sb);
8466	sbuf_delete(sb);
8467
8468	return (rc);
8469}
8470
8471static int
8472sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
8473{
8474	struct adapter *sc = arg1;
8475	struct sbuf *sb;
8476	int rc;
8477	struct tp_tcp_stats v4, v6;
8478
8479	rc = sysctl_wire_old_buffer(req, 0);
8480	if (rc != 0)
8481		return (rc);
8482
8483	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8484	if (sb == NULL)
8485		return (ENOMEM);
8486
8487	mtx_lock(&sc->reg_lock);
8488	t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
8489	mtx_unlock(&sc->reg_lock);
8490
8491	sbuf_printf(sb,
8492	    "                                IP                 IPv6\n");
8493	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
8494	    v4.tcp_out_rsts, v6.tcp_out_rsts);
8495	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
8496	    v4.tcp_in_segs, v6.tcp_in_segs);
8497	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
8498	    v4.tcp_out_segs, v6.tcp_out_segs);
8499	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
8500	    v4.tcp_retrans_segs, v6.tcp_retrans_segs);
8501
8502	rc = sbuf_finish(sb);
8503	sbuf_delete(sb);
8504
8505	return (rc);
8506}
8507
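/*
 * Show how the TID space is partitioned (ATIDs, high-priority filter TIDs,
 * TIDs, server TIDs, filter TIDs, ethofld TIDs) and how much of each range
 * is in use, plus the active IPv4/IPv6 TID counts maintained by the LE.
 */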
8508static int
8509sysctl_tids(SYSCTL_HANDLER_ARGS)
8510{
8511	struct adapter *sc = arg1;
8512	struct sbuf *sb;
8513	int rc;
8514	struct tid_info *t = &sc->tids;
8515
8516	rc = sysctl_wire_old_buffer(req, 0);
8517	if (rc != 0)
8518		return (rc);
8519
8520	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8521	if (sb == NULL)
8522		return (ENOMEM);
8523
8524	if (t->natids) {
8525		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
8526		    t->atids_in_use);
8527	}
8528
8529	if (t->nhpftids) {
8530		sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
8531		    t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
8532	}
8533
8534	if (t->ntids) {
8535		sbuf_printf(sb, "TID range: ");
8536		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8537			uint32_t b, hb;
8538
8539			if (chip_id(sc) <= CHELSIO_T5) {
8540				b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
8541				hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
8542			} else {
8543				b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
8544				hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
8545			}
8546
8547			if (b)
8548				sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
8549			sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
8550		} else
8551			sbuf_printf(sb, "%u-%u", t->tid_base, t->ntids - 1);
8552		sbuf_printf(sb, ", in use: %u\n",
8553		    atomic_load_acq_int(&t->tids_in_use));
8554	}
8555
8556	if (t->nstids) {
8557		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
8558		    t->stid_base + t->nstids - 1, t->stids_in_use);
8559	}
8560
8561	if (t->nftids) {
8562		sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
8563		    t->ftid_end, t->ftids_in_use);
8564	}
8565
8566	if (t->netids) {
8567		sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
8568		    t->etid_base + t->netids - 1);
8569	}
8570
8571	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
8572	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
8573	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
8574
8575	rc = sbuf_finish(sb);
8576	sbuf_delete(sb);
8577
8578	return (rc);
8579}
8580
8581static int
8582sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
8583{
8584	struct adapter *sc = arg1;
8585	struct sbuf *sb;
8586	int rc;
8587	struct tp_err_stats stats;
8588
8589	rc = sysctl_wire_old_buffer(req, 0);
8590	if (rc != 0)
8591		return (rc);
8592
8593	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8594	if (sb == NULL)
8595		return (ENOMEM);
8596
8597	mtx_lock(&sc->reg_lock);
8598	t4_tp_get_err_stats(sc, &stats, 0);
8599	mtx_unlock(&sc->reg_lock);
8600
8601	if (sc->chip_params->nchan > 2) {
8602		sbuf_printf(sb, "                 channel 0  channel 1"
8603		    "  channel 2  channel 3\n");
8604		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
8605		    stats.mac_in_errs[0], stats.mac_in_errs[1],
8606		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
8607		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
8608		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
8609		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
8610		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
8611		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
8612		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
8613		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
8614		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
8615		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
8616		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
8617		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
8618		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
8619		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
8620		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
8621		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
8622		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
8623		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
8624		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
8625		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
8626		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
8627		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
8628	} else {
8629		sbuf_printf(sb, "                 channel 0  channel 1\n");
8630		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
8631		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
8632		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
8633		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
8634		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
8635		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
8636		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
8637		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
8638		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
8639		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
8640		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
8641		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
8642		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
8643		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
8644		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
8645		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
8646	}
8647
8648	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
8649	    stats.ofld_no_neigh, stats.ofld_cong_defer);
8650
8651	rc = sbuf_finish(sb);
8652	sbuf_delete(sb);
8653
8654	return (rc);
8655}
8656
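/*
 * Read or set the TP logic analyzer trigger mask.  The mask lives in the top
 * 16 bits of A_TP_DBG_LA_CONFIG; the sysctl exchanges it shifted down into
 * the low 16 bits.
 */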
8657static int
8658sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
8659{
8660	struct adapter *sc = arg1;
8661	struct tp_params *tpp = &sc->params.tp;
8662	u_int mask;
8663	int rc;
8664
8665	mask = tpp->la_mask >> 16;
8666	rc = sysctl_handle_int(oidp, &mask, 0, req);
8667	if (rc != 0 || req->newptr == NULL)
8668		return (rc);
8669	if (mask > 0xffff)
8670		return (EINVAL);
8671	tpp->la_mask = mask << 16;
8672	t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
8673
8674	return (0);
8675}
8676
8677struct field_desc {
8678	const char *name;
8679	u_int start;
8680	u_int width;
8681};
8682
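/*
 * Print a 64-bit value as the sequence of named bitfields described by f[],
 * wrapping the output before column 79.
 */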
8683static void
8684field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
8685{
8686	char buf[32];
8687	int line_size = 0;
8688
8689	while (f->name) {
8690		uint64_t mask = (1ULL << f->width) - 1;
8691		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
8692		    ((uintmax_t)v >> f->start) & mask);
8693
8694		if (line_size + len >= 79) {
8695			line_size = 8;
8696			sbuf_printf(sb, "\n        ");
8697		}
8698		sbuf_printf(sb, "%s ", buf);
8699		line_size += len + 1;
8700		f++;
8701	}
8702	sbuf_printf(sb, "\n");
8703}
8704
8705static const struct field_desc tp_la0[] = {
8706	{ "RcfOpCodeOut", 60, 4 },
8707	{ "State", 56, 4 },
8708	{ "WcfState", 52, 4 },
8709	{ "RcfOpcSrcOut", 50, 2 },
8710	{ "CRxError", 49, 1 },
8711	{ "ERxError", 48, 1 },
8712	{ "SanityFailed", 47, 1 },
8713	{ "SpuriousMsg", 46, 1 },
8714	{ "FlushInputMsg", 45, 1 },
8715	{ "FlushInputCpl", 44, 1 },
8716	{ "RssUpBit", 43, 1 },
8717	{ "RssFilterHit", 42, 1 },
8718	{ "Tid", 32, 10 },
8719	{ "InitTcb", 31, 1 },
8720	{ "LineNumber", 24, 7 },
8721	{ "Emsg", 23, 1 },
8722	{ "EdataOut", 22, 1 },
8723	{ "Cmsg", 21, 1 },
8724	{ "CdataOut", 20, 1 },
8725	{ "EreadPdu", 19, 1 },
8726	{ "CreadPdu", 18, 1 },
8727	{ "TunnelPkt", 17, 1 },
8728	{ "RcfPeerFin", 16, 1 },
8729	{ "RcfReasonOut", 12, 4 },
8730	{ "TxCchannel", 10, 2 },
8731	{ "RcfTxChannel", 8, 2 },
8732	{ "RxEchannel", 6, 2 },
8733	{ "RcfRxChannel", 5, 1 },
8734	{ "RcfDataOutSrdy", 4, 1 },
8735	{ "RxDvld", 3, 1 },
8736	{ "RxOoDvld", 2, 1 },
8737	{ "RxCongestion", 1, 1 },
8738	{ "TxCongestion", 0, 1 },
8739	{ NULL }
8740};
8741
8742static const struct field_desc tp_la1[] = {
8743	{ "CplCmdIn", 56, 8 },
8744	{ "CplCmdOut", 48, 8 },
8745	{ "ESynOut", 47, 1 },
8746	{ "EAckOut", 46, 1 },
8747	{ "EFinOut", 45, 1 },
8748	{ "ERstOut", 44, 1 },
8749	{ "SynIn", 43, 1 },
8750	{ "AckIn", 42, 1 },
8751	{ "FinIn", 41, 1 },
8752	{ "RstIn", 40, 1 },
8753	{ "DataIn", 39, 1 },
8754	{ "DataInVld", 38, 1 },
8755	{ "PadIn", 37, 1 },
8756	{ "RxBufEmpty", 36, 1 },
8757	{ "RxDdp", 35, 1 },
8758	{ "RxFbCongestion", 34, 1 },
8759	{ "TxFbCongestion", 33, 1 },
8760	{ "TxPktSumSrdy", 32, 1 },
8761	{ "RcfUlpType", 28, 4 },
8762	{ "Eread", 27, 1 },
8763	{ "Ebypass", 26, 1 },
8764	{ "Esave", 25, 1 },
8765	{ "Static0", 24, 1 },
8766	{ "Cread", 23, 1 },
8767	{ "Cbypass", 22, 1 },
8768	{ "Csave", 21, 1 },
8769	{ "CPktOut", 20, 1 },
8770	{ "RxPagePoolFull", 18, 2 },
8771	{ "RxLpbkPkt", 17, 1 },
8772	{ "TxLpbkPkt", 16, 1 },
8773	{ "RxVfValid", 15, 1 },
8774	{ "SynLearned", 14, 1 },
8775	{ "SetDelEntry", 13, 1 },
8776	{ "SetInvEntry", 12, 1 },
8777	{ "CpcmdDvld", 11, 1 },
8778	{ "CpcmdSave", 10, 1 },
8779	{ "RxPstructsFull", 8, 2 },
8780	{ "EpcmdDvld", 7, 1 },
8781	{ "EpcmdFlush", 6, 1 },
8782	{ "EpcmdTrimPrefix", 5, 1 },
8783	{ "EpcmdTrimPostfix", 4, 1 },
8784	{ "ERssIp4Pkt", 3, 1 },
8785	{ "ERssIp6Pkt", 2, 1 },
8786	{ "ERssTcpUdpPkt", 1, 1 },
8787	{ "ERssFceFipPkt", 0, 1 },
8788	{ NULL }
8789};
8790
8791static const struct field_desc tp_la2[] = {
8792	{ "CplCmdIn", 56, 8 },
8793	{ "MpsVfVld", 55, 1 },
8794	{ "MpsPf", 52, 3 },
8795	{ "MpsVf", 44, 8 },
8796	{ "SynIn", 43, 1 },
8797	{ "AckIn", 42, 1 },
8798	{ "FinIn", 41, 1 },
8799	{ "RstIn", 40, 1 },
8800	{ "DataIn", 39, 1 },
8801	{ "DataInVld", 38, 1 },
8802	{ "PadIn", 37, 1 },
8803	{ "RxBufEmpty", 36, 1 },
8804	{ "RxDdp", 35, 1 },
8805	{ "RxFbCongestion", 34, 1 },
8806	{ "TxFbCongestion", 33, 1 },
8807	{ "TxPktSumSrdy", 32, 1 },
8808	{ "RcfUlpType", 28, 4 },
8809	{ "Eread", 27, 1 },
8810	{ "Ebypass", 26, 1 },
8811	{ "Esave", 25, 1 },
8812	{ "Static0", 24, 1 },
8813	{ "Cread", 23, 1 },
8814	{ "Cbypass", 22, 1 },
8815	{ "Csave", 21, 1 },
8816	{ "CPktOut", 20, 1 },
8817	{ "RxPagePoolFull", 18, 2 },
8818	{ "RxLpbkPkt", 17, 1 },
8819	{ "TxLpbkPkt", 16, 1 },
8820	{ "RxVfValid", 15, 1 },
8821	{ "SynLearned", 14, 1 },
8822	{ "SetDelEntry", 13, 1 },
8823	{ "SetInvEntry", 12, 1 },
8824	{ "CpcmdDvld", 11, 1 },
8825	{ "CpcmdSave", 10, 1 },
8826	{ "RxPstructsFull", 8, 2 },
8827	{ "EpcmdDvld", 7, 1 },
8828	{ "EpcmdFlush", 6, 1 },
8829	{ "EpcmdTrimPrefix", 5, 1 },
8830	{ "EpcmdTrimPostfix", 4, 1 },
8831	{ "ERssIp4Pkt", 3, 1 },
8832	{ "ERssIp6Pkt", 2, 1 },
8833	{ "ERssTcpUdpPkt", 1, 1 },
8834	{ "ERssFceFipPkt", 0, 1 },
8835	{ NULL }
8836};
8837
8838static void
8839tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
8840{
8841
8842	field_desc_show(sb, *p, tp_la0);
8843}
8844
8845static void
8846tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
8847{
8848
8849	if (idx)
8850		sbuf_printf(sb, "\n");
8851	field_desc_show(sb, p[0], tp_la0);
8852	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8853		field_desc_show(sb, p[1], tp_la0);
8854}
8855
8856static void
8857tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
8858{
8859
8860	if (idx)
8861		sbuf_printf(sb, "\n");
8862	field_desc_show(sb, p[0], tp_la0);
8863	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8864		field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
8865}
8866
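/*
 * Dump the TP logic analyzer.  DBGLAMODE selects the capture format: modes 2
 * and 3 consume two 64-bit words per entry, anything else one word; the
 * second word of a mode-3 entry is decoded with tp_la1 or tp_la2 depending
 * on bit 17 of the first word.
 */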
8867static int
8868sysctl_tp_la(SYSCTL_HANDLER_ARGS)
8869{
8870	struct adapter *sc = arg1;
8871	struct sbuf *sb;
8872	uint64_t *buf, *p;
8873	int rc;
8874	u_int i, inc;
8875	void (*show_func)(struct sbuf *, uint64_t *, int);
8876
8877	rc = sysctl_wire_old_buffer(req, 0);
8878	if (rc != 0)
8879		return (rc);
8880
8881	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8882	if (sb == NULL)
8883		return (ENOMEM);
8884
8885	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
8886
8887	t4_tp_read_la(sc, buf, NULL);
8888	p = buf;
8889
8890	switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
8891	case 2:
8892		inc = 2;
8893		show_func = tp_la_show2;
8894		break;
8895	case 3:
8896		inc = 2;
8897		show_func = tp_la_show3;
8898		break;
8899	default:
8900		inc = 1;
8901		show_func = tp_la_show;
8902	}
8903
8904	for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
8905		(*show_func)(sb, p, i);
8906
8907	rc = sbuf_finish(sb);
8908	sbuf_delete(sb);
8909	free(buf, M_CXGBE);
8910	return (rc);
8911}
8912
8913static int
8914sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
8915{
8916	struct adapter *sc = arg1;
8917	struct sbuf *sb;
8918	int rc;
8919	u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
8920
8921	rc = sysctl_wire_old_buffer(req, 0);
8922	if (rc != 0)
8923		return (rc);
8924
8925	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8926	if (sb == NULL)
8927		return (ENOMEM);
8928
8929	t4_get_chan_txrate(sc, nrate, orate);
8930
8931	if (sc->chip_params->nchan > 2) {
8932		sbuf_printf(sb, "              channel 0   channel 1"
8933		    "   channel 2   channel 3\n");
8934		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
8935		    nrate[0], nrate[1], nrate[2], nrate[3]);
8936		sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
8937		    orate[0], orate[1], orate[2], orate[3]);
8938	} else {
8939		sbuf_printf(sb, "              channel 0   channel 1\n");
8940		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
8941		    nrate[0], nrate[1]);
8942		sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
8943		    orate[0], orate[1]);
8944	}
8945
8946	rc = sbuf_finish(sb);
8947	sbuf_delete(sb);
8948
8949	return (rc);
8950}
8951
8952static int
8953sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
8954{
8955	struct adapter *sc = arg1;
8956	struct sbuf *sb;
8957	uint32_t *buf, *p;
8958	int rc, i;
8959
8960	rc = sysctl_wire_old_buffer(req, 0);
8961	if (rc != 0)
8962		return (rc);
8963
8964	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8965	if (sb == NULL)
8966		return (ENOMEM);
8967
8968	buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
8969	    M_ZERO | M_WAITOK);
8970
8971	t4_ulprx_read_la(sc, buf);
8972	p = buf;
8973
8974	sbuf_printf(sb, "      Pcmd        Type   Message"
8975	    "                Data");
8976	for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
8977		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
8978		    p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
8979	}
8980
8981	rc = sbuf_finish(sb);
8982	sbuf_delete(sb);
8983	free(buf, M_CXGBE);
8984	return (rc);
8985}
8986
8987static int
8988sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
8989{
8990	struct adapter *sc = arg1;
8991	struct sbuf *sb;
8992	int rc, v;
8993
8994	MPASS(chip_id(sc) >= CHELSIO_T5);
8995
8996	rc = sysctl_wire_old_buffer(req, 0);
8997	if (rc != 0)
8998		return (rc);
8999
9000	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9001	if (sb == NULL)
9002		return (ENOMEM);
9003
9004	v = t4_read_reg(sc, A_SGE_STAT_CFG);
9005	if (G_STATSOURCE_T5(v) == 7) {
9006		int mode;
9007
9008		mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
9009		if (mode == 0) {
9010			sbuf_printf(sb, "total %d, incomplete %d",
9011			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
9012			    t4_read_reg(sc, A_SGE_STAT_MATCH));
9013		} else if (mode == 1) {
9014			sbuf_printf(sb, "total %d, data overflow %d",
9015			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
9016			    t4_read_reg(sc, A_SGE_STAT_MATCH));
9017		} else {
9018			sbuf_printf(sb, "unknown mode %d", mode);
9019		}
9020	}
9021	rc = sbuf_finish(sb);
9022	sbuf_delete(sb);
9023
9024	return (rc);
9025}
9026
9027static int
9028sysctl_cpus(SYSCTL_HANDLER_ARGS)
9029{
9030	struct adapter *sc = arg1;
9031	enum cpu_sets op = arg2;
9032	cpuset_t cpuset;
9033	struct sbuf *sb;
9034	int i, rc;
9035
9036	MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
9037
9038	CPU_ZERO(&cpuset);
9039	rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
9040	if (rc != 0)
9041		return (rc);
9042
9043	rc = sysctl_wire_old_buffer(req, 0);
9044	if (rc != 0)
9045		return (rc);
9046
9047	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9048	if (sb == NULL)
9049		return (ENOMEM);
9050
9051	CPU_FOREACH(i)
9052		if (CPU_ISSET(i, &cpuset))
			sbuf_printf(sb, "%d ", i);
9053	rc = sbuf_finish(sb);
9054	sbuf_delete(sb);
9055
9056	return (rc);
9058}
9059
9060#ifdef TCP_OFFLOAD
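/*
 * List of TCP ports whose inbound payload the TOE should treat as TLS.  The
 * sysctl accepts a replacement list of port numbers; writing a single -1
 * clears the list.
 */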
9061static int
9062sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
9063{
9064	struct adapter *sc = arg1;
9065	int *old_ports, *new_ports;
9066	int i, new_count, rc;
9067
9068	if (req->newptr == NULL && req->oldptr == NULL)
9069		return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
9070		    sizeof(sc->tt.tls_rx_ports[0])));
9071
9072	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
9073	if (rc)
9074		return (rc);
9075
9076	if (sc->tt.num_tls_rx_ports == 0) {
9077		i = -1;
9078		rc = SYSCTL_OUT(req, &i, sizeof(i));
9079	} else
9080		rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
9081		    sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
9082	if (rc == 0 && req->newptr != NULL) {
9083		new_count = req->newlen / sizeof(new_ports[0]);
9084		new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
9085		    M_WAITOK);
9086		rc = SYSCTL_IN(req, new_ports, new_count *
9087		    sizeof(new_ports[0]));
9088		if (rc)
9089			goto err;
9090
9091		/* Allow setting to a single '-1' to clear the list. */
9092		if (new_count == 1 && new_ports[0] == -1) {
9093			ADAPTER_LOCK(sc);
9094			old_ports = sc->tt.tls_rx_ports;
9095			sc->tt.tls_rx_ports = NULL;
9096			sc->tt.num_tls_rx_ports = 0;
9097			ADAPTER_UNLOCK(sc);
9098			free(old_ports, M_CXGBE);
9099		} else {
9100			for (i = 0; i < new_count; i++) {
9101				if (new_ports[i] < 1 ||
9102				    new_ports[i] > IPPORT_MAX) {
9103					rc = EINVAL;
9104					goto err;
9105				}
9106			}
9107
9108			ADAPTER_LOCK(sc);
9109			old_ports = sc->tt.tls_rx_ports;
9110			sc->tt.tls_rx_ports = new_ports;
9111			sc->tt.num_tls_rx_ports = new_count;
9112			ADAPTER_UNLOCK(sc);
9113			free(old_ports, M_CXGBE);
9114			new_ports = NULL;
9115		}
9116	err:
9117		free(new_ports, M_CXGBE);
9118	}
9119	end_synchronized_op(sc, 0);
9120	return (rc);
9121}
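/*
 * Editor's sketch (hypothetical userland code, not part of the driver): the
 * handler above backs an integer-list sysctl.  Assuming it is published
 * under the adapter's TOE node (the exact oid path below is illustrative),
 * it can be driven with sysctlbyname(3):
 *
 *	int ports[2] = { 443, 4443 };
 *	sysctlbyname("dev.t4nex.0.toe.tls_rx_ports", NULL, NULL,
 *	    ports, sizeof(ports));		// install the list
 *
 *	int clear = -1;
 *	sysctlbyname("dev.t4nex.0.toe.tls_rx_ports", NULL, NULL,
 *	    &clear, sizeof(clear));		// a single -1 empties it
 *
 * Every entry other than a lone -1 must be a valid TCP port in the range
 * 1..IPPORT_MAX, or the whole update fails with EINVAL.
 */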
9122
9123static void
9124unit_conv(char *buf, size_t len, u_int val, u_int factor)
9125{
9126	u_int rem = val % factor;
9127
9128	if (rem == 0)
9129		snprintf(buf, len, "%u", val / factor);
9130	else {
9131		while (rem % 10 == 0)
9132			rem /= 10;
9133		snprintf(buf, len, "%u.%u", val / factor, rem);
9134	}
9135}
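/*
 * Editor's worked example: unit_conv(buf, len, 1638400, 1000000) computes
 * 1638400 / 1000000 = 1 remainder 638400, strips the trailing zeros from
 * the remainder, and formats "1.6384".  Note that the "%u.%u" format cannot
 * represent leading zeros in the fraction: 2048000 / 1000000 renders as
 * "2.48" rather than "2.048", so the output is exact only when the leading
 * digit of the remainder is nonzero.
 */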
9136
9137static int
9138sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
9139{
9140	struct adapter *sc = arg1;
9141	char buf[16];
9142	u_int res, re;
9143	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9144
9145	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9146	switch (arg2) {
9147	case 0:
9148		/* timer_tick */
9149		re = G_TIMERRESOLUTION(res);
9150		break;
9151	case 1:
9152		/* TCP timestamp tick */
9153		re = G_TIMESTAMPRESOLUTION(res);
9154		break;
9155	case 2:
9156		/* DACK tick */
9157		re = G_DELAYEDACKRESOLUTION(res);
9158		break;
9159	default:
9160		return (EDOOFUS);
9161	}
9162
9163	unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
9164
9165	return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
9166}
9167
9168static int
9169sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
9170{
9171	struct adapter *sc = arg1;
9172	u_int res, dack_re, v;
9173	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9174
9175	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9176	dack_re = G_DELAYEDACKRESOLUTION(res);
9177	v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
9178
9179	return (sysctl_handle_int(oidp, &v, 0, req));
9180}
9181
9182static int
9183sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
9184{
9185	struct adapter *sc = arg1;
9186	int reg = arg2;
9187	u_int tre;
9188	u_long tp_tick_us, v;
9189	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9190
9191	MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
9192	    reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
9193	    reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
9194	    reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
9195
9196	tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
9197	tp_tick_us = (cclk_ps << tre) / 1000000;
9198
9199	if (reg == A_TP_INIT_SRTT)
9200		v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
9201	else
9202		v = tp_tick_us * t4_read_reg(sc, reg);
9203
9204	return (sysctl_handle_long(oidp, &v, 0, req));
9205}
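/*
 * Editor's note on units (values illustrative): vpd.cclk is in kHz, so a
 * 625 MHz core clock gives cclk_ps = 10^9 / 625000 = 1600 ps per core tick.
 * With a timer resolution field of 10, one TP timer tick is
 * 1600 << 10 = 1638400 ps.  sysctl_tp_tick() above formats that through
 * unit_conv() as "1.6384" (us), but the two handlers here truncate to whole
 * microseconds first (tp_tick_us = 1638400 / 1000000 = 1), so a register
 * holding 100 ticks is reported as 100 us even though the exact value is
 * about 163.8 us.
 */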
9206
9207/*
9208 * All fields in TP_SHIFT_CNT are 4 bits wide.  The starting bit position of
9209 * the field to read is passed in as arg2.
9210 */
9211static int
9212sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
9213{
9214	struct adapter *sc = arg1;
9215	int idx = arg2;
9216	u_int v;
9217
9218	MPASS(idx >= 0 && idx <= 24);
9219
9220	v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
9221
9222	return (sysctl_handle_int(oidp, &v, 0, req));
9223}
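/*
 * Editor's example: with arg2 = 12 the read above reduces to
 * (TP_SHIFT_CNT >> 12) & 0xf, i.e. the 4-bit field occupying bits 15:12 of
 * the register.
 */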
9224
9225static int
9226sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
9227{
9228	struct adapter *sc = arg1;
9229	int idx = arg2;
9230	u_int shift, v, r;
9231
9232	MPASS(idx >= 0 && idx < 16);
9233
9234	r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
9235	shift = (idx & 3) << 3;
9236	v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
9237
9238	return (sysctl_handle_int(oidp, &v, 0, req));
9239}
9240
9241static int
9242sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
9243{
9244	struct vi_info *vi = arg1;
9245	struct adapter *sc = vi->pi->adapter;
9246	int idx, rc, i;
9247	struct sge_ofld_rxq *ofld_rxq;
9248	uint8_t v;
9249
9250	idx = vi->ofld_tmr_idx;
9251
9252	rc = sysctl_handle_int(oidp, &idx, 0, req);
9253	if (rc != 0 || req->newptr == NULL)
9254		return (rc);
9255
9256	if (idx < 0 || idx >= SGE_NTIMERS)
9257		return (EINVAL);
9258
9259	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9260	    "t4otmr");
9261	if (rc)
9262		return (rc);
9263
9264	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
9265	for_each_ofld_rxq(vi, i, ofld_rxq) {
9266#ifdef atomic_store_rel_8
9267		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
9268#else
9269		ofld_rxq->iq.intr_params = v;
9270#endif
9271	}
9272	vi->ofld_tmr_idx = idx;
9273
9274	end_synchronized_op(sc, LOCK_HELD);
9275	return (0);
9276}
9277
9278static int
9279sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
9280{
9281	struct vi_info *vi = arg1;
9282	struct adapter *sc = vi->pi->adapter;
9283	int idx, rc;
9284
9285	idx = vi->ofld_pktc_idx;
9286
9287	rc = sysctl_handle_int(oidp, &idx, 0, req);
9288	if (rc != 0 || req->newptr == NULL)
9289		return (rc);
9290
9291	if (idx < -1 || idx >= SGE_NCOUNTERS)
9292		return (EINVAL);
9293
9294	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9295	    "t4opktc");
9296	if (rc)
9297		return (rc);
9298
9299	if (vi->flags & VI_INIT_DONE)
9300		rc = EBUSY; /* cannot be changed once the queues are created */
9301	else
9302		vi->ofld_pktc_idx = idx;
9303
9304	end_synchronized_op(sc, LOCK_HELD);
9305	return (rc);
9306}
9307#endif
9308
9309static int
9310get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
9311{
9312	int rc;
9313
9314	if (cntxt->cid > M_CTXTQID)
9315		return (EINVAL);
9316
9317	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
9318	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
9319		return (EINVAL);
9320
9321	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
9322	if (rc)
9323		return (rc);
9324
9325	if (sc->flags & FW_OK) {
9326		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
9327		    &cntxt->data[0]);
9328		if (rc == 0)
9329			goto done;
9330	}
9331
9332	/*
9333	 * Read via firmware failed or wasn't even attempted.  Read directly via
9334	 * the backdoor.
9335	 */
9336	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
9337done:
9338	end_synchronized_op(sc, 0);
9339	return (rc);
9340}
9341
9342static int
9343load_fw(struct adapter *sc, struct t4_data *fw)
9344{
9345	int rc;
9346	uint8_t *fw_data;
9347
9348	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
9349	if (rc)
9350		return (rc);
9351
9352	/*
9353	 * The firmware, with the sole exception of the memory parity error
9354	 * handler, runs from memory and not flash.  It is almost always safe to
9355	 * install a new firmware image on a running system.  Just set bit 1
9356	 * (DF_LOAD_FW_ANYTIME) in hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
9357	 */
9358	if (sc->flags & FULL_INIT_DONE &&
9359	    (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
9360		rc = EBUSY;
9361		goto done;
9362	}
9363
9364	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
9365	if (fw_data == NULL) {
9366		rc = ENOMEM;
9367		goto done;
9368	}
9369
9370	rc = copyin(fw->data, fw_data, fw->len);
9371	if (rc == 0)
9372		rc = -t4_load_fw(sc, fw_data, fw->len);
9373
9374	free(fw_data, M_CXGBE);
9375done:
9376	end_synchronized_op(sc, 0);
9377	return (rc);
9378}
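/*
 * Editor's note: per the comment above, DF_LOAD_FW_ANYTIME is bit 1 of the
 * debug flags, so a firmware load on a fully initialized adapter can be
 * allowed with (instance number illustrative):
 *
 *	# sysctl dev.t4nex.0.dflags=2
 *
 * before issuing CHELSIO_T4_LOAD_FW (e.g. via cxgbetool's loadfw command).
 */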
9379
9380static int
9381load_cfg(struct adapter *sc, struct t4_data *cfg)
9382{
9383	int rc;
9384	uint8_t *cfg_data = NULL;
9385
9386	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
9387	if (rc)
9388		return (rc);
9389
9390	if (cfg->len == 0) {
9391		/* clear */
9392		rc = -t4_load_cfg(sc, NULL, 0);
9393		goto done;
9394	}
9395
9396	cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
9397	if (cfg_data == NULL) {
9398		rc = ENOMEM;
9399		goto done;
9400	}
9401
9402	rc = copyin(cfg->data, cfg_data, cfg->len);
9403	if (rc == 0)
9404		rc = -t4_load_cfg(sc, cfg_data, cfg->len);
9405
9406	free(cfg_data, M_CXGBE);
9407done:
9408	end_synchronized_op(sc, 0);
9409	return (rc);
9410}
9411
9412static int
9413load_boot(struct adapter *sc, struct t4_bootrom *br)
9414{
9415	int rc;
9416	uint8_t *br_data = NULL;
9417	u_int offset;
9418
9419	if (br->len > 1024 * 1024)
9420		return (EFBIG);
9421
9422	if (br->pf_offset == 0) {
9423		/* pfidx */
9424		if (br->pfidx_addr > 7)
9425			return (EINVAL);
9426		offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
9427		    A_PCIE_PF_EXPROM_OFST)));
9428	} else if (br->pf_offset == 1) {
9429		/* offset */
9430		offset = G_OFFSET(br->pfidx_addr);
9431	} else {
9432		return (EINVAL);
9433	}
9434
9435	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
9436	if (rc)
9437		return (rc);
9438
9439	if (br->len == 0) {
9440		/* clear */
9441		rc = -t4_load_boot(sc, NULL, offset, 0);
9442		goto done;
9443	}
9444
9445	br_data = malloc(br->len, M_CXGBE, M_WAITOK);
9446	if (br_data == NULL) {
9447		rc = ENOMEM;
9448		goto done;
9449	}
9450
9451	rc = copyin(br->data, br_data, br->len);
9452	if (rc == 0)
9453		rc = -t4_load_boot(sc, br_data, offset, br->len);
9454
9455	free(br_data, M_CXGBE);
9456done:
9457	end_synchronized_op(sc, 0);
9458	return (rc);
9459}
9460
9461static int
9462load_bootcfg(struct adapter *sc, struct t4_data *bc)
9463{
9464	int rc;
9465	uint8_t *bc_data = NULL;
9466
9467	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
9468	if (rc)
9469		return (rc);
9470
9471	if (bc->len == 0) {
9472		/* clear */
9473		rc = -t4_load_bootcfg(sc, NULL, 0);
9474		goto done;
9475	}
9476
9477	bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
9478	if (bc_data == NULL) {
9479		rc = ENOMEM;
9480		goto done;
9481	}
9482
9483	rc = copyin(bc->data, bc_data, bc->len);
9484	if (rc == 0)
9485		rc = -t4_load_bootcfg(sc, bc_data, bc->len);
9486
9487	free(bc_data, M_CXGBE);
9488done:
9489	end_synchronized_op(sc, 0);
9490	return (rc);
9491}
9492
9493static int
9494cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
9495{
9496	int rc;
9497	struct cudbg_init *cudbg;
9498	void *handle, *buf;
9499
9500	/* buf is large, don't block if no memory is available */
9501	buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
9502	if (buf == NULL)
9503		return (ENOMEM);
9504
9505	handle = cudbg_alloc_handle();
9506	if (handle == NULL) {
9507		rc = ENOMEM;
9508		goto done;
9509	}
9510
9511	cudbg = cudbg_get_init(handle);
9512	cudbg->adap = sc;
9513	cudbg->print = (cudbg_print_cb)printf;
9514
9515#ifndef notyet
9516	device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
9517	    __func__, dump->wr_flash, dump->len, dump->data);
9518#endif
9519
9520	if (dump->wr_flash)
9521		cudbg->use_flash = 1;
9522	MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
9523	memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
9524
9525	rc = cudbg_collect(handle, buf, &dump->len);
9526	if (rc != 0)
9527		goto done;
9528
9529	rc = copyout(buf, dump->data, dump->len);
9530done:
9531	cudbg_free_handle(handle);
9532	free(buf, M_CXGBE);
9533	return (rc);
9534}
9535
9536static void
9537free_offload_policy(struct t4_offload_policy *op)
9538{
9539	struct offload_rule *r;
9540	int i;
9541
9542	if (op == NULL)
9543		return;
9544
9545	r = &op->rule[0];
9546	for (i = 0; i < op->nrules; i++, r++) {
9547		free(r->bpf_prog.bf_insns, M_CXGBE);
9548	}
9549	free(op->rule, M_CXGBE);
9550	free(op, M_CXGBE);
9551}
9552
9553static int
9554set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
9555{
9556	int i, rc, len;
9557	struct t4_offload_policy *op, *old;
9558	struct bpf_program *bf;
9559	const struct offload_settings *s;
9560	struct offload_rule *r;
9561	void *u;
9562
9563	if (!is_offload(sc))
9564		return (ENODEV);
9565
9566	if (uop->nrules == 0) {
9567		/* Delete installed policies. */
9568		op = NULL;
9569		goto set_policy;
9570	} else if (uop->nrules > 256) { /* arbitrary */
9571		return (E2BIG);
9572	}
9573
9574	/* Copy userspace offload policy to kernel */
9575	op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
9576	op->nrules = uop->nrules;
9577	len = op->nrules * sizeof(struct offload_rule);
9578	op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
9579	rc = copyin(uop->rule, op->rule, len);
9580	if (rc) {
9581		free(op->rule, M_CXGBE);
9582		free(op, M_CXGBE);
9583		return (rc);
9584	}
9585
9586	r = &op->rule[0];
9587	for (i = 0; i < op->nrules; i++, r++) {
9588
9589		/* Validate open_type */
9590		if (r->open_type != OPEN_TYPE_LISTEN &&
9591		    r->open_type != OPEN_TYPE_ACTIVE &&
9592		    r->open_type != OPEN_TYPE_PASSIVE &&
9593		    r->open_type != OPEN_TYPE_DONTCARE) {
9594error:
9595			/*
9596			 * Rules 0 to i have malloc'd filters that need to be
9597			 * freed.  Rules i+1 to nrules have userspace pointers
9598			 * and should be left alone.
9599			 */
9600			op->nrules = i;
9601			free_offload_policy(op);
9602			return (rc);
9603		}
9604
9605		/* Validate settings */
9606		s = &r->settings;
9607		if ((s->offload != 0 && s->offload != 1) ||
9608		    s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
9609		    s->sched_class < -1 ||
9610		    s->sched_class >= sc->chip_params->nsched_cls) {
9611			rc = EINVAL;
9612			goto error;
9613		}
9614
9615		bf = &r->bpf_prog;
9616		u = bf->bf_insns;	/* userspace ptr */
9617		bf->bf_insns = NULL;
9618		if (bf->bf_len == 0) {
9619			/* legal, matches everything */
9620			continue;
9621		}
9622		len = bf->bf_len * sizeof(*bf->bf_insns);
9623		bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
9624		rc = copyin(u, bf->bf_insns, len);
9625		if (rc != 0)
9626			goto error;
9627
9628		if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
9629			rc = EINVAL;
9630			goto error;
9631		}
9632	}
9633set_policy:
9634	rw_wlock(&sc->policy_lock);
9635	old = sc->policy;
9636	sc->policy = op;
9637	rw_wunlock(&sc->policy_lock);
9638	free_offload_policy(old);
9639
9640	return (0);
9641}
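/*
 * Editor's sketch (hypothetical userland code): the rules validated above
 * carry classic BPF programs, so one way to build them is pcap_compile(3).
 * Struct and constant names come from t4_ioctl.h and net/bpf.h; the filter
 * text, DLT, and file descriptor are placeholders, and note that the TOE
 * evaluates the program against its own offload-request layout, not an
 * Ethernet frame.
 *
 *	struct bpf_program bf;
 *	pcap_t *p = pcap_open_dead(DLT_EN10MB, 128);
 *	pcap_compile(p, &bf, "tcp port 443", 1, PCAP_NETMASK_UNKNOWN);
 *
 *	struct offload_rule r = { 0 };
 *	r.open_type = OPEN_TYPE_DONTCARE;	// match all open types
 *	r.settings.offload = 1;			// offload matching conns
 *	r.settings.cong_algo = -1;		// firmware default
 *	r.settings.sched_class = -1;		// no traffic class
 *	r.bpf_prog = bf;			// bf_len == 0 matches all
 *
 *	struct t4_offload_policy op = { .nrules = 1, .rule = &r };
 *	ioctl(fd, CHELSIO_T4_SET_OFLD_POLICY, &op);	// fd: nexus devnode
 */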
9642
9643#define MAX_READ_BUF_SIZE (128 * 1024)
9644static int
9645read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
9646{
9647	uint32_t addr, remaining, n;
9648	uint32_t *buf;
9649	int rc;
9650	uint8_t *dst;
9651
9652	rc = validate_mem_range(sc, mr->addr, mr->len);
9653	if (rc != 0)
9654		return (rc);
9655
9656	buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
9657	addr = mr->addr;
9658	remaining = mr->len;
9659	dst = (void *)mr->data;
9660
9661	while (remaining) {
9662		n = min(remaining, MAX_READ_BUF_SIZE);
9663		read_via_memwin(sc, 2, addr, buf, n);
9664
9665		rc = copyout(buf, dst, n);
9666		if (rc != 0)
9667			break;
9668
9669		dst += n;
9670		remaining -= n;
9671		addr += n;
9672	}
9673
9674	free(buf, M_CXGBE);
9675	return (rc);
9676}
9677#undef MAX_READ_BUF_SIZE
9678
9679static int
9680read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
9681{
9682	int rc;
9683
9684	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
9685		return (EINVAL);
9686
9687	if (i2cd->len > sizeof(i2cd->data))
9688		return (EFBIG);
9689
9690	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
9691	if (rc)
9692		return (rc);
9693	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
9694	    i2cd->offset, i2cd->len, &i2cd->data[0]);
9695	end_synchronized_op(sc, 0);
9696
9697	return (rc);
9698}
9699
9700int
9701t4_os_find_pci_capability(struct adapter *sc, int cap)
9702{
9703	int i;
9704
9705	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
9706}
9707
9708int
9709t4_os_pci_save_state(struct adapter *sc)
9710{
9711	device_t dev;
9712	struct pci_devinfo *dinfo;
9713
9714	dev = sc->dev;
9715	dinfo = device_get_ivars(dev);
9716
9717	pci_cfg_save(dev, dinfo, 0);
9718	return (0);
9719}
9720
9721int
9722t4_os_pci_restore_state(struct adapter *sc)
9723{
9724	device_t dev;
9725	struct pci_devinfo *dinfo;
9726
9727	dev = sc->dev;
9728	dinfo = device_get_ivars(dev);
9729
9730	pci_cfg_restore(dev, dinfo);
9731	return (0);
9732}
9733
9734void
9735t4_os_portmod_changed(struct port_info *pi)
9736{
9737	struct adapter *sc = pi->adapter;
9738	struct vi_info *vi;
9739	struct ifnet *ifp;
9740	static const char *mod_str[] = {
9741		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
9742	};
9743
9744	KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
9745	    ("%s: port_type %u", __func__, pi->port_type));
9746
9747	vi = &pi->vi[0];
9748	if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
9749		PORT_LOCK(pi);
9750		build_medialist(pi);
9751		if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
9752			fixup_link_config(pi);
9753			apply_link_config(pi);
9754		}
9755		PORT_UNLOCK(pi);
9756		end_synchronized_op(sc, LOCK_HELD);
9757	}
9758
9759	ifp = vi->ifp;
9760	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
9761		if_printf(ifp, "transceiver unplugged.\n");
9762	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
9763		if_printf(ifp, "unknown transceiver inserted.\n");
9764	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
9765		if_printf(ifp, "unsupported transceiver inserted.\n");
9766	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
9767		if_printf(ifp, "%dGbps %s transceiver inserted.\n",
9768		    port_top_speed(pi), mod_str[pi->mod_type]);
9769	} else {
9770		if_printf(ifp, "transceiver (type %d) inserted.\n",
9771		    pi->mod_type);
9772	}
9773}
9774
9775void
9776t4_os_link_changed(struct port_info *pi)
9777{
9778	struct vi_info *vi;
9779	struct ifnet *ifp;
9780	struct link_config *lc;
9781	int v;
9782
9783	PORT_LOCK_ASSERT_OWNED(pi);
9784
9785	for_each_vi(pi, v, vi) {
9786		ifp = vi->ifp;
9787		if (ifp == NULL)
9788			continue;
9789
9790		lc = &pi->link_cfg;
9791		if (lc->link_ok) {
9792			ifp->if_baudrate = IF_Mbps(lc->speed);
9793			if_link_state_change(ifp, LINK_STATE_UP);
9794		} else {
9795			if_link_state_change(ifp, LINK_STATE_DOWN);
9796		}
9797	}
9798}
9799
9800void
9801t4_iterate(void (*func)(struct adapter *, void *), void *arg)
9802{
9803	struct adapter *sc;
9804
9805	sx_slock(&t4_list_lock);
9806	SLIST_FOREACH(sc, &t4_list, link) {
9807		/*
9808		 * func should not make any assumptions about what state sc is
9809		 * in - the only guarantee is that sc->sc_lock is a valid lock.
9810		 */
9811		func(sc, arg);
9812	}
9813	sx_sunlock(&t4_list_lock);
9814}
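/*
 * Editor's example (illustrative): a ULD visits every adapter in the system
 * with a callback, e.g. counting them:
 *
 *	static void
 *	count_one(struct adapter *sc, void *arg)
 *	{
 *
 *		(*(int *)arg)++;
 *	}
 *
 *	int n = 0;
 *	t4_iterate(count_one, &n);
 *
 * As the comment in the loop warns, the callback must not assume anything
 * about the adapter's state beyond sc_lock being valid.
 */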
9815
9816static int
9817t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
9818    struct thread *td)
9819{
9820	int rc;
9821	struct adapter *sc = dev->si_drv1;
9822
9823	rc = priv_check(td, PRIV_DRIVER);
9824	if (rc != 0)
9825		return (rc);
9826
9827	switch (cmd) {
9828	case CHELSIO_T4_GETREG: {
9829		struct t4_reg *edata = (struct t4_reg *)data;
9830
9831		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9832			return (EFAULT);
9833
9834		if (edata->size == 4)
9835			edata->val = t4_read_reg(sc, edata->addr);
9836		else if (edata->size == 8)
9837			edata->val = t4_read_reg64(sc, edata->addr);
9838		else
9839			return (EINVAL);
9840
9841		break;
9842	}
9843	case CHELSIO_T4_SETREG: {
9844		struct t4_reg *edata = (struct t4_reg *)data;
9845
9846		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9847			return (EFAULT);
9848
9849		if (edata->size == 4) {
9850			if (edata->val & 0xffffffff00000000)
9851				return (EINVAL);
9852			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
9853		} else if (edata->size == 8)
9854			t4_write_reg64(sc, edata->addr, edata->val);
9855		else
9856			return (EINVAL);
9857		break;
9858	}
9859	case CHELSIO_T4_REGDUMP: {
9860		struct t4_regdump *regs = (struct t4_regdump *)data;
9861		int reglen = t4_get_regs_len(sc);
9862		uint8_t *buf;
9863
9864		if (regs->len < reglen) {
9865			regs->len = reglen; /* hint to the caller */
9866			return (ENOBUFS);
9867		}
9868
9869		regs->len = reglen;
9870		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
9871		get_regs(sc, regs, buf);
9872		rc = copyout(buf, regs->data, reglen);
9873		free(buf, M_CXGBE);
9874		break;
9875	}
9876	case CHELSIO_T4_GET_FILTER_MODE:
9877		rc = get_filter_mode(sc, (uint32_t *)data);
9878		break;
9879	case CHELSIO_T4_SET_FILTER_MODE:
9880		rc = set_filter_mode(sc, *(uint32_t *)data);
9881		break;
9882	case CHELSIO_T4_GET_FILTER:
9883		rc = get_filter(sc, (struct t4_filter *)data);
9884		break;
9885	case CHELSIO_T4_SET_FILTER:
9886		rc = set_filter(sc, (struct t4_filter *)data);
9887		break;
9888	case CHELSIO_T4_DEL_FILTER:
9889		rc = del_filter(sc, (struct t4_filter *)data);
9890		break;
9891	case CHELSIO_T4_GET_SGE_CONTEXT:
9892		rc = get_sge_context(sc, (struct t4_sge_context *)data);
9893		break;
9894	case CHELSIO_T4_LOAD_FW:
9895		rc = load_fw(sc, (struct t4_data *)data);
9896		break;
9897	case CHELSIO_T4_GET_MEM:
9898		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
9899		break;
9900	case CHELSIO_T4_GET_I2C:
9901		rc = read_i2c(sc, (struct t4_i2c_data *)data);
9902		break;
9903	case CHELSIO_T4_CLEAR_STATS: {
9904		int i, v, bg_map;
9905		u_int port_id = *(uint32_t *)data;
9906		struct port_info *pi;
9907		struct vi_info *vi;
9908
9909		if (port_id >= sc->params.nports)
9910			return (EINVAL);
9911		pi = sc->port[port_id];
9912		if (pi == NULL)
9913			return (EIO);
9914
9915		/* MAC stats */
9916		t4_clr_port_stats(sc, pi->tx_chan);
9917		pi->tx_parse_error = 0;
9918		pi->tnl_cong_drops = 0;
9919		mtx_lock(&sc->reg_lock);
9920		for_each_vi(pi, v, vi) {
9921			if (vi->flags & VI_INIT_DONE)
9922				t4_clr_vi_stats(sc, vi->viid);
9923		}
9924		bg_map = pi->mps_bg_map;
9925		v = 0;	/* reuse */
9926		while (bg_map) {
9927			i = ffs(bg_map) - 1;
9928			t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
9929			    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
9930			bg_map &= ~(1 << i);
9931		}
9932		mtx_unlock(&sc->reg_lock);
9933
9934		/*
9935		 * Since this command accepts a port, clear stats for
9936		 * all VIs on this port.
9937		 */
9938		for_each_vi(pi, v, vi) {
9939			if (vi->flags & VI_INIT_DONE) {
9940				struct sge_rxq *rxq;
9941				struct sge_txq *txq;
9942				struct sge_wrq *wrq;
9943
9944				for_each_rxq(vi, i, rxq) {
9945#if defined(INET) || defined(INET6)
9946					rxq->lro.lro_queued = 0;
9947					rxq->lro.lro_flushed = 0;
9948#endif
9949					rxq->rxcsum = 0;
9950					rxq->vlan_extraction = 0;
9951				}
9952
9953				for_each_txq(vi, i, txq) {
9954					txq->txcsum = 0;
9955					txq->tso_wrs = 0;
9956					txq->vlan_insertion = 0;
9957					txq->imm_wrs = 0;
9958					txq->sgl_wrs = 0;
9959					txq->txpkt_wrs = 0;
9960					txq->txpkts0_wrs = 0;
9961					txq->txpkts1_wrs = 0;
9962					txq->txpkts0_pkts = 0;
9963					txq->txpkts1_pkts = 0;
9964					mp_ring_reset_stats(txq->r);
9965				}
9966
9967#ifdef TCP_OFFLOAD
9968				/* nothing to clear for each ofld_rxq */
9969
9970				for_each_ofld_txq(vi, i, wrq) {
9971					wrq->tx_wrs_direct = 0;
9972					wrq->tx_wrs_copied = 0;
9973				}
9974#endif
9975
9976				if (IS_MAIN_VI(vi)) {
9977					wrq = &sc->sge.ctrlq[pi->port_id];
9978					wrq->tx_wrs_direct = 0;
9979					wrq->tx_wrs_copied = 0;
9980				}
9981			}
9982		}
9983		break;
9984	}
9985	case CHELSIO_T4_SCHED_CLASS:
9986		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
9987		break;
9988	case CHELSIO_T4_SCHED_QUEUE:
9989		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
9990		break;
9991	case CHELSIO_T4_GET_TRACER:
9992		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
9993		break;
9994	case CHELSIO_T4_SET_TRACER:
9995		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
9996		break;
9997	case CHELSIO_T4_LOAD_CFG:
9998		rc = load_cfg(sc, (struct t4_data *)data);
9999		break;
10000	case CHELSIO_T4_LOAD_BOOT:
10001		rc = load_boot(sc, (struct t4_bootrom *)data);
10002		break;
10003	case CHELSIO_T4_LOAD_BOOTCFG:
10004		rc = load_bootcfg(sc, (struct t4_data *)data);
10005		break;
10006	case CHELSIO_T4_CUDBG_DUMP:
10007		rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
10008		break;
10009	case CHELSIO_T4_SET_OFLD_POLICY:
10010		rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
10011		break;
10012	default:
10013		rc = ENOTTY;
10014	}
10015
10016	return (rc);
10017}
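/*
 * Editor's sketch (hypothetical userland code): these ioctls are issued
 * against the nexus character device.  Reading a 32-bit register, with the
 * device path and offset as placeholders:
 *
 *	struct t4_reg reg = { .addr = 0x0, .size = 4 };
 *	int fd = open("/dev/t4nex0", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, CHELSIO_T4_GETREG, &reg) == 0)
 *		printf("val = %#jx\n", (uintmax_t)reg.val);
 *
 * The handler rejects unaligned offsets and anything at or beyond mmio_len,
 * and priv_check() limits every command here to privileged callers.
 */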
10018
10019void
10020t4_db_full(struct adapter *sc)
10021{
10022
10023	CXGBE_UNIMPLEMENTED(__func__);
10024}
10025
10026void
10027t4_db_dropped(struct adapter *sc)
10028{
10029
10030	CXGBE_UNIMPLEMENTED(__func__);
10031}
10032
10033#ifdef TCP_OFFLOAD
10034void
10035t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order)
10036{
10037
10038	t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask);
10039	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) |
10040		V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) |
10041		V_HPZ3(pgsz_order[3]));
10042}
10043
10044static int
10045toe_capability(struct vi_info *vi, int enable)
10046{
10047	int rc;
10048	struct port_info *pi = vi->pi;
10049	struct adapter *sc = pi->adapter;
10050
10051	ASSERT_SYNCHRONIZED_OP(sc);
10052
10053	if (!is_offload(sc))
10054		return (ENODEV);
10055
10056	if (enable) {
10057		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
10058			/* TOE is already enabled. */
10059			return (0);
10060		}
10061
10062		/*
10063		 * We need the port's queues around so that we're able to send
10064		 * and receive CPLs to/from the TOE even if the ifnet for this
10065		 * port has never been UP'd administratively.
10066		 */
10067		if (!(vi->flags & VI_INIT_DONE)) {
10068			rc = vi_full_init(vi);
10069			if (rc)
10070				return (rc);
10071		}
10072		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
10073			rc = vi_full_init(&pi->vi[0]);
10074			if (rc)
10075				return (rc);
10076		}
10077
10078		if (isset(&sc->offload_map, pi->port_id)) {
10079			/* TOE is enabled on another VI of this port. */
10080			pi->uld_vis++;
10081			return (0);
10082		}
10083
10084		if (!uld_active(sc, ULD_TOM)) {
10085			rc = t4_activate_uld(sc, ULD_TOM);
10086			if (rc == EAGAIN) {
10087				log(LOG_WARNING,
10088				    "You must kldload t4_tom.ko before trying "
10089				    "to enable TOE on a cxgbe interface.\n");
10090			}
10091			if (rc != 0)
10092				return (rc);
10093			KASSERT(sc->tom_softc != NULL,
10094			    ("%s: TOM activated but softc NULL", __func__));
10095			KASSERT(uld_active(sc, ULD_TOM),
10096			    ("%s: TOM activated but flag not set", __func__));
10097		}
10098
10099		/* Activate iWARP and iSCSI too, if the modules are loaded. */
10100		if (!uld_active(sc, ULD_IWARP))
10101			(void) t4_activate_uld(sc, ULD_IWARP);
10102		if (!uld_active(sc, ULD_ISCSI))
10103			(void) t4_activate_uld(sc, ULD_ISCSI);
10104
10105		pi->uld_vis++;
10106		setbit(&sc->offload_map, pi->port_id);
10107	} else {
10108		pi->uld_vis--;
10109
10110		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
10111			return (0);
10112
10113		KASSERT(uld_active(sc, ULD_TOM),
10114		    ("%s: TOM never initialized?", __func__));
10115		clrbit(&sc->offload_map, pi->port_id);
10116	}
10117
10118	return (0);
10119}
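/*
 * Editor's note: this function is reached through the IFCAP_TOE capability
 * toggle, so TOE is enabled or disabled per interface from userland, e.g.:
 *
 *	# kldload t4_tom
 *	# ifconfig cxgbe0 toe
 *	# ifconfig cxgbe0 -toe
 *
 * (interface name illustrative; T5/T6 ports appear as cxl/cc).
 */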
10120
10121/*
10122 * Add an upper layer driver to the global list.
10123 */
10124int
10125t4_register_uld(struct uld_info *ui)
10126{
10127	int rc = 0;
10128	struct uld_info *u;
10129
10130	sx_xlock(&t4_uld_list_lock);
10131	SLIST_FOREACH(u, &t4_uld_list, link) {
10132		if (u->uld_id == ui->uld_id) {
10133			rc = EEXIST;
10134			goto done;
10135		}
10136	}
10137
10138	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
10139	ui->refcount = 0;
10140done:
10141	sx_xunlock(&t4_uld_list_lock);
10142	return (rc);
10143}
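/*
 * Editor's sketch: an upper layer driver registers itself once at module
 * load.  Using only the uld_info fields visible in this file (uld_id,
 * refcount, activate, deactivate), a skeleton for a hypothetical ULD:
 *
 *	static int my_activate(struct adapter *sc)	{ return (0); }
 *	static int my_deactivate(struct adapter *sc)	{ return (0); }
 *
 *	static struct uld_info my_uld = {
 *		.uld_id = ULD_ISCSI,		// one of the known ULD ids
 *		.activate = my_activate,
 *		.deactivate = my_deactivate,
 *	};
 *
 *	error = t4_register_uld(&my_uld);	// EEXIST if id already taken
 */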
10144
10145int
10146t4_unregister_uld(struct uld_info *ui)
10147{
10148	int rc = EINVAL;
10149	struct uld_info *u;
10150
10151	sx_xlock(&t4_uld_list_lock);
10152
10153	SLIST_FOREACH(u, &t4_uld_list, link) {
10154		if (u == ui) {
10155			if (ui->refcount > 0) {
10156				rc = EBUSY;
10157				goto done;
10158			}
10159
10160			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
10161			rc = 0;
10162			goto done;
10163		}
10164	}
10165done:
10166	sx_xunlock(&t4_uld_list_lock);
10167	return (rc);
10168}
10169
10170int
10171t4_activate_uld(struct adapter *sc, int id)
10172{
10173	int rc;
10174	struct uld_info *ui;
10175
10176	ASSERT_SYNCHRONIZED_OP(sc);
10177
10178	if (id < 0 || id > ULD_MAX)
10179		return (EINVAL);
10180	rc = EAGAIN;	/* kldload the module with this ULD and try again. */
10181
10182	sx_slock(&t4_uld_list_lock);
10183
10184	SLIST_FOREACH(ui, &t4_uld_list, link) {
10185		if (ui->uld_id == id) {
10186			if (!(sc->flags & FULL_INIT_DONE)) {
10187				rc = adapter_full_init(sc);
10188				if (rc != 0)
10189					break;
10190			}
10191
10192			rc = ui->activate(sc);
10193			if (rc == 0) {
10194				setbit(&sc->active_ulds, id);
10195				ui->refcount++;
10196			}
10197			break;
10198		}
10199	}
10200
10201	sx_sunlock(&t4_uld_list_lock);
10202
10203	return (rc);
10204}
10205
10206int
10207t4_deactivate_uld(struct adapter *sc, int id)
10208{
10209	int rc;
10210	struct uld_info *ui;
10211
10212	ASSERT_SYNCHRONIZED_OP(sc);
10213
10214	if (id < 0 || id > ULD_MAX)
10215		return (EINVAL);
10216	rc = ENXIO;
10217
10218	sx_slock(&t4_uld_list_lock);
10219
10220	SLIST_FOREACH(ui, &t4_uld_list, link) {
10221		if (ui->uld_id == id) {
10222			rc = ui->deactivate(sc);
10223			if (rc == 0) {
10224				clrbit(&sc->active_ulds, id);
10225				ui->refcount--;
10226			}
10227			break;
10228		}
10229	}
10230
10231	sx_sunlock(&t4_uld_list_lock);
10232
10233	return (rc);
10234}
10235
10236int
10237uld_active(struct adapter *sc, int uld_id)
10238{
10239
10240	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
10241
10242	return (isset(&sc->active_ulds, uld_id));
10243}
10244#endif
10245
10246/*
10247 * t  = ptr to tunable.
10248 * nc = number of CPUs.
10249 * c  = compiled-in default for that tunable.
10250 */
10251static void
10252calculate_nqueues(int *t, int nc, const int c)
10253{
10254	int nq;
10255
10256	if (*t > 0)
10257		return;
10258	nq = *t < 0 ? -*t : c;
10259	*t = min(nc, nq);
10260}
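/*
 * Editor's worked example: with nc = 8 CPUs and a compiled-in default of
 * c = 16, a tunable left at 0 becomes min(8, 16) = 8; a tunable set to -16
 * (meaning "up to 16") also becomes 8; a tunable explicitly set to 4 is
 * returned unchanged.
 */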
10261
10262/*
10263 * Come up with reasonable defaults for some of the tunables, provided they're
10264 * not set by the user (in which case we'll use the values as is).
10265 */
10266static void
10267tweak_tunables(void)
10268{
10269	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
10270
10271	if (t4_ntxq < 1) {
10272#ifdef RSS
10273		t4_ntxq = rss_getnumbuckets();
10274#else
10275		calculate_nqueues(&t4_ntxq, nc, NTXQ);
10276#endif
10277	}
10278
10279	calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
10280
10281	if (t4_nrxq < 1) {
10282#ifdef RSS
10283		t4_nrxq = rss_getnumbuckets();
10284#else
10285		calculate_nqueues(&t4_nrxq, nc, NRXQ);
10286#endif
10287	}
10288
10289	calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
10290
10291#ifdef TCP_OFFLOAD
10292	calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
10293	calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
10294	calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
10295	calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
10296
10297	if (t4_toecaps_allowed == -1)
10298		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
10299
10300	if (t4_rdmacaps_allowed == -1) {
10301		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
10302		    FW_CAPS_CONFIG_RDMA_RDMAC;
10303	}
10304
10305	if (t4_iscsicaps_allowed == -1) {
10306		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
10307		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
10308		    FW_CAPS_CONFIG_ISCSI_T10DIF;
10309	}
10310
10311	if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
10312		t4_tmr_idx_ofld = TMR_IDX_OFLD;
10313
10314	if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
10315		t4_pktc_idx_ofld = PKTC_IDX_OFLD;
10316#else
10317	if (t4_toecaps_allowed == -1)
10318		t4_toecaps_allowed = 0;
10319
10320	if (t4_rdmacaps_allowed == -1)
10321		t4_rdmacaps_allowed = 0;
10322
10323	if (t4_iscsicaps_allowed == -1)
10324		t4_iscsicaps_allowed = 0;
10325#endif
10326
10327#ifdef DEV_NETMAP
10328	calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
10329	calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
10330#endif
10331
10332	if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
10333		t4_tmr_idx = TMR_IDX;
10334
10335	if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
10336		t4_pktc_idx = PKTC_IDX;
10337
10338	if (t4_qsize_txq < 128)
10339		t4_qsize_txq = 128;
10340
10341	if (t4_qsize_rxq < 128)
10342		t4_qsize_rxq = 128;
10343	while (t4_qsize_rxq & 7)
10344		t4_qsize_rxq++;
10345
10346	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
10347
10348	/*
10349	 * Number of VIs to create per-port.  The first VI is the "main" regular
10350	 * VI for the port.  The rest are additional virtual interfaces on the
10351	 * same physical port.  Note that the main VI does not have native
10352	 * netmap support but the extra VIs do.
10353	 *
10354	 * Limit the number of VIs per port to the number of available
10355	 * MAC addresses per port.
10356	 */
10357	if (t4_num_vis < 1)
10358		t4_num_vis = 1;
10359	if (t4_num_vis > nitems(vi_mac_funcs)) {
10360		t4_num_vis = nitems(vi_mac_funcs);
10361		printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
10362	}
10363
10364	if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
10365		pcie_relaxed_ordering = 1;
10366#if defined(__i386__) || defined(__amd64__)
10367		if (cpu_vendor_id == CPU_VENDOR_INTEL)
10368			pcie_relaxed_ordering = 0;
10369#endif
10370	}
10371}
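/*
 * Editor's note: everything adjusted above is a loader tunable under
 * hw.cxgbe, so the defaults can be overridden from /boot/loader.conf before
 * the driver attaches (values illustrative):
 *
 *	hw.cxgbe.ntxq="8"
 *	hw.cxgbe.nrxq="8"
 *	hw.cxgbe.num_vis="2"
 *	hw.cxgbe.interrupt_types="2"	// 1=INTx, 2=MSI, 4=MSI-X
 */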
10372
10373#ifdef DDB
10374static void
10375t4_dump_tcb(struct adapter *sc, int tid)
10376{
10377	uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
10378
10379	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
10380	save = t4_read_reg(sc, reg);
10381	base = sc->memwin[2].mw_base;
10382
10383	/* Dump TCB for the tid */
10384	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
10385	tcb_addr += tid * TCB_SIZE;
10386
10387	if (is_t4(sc)) {
10388		pf = 0;
10389		win_pos = tcb_addr & ~0xf;	/* start must be 16B aligned */
10390	} else {
10391		pf = V_PFNUM(sc->pf);
10392		win_pos = tcb_addr & ~0x7f;	/* start must be 128B aligned */
10393	}
10394	t4_write_reg(sc, reg, win_pos | pf);
10395	t4_read_reg(sc, reg);
10396
10397	off = tcb_addr - win_pos;
10398	for (i = 0; i < 4; i++) {
10399		uint32_t buf[8];
10400		for (j = 0; j < 8; j++, off += 4)
10401			buf[j] = htonl(t4_read_reg(sc, base + off));
10402
10403		db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
10404		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
10405		    buf[7]);
10406	}
10407
10408	t4_write_reg(sc, reg, save);
10409	t4_read_reg(sc, reg);
10410}
10411
10412static void
10413t4_dump_devlog(struct adapter *sc)
10414{
10415	struct devlog_params *dparams = &sc->params.devlog;
10416	struct fw_devlog_e e;
10417	int i, first, j, m, nentries, rc;
10418	uint64_t ftstamp = UINT64_MAX;
10419
10420	if (dparams->start == 0) {
10421		db_printf("devlog params not valid\n");
10422		return;
10423	}
10424
10425	nentries = dparams->size / sizeof(struct fw_devlog_e);
10426	m = fwmtype_to_hwmtype(dparams->memtype);
10427
10428	/* Find the first entry. */
10429	first = -1;
10430	for (i = 0; i < nentries && !db_pager_quit; i++) {
10431		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
10432		    sizeof(e), (void *)&e);
10433		if (rc != 0)
10434			break;
10435
10436		if (e.timestamp == 0)
10437			break;
10438
10439		e.timestamp = be64toh(e.timestamp);
10440		if (e.timestamp < ftstamp) {
10441			ftstamp = e.timestamp;
10442			first = i;
10443		}
10444	}
10445
10446	if (first == -1)
10447		return;
10448
10449	i = first;
10450	do {
10451		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
10452		    sizeof(e), (void *)&e);
10453		if (rc != 0)
10454			return;
10455
10456		if (e.timestamp == 0)
10457			return;
10458
10459		e.timestamp = be64toh(e.timestamp);
10460		e.seqno = be32toh(e.seqno);
10461		for (j = 0; j < 8; j++)
10462			e.params[j] = be32toh(e.params[j]);
10463
10464		db_printf("%10d  %15ju  %8s  %8s  ",
10465		    e.seqno, e.timestamp,
10466		    (e.level < nitems(devlog_level_strings) ?
10467			devlog_level_strings[e.level] : "UNKNOWN"),
10468		    (e.facility < nitems(devlog_facility_strings) ?
10469			devlog_facility_strings[e.facility] : "UNKNOWN"));
10470		db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
10471		    e.params[3], e.params[4], e.params[5], e.params[6],
10472		    e.params[7]);
10473
10474		if (++i == nentries)
10475			i = 0;
10476	} while (i != first && !db_pager_quit);
10477}
10478
10479static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
10480_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
10481
10482DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
10483{
10484	device_t dev;
10485	int t;
10486	bool valid;
10487
10488	valid = false;
10489	t = db_read_token();
10490	if (t == tIDENT) {
10491		dev = device_lookup_by_name(db_tok_string);
10492		valid = true;
10493	}
10494	db_skip_to_eol();
10495	if (!valid) {
10496		db_printf("usage: show t4 devlog <nexus>\n");
10497		return;
10498	}
10499
10500	if (dev == NULL) {
10501		db_printf("device not found\n");
10502		return;
10503	}
10504
10505	t4_dump_devlog(device_get_softc(dev));
10506}
10507
10508DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
10509{
10510	device_t dev;
10511	int radix, tid, t;
10512	bool valid;
10513
10514	valid = false;
10515	radix = db_radix;
10516	db_radix = 10;
10517	t = db_read_token();
10518	if (t == tIDENT) {
10519		dev = device_lookup_by_name(db_tok_string);
10520		t = db_read_token();
10521		if (t == tNUMBER) {
10522			tid = db_tok_number;
10523			valid = true;
10524		}
10525	}
10526	db_radix = radix;
10527	db_skip_to_eol();
10528	if (!valid) {
10529		db_printf("usage: show t4 tcb <nexus> <tid>\n");
10530		return;
10531	}
10532
10533	if (dev == NULL) {
10534		db_printf("device not found\n");
10535		return;
10536	}
10537	if (tid < 0) {
10538		db_printf("invalid tid\n");
10539		return;
10540	}
10541
10542	t4_dump_tcb(device_get_softc(dev), tid);
10543}
10544#endif
10545
10546/*
10547 * Borrowed from cesa_prep_aes_key().
10548 *
10549 * NB: The crypto engine wants the words in the decryption key in reverse
10550 * order.
10551 */
10552void
10553t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits)
10554{
10555	uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)];
10556	uint32_t *dkey;
10557	int i;
10558
10559	rijndaelKeySetupEnc(ek, enc_key, kbits);
10560	dkey = dec_key;
10561	dkey += (kbits / 8) / 4;
10562
10563	switch (kbits) {
10564	case 128:
10565		for (i = 0; i < 4; i++)
10566			*--dkey = htobe32(ek[4 * 10 + i]);
10567		break;
10568	case 192:
10569		for (i = 0; i < 2; i++)
10570			*--dkey = htobe32(ek[4 * 11 + 2 + i]);
10571		for (i = 0; i < 4; i++)
10572			*--dkey = htobe32(ek[4 * 12 + i]);
10573		break;
10574	case 256:
10575		for (i = 0; i < 4; i++)
10576			*--dkey = htobe32(ek[4 * 13 + i]);
10577		for (i = 0; i < 4; i++)
10578			*--dkey = htobe32(ek[4 * 14 + i]);
10579		break;
10580	}
10581	MPASS(dkey == dec_key);
10582}
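/*
 * Editor's example: for kbits = 128 the expanded schedule ek[] holds
 * 4 * (10 + 1) = 44 words and the loop above leaves the last round key,
 * word-reversed and converted to big-endian, in dec_key:
 *
 *	dec_key[0] = htobe32(ek[43]);
 *	dec_key[1] = htobe32(ek[42]);
 *	dec_key[2] = htobe32(ek[41]);
 *	dec_key[3] = htobe32(ek[40]);
 *
 * i.e. the "reverse order" the note above describes.
 */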
10583
10584static struct sx mlu;	/* mod load unload */
10585SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
10586
10587static int
10588mod_event(module_t mod, int cmd, void *arg)
10589{
10590	int rc = 0;
10591	static int loaded = 0;
10592
10593	switch (cmd) {
10594	case MOD_LOAD:
10595		sx_xlock(&mlu);
10596		if (loaded++ == 0) {
10597			t4_sge_modload();
10598			t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10599			    t4_filter_rpl, CPL_COOKIE_FILTER);
10600			t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
10601			    do_l2t_write_rpl, CPL_COOKIE_FILTER);
10602			t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
10603			    t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
10604			t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10605			    t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
10606			t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
10607			    t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
10608			t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
10609			t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
10610			t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
10611			    do_smt_write_rpl);
10612			sx_init(&t4_list_lock, "T4/T5 adapters");
10613			SLIST_INIT(&t4_list);
10614#ifdef TCP_OFFLOAD
10615			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
10616			SLIST_INIT(&t4_uld_list);
10617#endif
10618#ifdef INET6
10619			t4_clip_modload();
10620#endif
10621			t4_tracer_modload();
10622			tweak_tunables();
10623		}
10624		sx_xunlock(&mlu);
10625		break;
10626
10627	case MOD_UNLOAD:
10628		sx_xlock(&mlu);
10629		if (--loaded == 0) {
10630			int tries;
10631
10632			sx_slock(&t4_list_lock);
10633			if (!SLIST_EMPTY(&t4_list)) {
10634				rc = EBUSY;
10635				sx_sunlock(&t4_list_lock);
10636				goto done_unload;
10637			}
10638#ifdef TCP_OFFLOAD
10639			sx_slock(&t4_uld_list_lock);
10640			if (!SLIST_EMPTY(&t4_uld_list)) {
10641				rc = EBUSY;
10642				sx_sunlock(&t4_uld_list_lock);
10643				sx_sunlock(&t4_list_lock);
10644				goto done_unload;
10645			}
10646#endif
10647			tries = 0;
10648			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
10649				uprintf("%ju clusters with custom free routine "
10650				    "still in use.\n", t4_sge_extfree_refs());
10651				pause("t4unload", 2 * hz);
10652			}
10653#ifdef TCP_OFFLOAD
10654			sx_sunlock(&t4_uld_list_lock);
10655#endif
10656			sx_sunlock(&t4_list_lock);
10657
10658			if (t4_sge_extfree_refs() == 0) {
10659				t4_tracer_modunload();
10660#ifdef INET6
10661				t4_clip_modunload();
10662#endif
10663#ifdef TCP_OFFLOAD
10664				sx_destroy(&t4_uld_list_lock);
10665#endif
10666				sx_destroy(&t4_list_lock);
10667				t4_sge_modunload();
10668				loaded = 0;
10669			} else {
10670				rc = EBUSY;
10671				loaded++;	/* undo earlier decrement */
10672			}
10673		}
10674done_unload:
10675		sx_xunlock(&mlu);
10676		break;
10677	}
10678
10679	return (rc);
10680}
10681
10682static devclass_t t4_devclass, t5_devclass, t6_devclass;
10683static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
10684static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
10685
10686DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
10687MODULE_VERSION(t4nex, 1);
10688MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
10689#ifdef DEV_NETMAP
10690MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
10691#endif /* DEV_NETMAP */
10692
10693DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
10694MODULE_VERSION(t5nex, 1);
10695MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
10696#ifdef DEV_NETMAP
10697MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
10698#endif /* DEV_NETMAP */
10699
10700DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
10701MODULE_VERSION(t6nex, 1);
10702MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
10703#ifdef DEV_NETMAP
10704MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
10705#endif /* DEV_NETMAP */
10706
10707DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
10708MODULE_VERSION(cxgbe, 1);
10709
10710DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
10711MODULE_VERSION(cxl, 1);
10712
10713DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
10714MODULE_VERSION(cc, 1);
10715
10716DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
10717MODULE_VERSION(vcxgbe, 1);
10718
10719DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
10720MODULE_VERSION(vcxl, 1);
10721
10722DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
10723MODULE_VERSION(vcc, 1);
10724