/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_kern_tls.h"
#include "opt_ratelimit.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/eventhandler.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#include <netinet/in.h>
#include <netinet/ip.h>
#ifdef KERN_TLS
#include <netinet/tcp_seq.h>
#endif
#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#include <machine/cputypes.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#endif
#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "cudbg/cudbg.h"
#include "t4_clip.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"
#include "t4_if.h"
#include "t4_smt.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static int t4_child_location_str(device_t, device_t, char *, size_t);
static int t4_ready(device_t);
static int t4_read_port_device(device_t, int, device_t *);
static device_method_t t4_methods[] = {
	DEVMETHOD(device_probe,		t4_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t4_driver = {
	"t4nex",
	t4_methods,
	sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
device_method_t cxgbe_methods[] = {
	DEVMETHOD(device_probe,		cxgbe_probe),
	DEVMETHOD(device_attach,	cxgbe_attach),
	DEVMETHOD(device_detach,	cxgbe_detach),
	{ 0, 0 }
};
static driver_t cxgbe_driver = {
	"cxgbe",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
	DEVMETHOD(device_probe,		vcxgbe_probe),
	DEVMETHOD(device_attach,	vcxgbe_attach),
	DEVMETHOD(device_detach,	vcxgbe_detach),
	{ 0, 0 }
};
static driver_t vcxgbe_driver = {
	"vcxgbe",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;

static struct cdevsw t4_cdevsw = {
       .d_version = D_VERSION,
       .d_ioctl = t4_ioctl,
       .d_name = "t4nex",
};

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
	DEVMETHOD(device_probe,		t5_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t5_driver = {
	"t5nex",
	t5_methods,
	sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
	"cxl",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
	"vcxl",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* T6 bus driver interface */
static int t6_probe(device_t);
static device_method_t t6_methods[] = {
	DEVMETHOD(device_probe,		t6_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(bus_child_location_str, t4_child_location_str),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t6_driver = {
	"t6nex",
	t6_methods,
	sizeof(struct adapter)
};


/* T6 port (cc) interface */
static driver_t cc_driver = {
	"cc",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T6 VI (vcc) interface */
static driver_t vcc_driver = {
	"vcc",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* ifnet interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);
#if defined(KERN_TLS) || defined(RATELIMIT)
static int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *,
    struct m_snd_tag **);
static int cxgbe_snd_tag_modify(struct m_snd_tag *,
    union if_snd_tag_modify_params *);
static int cxgbe_snd_tag_query(struct m_snd_tag *,
    union if_snd_tag_query_params *);
static void cxgbe_snd_tag_free(struct m_snd_tag *);
#endif

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -n as an indication to tweak_tunables() that it should
 * provide a reasonable default (up to n) when the driver is loaded.
 *
 * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
 * T5 are under hw.cxl.
 */
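
/*
 * Example (illustrative values): loader tunables such as these are normally
 * set in /boot/loader.conf before the module loads, e.g.:
 *	hw.cxgbe.ntxq="8"
 *	hw.cxgbe.nrxq="4"
 */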
SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "cxgbe(4) parameters");
SYSCTL_NODE(_hw, OID_AUTO, cxl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "cxgbe(4) T5+ parameters");
SYSCTL_NODE(_hw_cxgbe, OID_AUTO, toe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "cxgbe(4) TOE parameters");

/*
 * Number of queues for tx and rx, NIC and offload.
 */
#define NTXQ 16
int t4_ntxq = -NTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq, CTLFLAG_RDTUN, &t4_ntxq, 0,
    "Number of TX queues per port");
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);	/* Old name, undocumented */

#define NRXQ 8
int t4_nrxq = -NRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq, CTLFLAG_RDTUN, &t4_nrxq, 0,
    "Number of RX queues per port");
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);	/* Old name, undocumented */

#define NTXQ_VI 1
static int t4_ntxq_vi = -NTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq_vi, CTLFLAG_RDTUN, &t4_ntxq_vi, 0,
    "Number of TX queues per VI");

#define NRXQ_VI 1
static int t4_nrxq_vi = -NRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq_vi, CTLFLAG_RDTUN, &t4_nrxq_vi, 0,
    "Number of RX queues per VI");

static int t4_rsrv_noflowq = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
    0, "Reserve TX queue 0 of each VI for non-flowid packets");

#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
#define NOFLDTXQ 8
static int t4_nofldtxq = -NOFLDTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
    "Number of offload TX queues per port");

#define NOFLDRXQ 2
static int t4_nofldrxq = -NOFLDRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
    "Number of offload RX queues per port");

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq_vi, CTLFLAG_RDTUN, &t4_nofldtxq_vi, 0,
    "Number of offload TX queues per VI");

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq_vi, CTLFLAG_RDTUN, &t4_nofldrxq_vi, 0,
    "Number of offload RX queues per VI");

#define TMR_IDX_OFLD 1
int t4_tmr_idx_ofld = TMR_IDX_OFLD;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_ofld, CTLFLAG_RDTUN,
    &t4_tmr_idx_ofld, 0, "Holdoff timer index for offload queues");

#define PKTC_IDX_OFLD (-1)
int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_ofld, CTLFLAG_RDTUN,
    &t4_pktc_idx_ofld, 0, "holdoff packet counter index for offload queues");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_idle = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_idle, CTLFLAG_RDTUN,
    &t4_toe_keepalive_idle, 0, "TOE keepalive idle timer (us)");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_interval = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_interval, CTLFLAG_RDTUN,
    &t4_toe_keepalive_interval, 0, "TOE keepalive interval timer (us)");

/* 0 means chip/fw default, non-zero number is # of keepalives before abort */
static int t4_toe_keepalive_count = 0;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, keepalive_count, CTLFLAG_RDTUN,
    &t4_toe_keepalive_count, 0, "Number of TOE keepalive probes before abort");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_min = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_min, CTLFLAG_RDTUN,
    &t4_toe_rexmt_min, 0, "Minimum TOE retransmit interval (us)");

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_max = 0;
SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_max, CTLFLAG_RDTUN,
    &t4_toe_rexmt_max, 0, "Maximum TOE retransmit interval (us)");

/* 0 means chip/fw default, non-zero number is # of rexmt before abort */
static int t4_toe_rexmt_count = 0;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, rexmt_count, CTLFLAG_RDTUN,
    &t4_toe_rexmt_count, 0, "Number of TOE retransmissions before abort");

/* -1 means chip/fw default, other values are raw backoff values to use */
static int t4_toe_rexmt_backoff[16] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
SYSCTL_NODE(_hw_cxgbe_toe, OID_AUTO, rexmt_backoff,
    CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "cxgbe(4) TOE retransmit backoff values");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 0, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[0], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 1, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[1], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 2, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[2], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 3, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[3], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 4, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[4], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 5, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[5], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 6, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[6], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 7, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[7], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 8, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[8], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 9, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[9], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 10, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[10], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 11, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[11], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 12, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[12], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 13, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[13], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[14], 0, "");
SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
    &t4_toe_rexmt_backoff[15], 0, "");

static int t4_toe_tls_rx_timeout = 5;
SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, tls_rx_timeout, CTLFLAG_RDTUN,
    &t4_toe_tls_rx_timeout, 0,
    "Timeout in seconds to downgrade TLS sockets to plain TOE");
#endif

#ifdef DEV_NETMAP
#define NN_MAIN_VI	(1 << 0)	/* Native netmap on the main VI */
#define NN_EXTRA_VI	(1 << 1)	/* Native netmap on the extra VI(s) */
static int t4_native_netmap = NN_EXTRA_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, native_netmap, CTLFLAG_RDTUN, &t4_native_netmap,
    0, "Native netmap support.  bit 0 = main VI, bit 1 = extra VIs");
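
/*
 * Example: hw.cxgbe.native_netmap="3" (NN_MAIN_VI | NN_EXTRA_VI) enables
 * native netmap on the main VI as well as any extra VIs.
 */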

#define NNMTXQ 8
static int t4_nnmtxq = -NNMTXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq, CTLFLAG_RDTUN, &t4_nnmtxq, 0,
    "Number of netmap TX queues");

#define NNMRXQ 8
static int t4_nnmrxq = -NNMRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq, CTLFLAG_RDTUN, &t4_nnmrxq, 0,
    "Number of netmap RX queues");

#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -NNMTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq_vi, CTLFLAG_RDTUN, &t4_nnmtxq_vi, 0,
    "Number of netmap TX queues per VI");

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -NNMRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq_vi, CTLFLAG_RDTUN, &t4_nnmrxq_vi, 0,
    "Number of netmap RX queues per VI");
#endif

/*
 * Holdoff parameters for ports.
 */
#define TMR_IDX 1
int t4_tmr_idx = TMR_IDX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx, CTLFLAG_RDTUN, &t4_tmr_idx,
    0, "Holdoff timer index");
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);	/* Old name */

#define PKTC_IDX (-1)
int t4_pktc_idx = PKTC_IDX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx, CTLFLAG_RDTUN, &t4_pktc_idx,
    0, "Holdoff packet counter index");
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);	/* Old name */

/*
 * Size (# of entries) of each tx and rx queue.
 */
unsigned int t4_qsize_txq = TX_EQ_QSIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &t4_qsize_txq, 0,
    "Number of descriptors in each TX queue");

unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &t4_qsize_rxq, 0,
    "Number of descriptors in each RX queue");

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &t4_intr_types,
    0, "Interrupt types allowed (bit 0 = INTx, 1 = MSI, 2 = MSI-X)");

/*
 * Configuration file.  All the _CF names here are special.
 */
#define DEFAULT_CF	"default"
#define BUILTIN_CF	"built-in"
#define FLASH_CF	"flash"
#define UWIRE_CF	"uwire"
#define FPGA_CF		"fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
SYSCTL_STRING(_hw_cxgbe, OID_AUTO, config_file, CTLFLAG_RDTUN, t4_cfg_file,
    sizeof(t4_cfg_file), "Firmware configuration file");

/*
 * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
 *                 negotiated settings will override rx_pause/tx_pause.
 *                 Otherwise rx_pause/tx_pause are applied forcibly.
 */
static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, pause_settings, CTLFLAG_RDTUN,
    &t4_pause_settings, 0,
    "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");

/*
 * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
 * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5)
 *  0 to disable FEC.
 */
static int t4_fec = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fec, CTLFLAG_RDTUN, &t4_fec, 0,
    "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");

/*
 * Link autonegotiation.
 * -1 to run with the firmware default.
 *  0 to disable.
 *  1 to enable.
 */
static int t4_autoneg = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, autoneg, CTLFLAG_RDTUN, &t4_autoneg, 0,
    "Link autonegotiation");

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).  '-n' is the same as 'n' except the firmware
 * version used in the checks is read from the firmware bundled with the driver.
 */
static int t4_fw_install = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fw_install, CTLFLAG_RDTUN, &t4_fw_install, 0,
    "Firmware auto-install (0 = prohibited, 1 = allowed, 2 = encouraged)");

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nbmcaps_allowed, CTLFLAG_RDTUN,
    &t4_nbmcaps_allowed, 0, "Default NBM capabilities");

static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
SYSCTL_INT(_hw_cxgbe, OID_AUTO, linkcaps_allowed, CTLFLAG_RDTUN,
    &t4_linkcaps_allowed, 0, "Default link capabilities");

static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
    FW_CAPS_CONFIG_SWITCH_EGRESS;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, switchcaps_allowed, CTLFLAG_RDTUN,
    &t4_switchcaps_allowed, 0, "Default switch capabilities");

#ifdef RATELIMIT
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
	FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
#else
static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
	FW_CAPS_CONFIG_NIC_HASHFILTER;
#endif
SYSCTL_INT(_hw_cxgbe, OID_AUTO, niccaps_allowed, CTLFLAG_RDTUN,
    &t4_niccaps_allowed, 0, "Default NIC capabilities");

static int t4_toecaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, toecaps_allowed, CTLFLAG_RDTUN,
    &t4_toecaps_allowed, 0, "Default TCP offload capabilities");

static int t4_rdmacaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, rdmacaps_allowed, CTLFLAG_RDTUN,
    &t4_rdmacaps_allowed, 0, "Default RDMA capabilities");

static int t4_cryptocaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cryptocaps_allowed, CTLFLAG_RDTUN,
    &t4_cryptocaps_allowed, 0, "Default crypto capabilities");

static int t4_iscsicaps_allowed = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, iscsicaps_allowed, CTLFLAG_RDTUN,
    &t4_iscsicaps_allowed, 0, "Default iSCSI capabilities");

static int t4_fcoecaps_allowed = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, fcoecaps_allowed, CTLFLAG_RDTUN,
    &t4_fcoecaps_allowed, 0, "Default FCoE capabilities");

static int t5_write_combine = 0;
SYSCTL_INT(_hw_cxl, OID_AUTO, write_combine, CTLFLAG_RDTUN, &t5_write_combine,
    0, "Use WC instead of UC for BAR2");

static int t4_num_vis = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, num_vis, CTLFLAG_RDTUN, &t4_num_vis, 0,
    "Number of VIs per port");

/*
 * PCIe Relaxed Ordering.
 * -1: driver should figure out a good value.
 * 0: disable RO.
 * 1: enable RO.
 * 2: leave RO alone.
 */
static int pcie_relaxed_ordering = -1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, pcie_relaxed_ordering, CTLFLAG_RDTUN,
    &pcie_relaxed_ordering, 0,
    "PCIe Relaxed Ordering: 0 = disable, 1 = enable, 2 = leave alone");

static int t4_panic_on_fatal_err = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RWTUN,
    &t4_panic_on_fatal_err, 0, "panic on fatal errors");

static int t4_tx_vm_wr = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_vm_wr, CTLFLAG_RWTUN, &t4_tx_vm_wr, 0,
    "Use VM work requests to transmit packets.");

/*
 * Set to non-zero to enable the attack filter.  A packet that matches any of
 * these conditions will get dropped on ingress:
 * 1) IP && source address == destination address.
 * 2) TCP/IP && source address is not a unicast address.
 * 3) TCP/IP && destination address is not a unicast address.
 * 4) IP && source address is loopback (127.x.y.z).
 * 5) IP && destination address is loopback (127.x.y.z).
 * 6) IPv6 && source address == destination address.
 * 7) IPv6 && source address is not a unicast address.
 * 8) IPv6 && source address is loopback (::1/128).
 * 9) IPv6 && destination address is loopback (::1/128).
 * 10) IPv6 && source address is unspecified (::/128).
 * 11) IPv6 && destination address is unspecified (::/128).
 * 12) TCP/IPv6 && source address is multicast (ff00::/8).
 * 13) TCP/IPv6 && destination address is multicast (ff00::/8).
 */
static int t4_attack_filter = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, attack_filter, CTLFLAG_RDTUN,
    &t4_attack_filter, 0, "Drop suspicious traffic");

static int t4_drop_ip_fragments = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_ip_fragments, CTLFLAG_RDTUN,
    &t4_drop_ip_fragments, 0, "Drop IP fragments");

static int t4_drop_pkts_with_l2_errors = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l2_errors, CTLFLAG_RDTUN,
    &t4_drop_pkts_with_l2_errors, 0,
    "Drop all frames with Layer 2 length or checksum errors");

static int t4_drop_pkts_with_l3_errors = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l3_errors, CTLFLAG_RDTUN,
    &t4_drop_pkts_with_l3_errors, 0,
    "Drop all frames with IP version, length, or checksum errors");

static int t4_drop_pkts_with_l4_errors = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l4_errors, CTLFLAG_RDTUN,
    &t4_drop_pkts_with_l4_errors, 0,
    "Drop all frames with Layer 4 length, checksum, or other errors");

#ifdef TCP_OFFLOAD
/*
 * TOE tunables.
 */
static int t4_cop_managed_offloading = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cop_managed_offloading, CTLFLAG_RDTUN,
    &t4_cop_managed_offloading, 0,
    "COP (Connection Offload Policy) controls all TOE offload");
#endif

#ifdef KERN_TLS
/*
 * This enables KERN_TLS for all adapters if set.
 */
static int t4_kern_tls = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, kern_tls, CTLFLAG_RDTUN, &t4_kern_tls, 0,
    "Enable KERN_TLS mode for all supported adapters");

SYSCTL_NODE(_hw_cxgbe, OID_AUTO, tls, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "cxgbe(4) KERN_TLS parameters");

static int t4_tls_inline_keys = 0;
SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, inline_keys, CTLFLAG_RDTUN,
    &t4_tls_inline_keys, 0,
    "Always pass TLS keys in work requests (1) or attempt to store TLS keys "
    "in card memory.");

static int t4_tls_combo_wrs = 0;
SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, combo_wrs, CTLFLAG_RDTUN, &t4_tls_combo_wrs,
    0, "Attempt to combine TCB field updates with TLS record work requests.");
#endif

/* Functions used by VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
	FW_VI_FUNC_ETH,
	FW_VI_FUNC_OFLD,
	FW_VI_FUNC_IWARP,
	FW_VI_FUNC_OPENISCSI,
	FW_VI_FUNC_OPENFCOE,
	FW_VI_FUNC_FOISCSI,
	FW_VI_FUNC_FOFCOE,
};

struct intrs_and_queues {
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t num_vis;	/* number of VIs for each port */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t ntxq;		/* # of NIC txq's for each port */
	uint16_t nrxq;		/* # of NIC rxq's for each port */
	uint16_t nofldtxq;	/* # of TOE/ETHOFLD txq's for each port */
	uint16_t nofldrxq;	/* # of TOE rxq's for each port */
	uint16_t nnmtxq;	/* # of netmap txq's */
	uint16_t nnmrxq;	/* # of netmap rxq's */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};

static void setup_memwin(struct adapter *);
static void position_memwin(struct adapter *, int, uint32_t);
static int validate_mem_range(struct adapter *, uint32_t, uint32_t);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, uint32_t,
    uint32_t *);
static int fixup_devlog_params(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
static int contact_firmware(struct adapter *);
static int partition_resources(struct adapter *);
static int get_params__pre_init(struct adapter *);
static int set_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static bool fixed_ifmedia(struct port_info *);
static void build_medialist(struct port_info *);
static void init_link_config(struct port_info *);
static int fixup_link_config(struct port_info *);
static int apply_link_config(struct port_info *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void t4_init_atid_table(struct adapter *);
static void t4_free_atid_table(struct adapter *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct vi_info *);
static void cxgbe_refresh_stats(struct vi_info *);
static void cxgbe_tick(void *);
static void vi_tick(void *);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_fec(SYSCTL_HANDLER_ARGS);
static int sysctl_module_fec(SYSCTL_HANDLER_ARGS);
static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
static int sysctl_vdd(SYSCTL_HANDLER_ARGS);
static int sysctl_reset_sensor(SYSCTL_HANDLER_ARGS);
static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tid_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tnl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
#ifdef TCP_OFFLOAD
static int sysctl_tls(SYSCTL_HANDLER_ARGS);
static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
static int sysctl_tls_rx_timeout(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
#endif
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int load_cfg(struct adapter *, struct t4_data *);
static int load_boot(struct adapter *, struct t4_bootrom *);
static int load_bootcfg(struct adapter *, struct t4_data *);
static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
static void free_offload_policy(struct t4_offload_policy *);
static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
static int clear_stats(struct adapter *, u_int);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, bool);
static void t4_async_event(void *, int);
#endif
#ifdef KERN_TLS
static int ktls_capability(struct adapter *, bool);
#endif
static int mod_event(module_t, int, void *);
static int notify_siblings(device_t, int);
static uint64_t vi_get_counter(struct ifnet *, ift_counter);
static uint64_t cxgbe_get_counter(struct ifnet *, ift_counter);
static void enable_vxlan_rx(struct adapter *);

struct {
	uint16_t device;
	char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 X 1G */
	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
	{0x5418,  "Chelsio T540-BT"},		/* 4 x 10GBaseT */
	{0x5419,  "Chelsio T540-LP-BT"},	/* 4 x 10GBaseT */
	{0x541a,  "Chelsio T540-SO-BT"},	/* 4 x 10GBaseT, nomem */
	{0x541b,  "Chelsio T540-SO-CR"},	/* 4 x 10G, nomem */

	/* Custom */
	{0x5483, "Custom T540-CR"},
	{0x5484, "Custom T540-BT"},
}, t6_pciids[] = {
	{0xc006, "Chelsio Terminator 6 FPGA"},	/* T6 PE10K6 FPGA (PF0) */
	{0x6400, "Chelsio T6-DBG-25"},		/* 2 x 10/25G, debug */
	{0x6401, "Chelsio T6225-CR"},		/* 2 x 10/25G */
	{0x6402, "Chelsio T6225-SO-CR"},	/* 2 x 10/25G, nomem */
	{0x6403, "Chelsio T6425-CR"},		/* 4 x 10/25G */
	{0x6404, "Chelsio T6425-SO-CR"},	/* 4 x 10/25G, nomem */
	{0x6405, "Chelsio T6225-OCP-SO"},	/* 2 x 10/25G, nomem */
	{0x6406, "Chelsio T62100-OCP-SO"},	/* 2 x 40/50/100G, nomem */
	{0x6407, "Chelsio T62100-LP-CR"},	/* 2 x 40/50/100G */
	{0x6408, "Chelsio T62100-SO-CR"},	/* 2 x 40/50/100G, nomem */
	{0x6409, "Chelsio T6210-BT"},		/* 2 x 10GBASE-T */
	{0x640d, "Chelsio T62100-CR"},		/* 2 x 40/50/100G */
	{0x6410, "Chelsio T6-DBG-100"},		/* 2 x 40/50/100G, debug */
	{0x6411, "Chelsio T6225-LL-CR"},	/* 2 x 10/25G */
	{0x6414, "Chelsio T61100-OCP-SO"},	/* 1 x 40/50/100G, nomem */
	{0x6415, "Chelsio T6201-BT"},		/* 2 x 1000BASE-T */

	/* Custom */
	{0x6480, "Custom T6225-CR"},
	{0x6481, "Custom T62100-CR"},
	{0x6482, "Custom T6225-CR"},
	{0x6483, "Custom T62100-CR"},
	{0x6484, "Custom T64100-CR"},
	{0x6485, "Custom T6240-SO"},
	{0x6486, "Custom T6225-SO-CR"},
	{0x6487, "Custom T6225-CR"},
};

#ifdef TCP_OFFLOAD
/*
 * service_iq_fl() has an iq and needs the fl.  Offset of fl from the iq should
 * be exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xa000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t4_pciids); i++) {
		if (d == t4_pciids[i].device) {
			device_set_desc(dev, t4_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t5_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xb000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t5_pciids); i++) {
		if (d == t5_pciids[i].device) {
			device_set_desc(dev, t5_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t6_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	for (i = 0; i < nitems(t6_pciids); i++) {
		if (d == t6_pciids[i].device) {
			device_set_desc(dev, t6_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
	device_t root_port;
	uint32_t v;

	/*
	 * The T5 chips do not properly echo the No Snoop and Relaxed
	 * Ordering attributes when replying to a TLP from a Root
	 * Port.  As a workaround, find the parent Root Port and
	 * disable No Snoop and Relaxed Ordering.  Note that this
	 * affects all devices under this root port.
	 */
	root_port = pci_find_pcie_root_port(dev);
	if (root_port == NULL) {
		device_printf(dev, "Unable to find parent root port\n");
		return;
	}

	v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
	if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
	    0)
		device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
		    device_get_nameunit(root_port));
}

static const struct devnames devnames[] = {
	{
		.nexus_name = "t4nex",
		.ifnet_name = "cxgbe",
		.vi_ifnet_name = "vcxgbe",
		.pf03_drv_name = "t4iov",
		.vf_nexus_name = "t4vf",
		.vf_ifnet_name = "cxgbev"
	}, {
		.nexus_name = "t5nex",
		.ifnet_name = "cxl",
		.vi_ifnet_name = "vcxl",
		.pf03_drv_name = "t5iov",
		.vf_nexus_name = "t5vf",
		.vf_ifnet_name = "cxlv"
	}, {
		.nexus_name = "t6nex",
		.ifnet_name = "cc",
		.vi_ifnet_name = "vcc",
		.pf03_drv_name = "t6iov",
		.vf_nexus_name = "t6vf",
		.vf_ifnet_name = "ccv"
	}
};

void
t4_init_devnames(struct adapter *sc)
{
	int id;

	id = chip_id(sc);
	if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
		sc->names = &devnames[id - CHELSIO_T4];
	else {
		device_printf(sc->dev, "chip id %d is not supported.\n", id);
		sc->names = NULL;
	}
}

static int
t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
{
	const char *parent, *name;
	long value;
	int line, unit;

	line = 0;
	parent = device_get_nameunit(sc->dev);
	name = sc->names->ifnet_name;
	while (resource_find_dev(&line, name, &unit, "at", parent) == 0) {
		if (resource_long_value(name, unit, "port", &value) == 0 &&
		    value == pi->port_id)
			return (unit);
	}
	return (-1);
}
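
/*
 * Example (hypothetical unit/port numbers): the loop above honors device
 * wiring hints of the form
 *	hint.cxl.1.at="t5nex0"
 *	hint.cxl.1.port="0"
 * which pin ifnet unit 1 to port 0 of the t5nex0 nexus.
 */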

static int
t4_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, rqidx, tqidx, nports;
	struct make_dev_args mda;
	struct intrs_and_queues iaq;
	struct sge *s;
	uint32_t *buf;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	int ofld_tqidx;
#endif
#ifdef TCP_OFFLOAD
	int ofld_rqidx;
#endif
#ifdef DEV_NETMAP
	int nm_rqidx, nm_tqidx;
#endif
	int num_vis;

	sc = device_get_softc(dev);
	sc->dev = dev;
	TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);

	if ((pci_get_device(dev) & 0xff00) == 0x5400)
		t5_attribute_workaround(dev);
	pci_enable_busmaster(dev);
	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
		uint32_t v;

		pci_set_max_read_req(dev, 4096);
		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
		if (pcie_relaxed_ordering == 0 &&
		    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
			v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
			pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
		} else if (pcie_relaxed_ordering == 1 &&
		    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
			v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
			pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
		}
	}

	sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
	sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
	sc->traceq = -1;
	/* The lock name must be set before the mutex is initialized with it. */
	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
	    device_get_nameunit(dev));
	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);

	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	t4_add_adapter(sc);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	sc->policy = NULL;
	rw_init(&sc->policy_lock, "connection offload policy");

	callout_init(&sc->ktls_tick, 1);

#ifdef TCP_OFFLOAD
	TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc);
#endif

	refcount_init(&sc->vxlan_refcount, 0);

	rc = t4_map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

	/* Prepare the adapter for operation. */
	buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
	rc = -t4_prep_adapter(sc, buf);
	free(buf, M_CXGBE);
	if (rc != 0) {
		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
		goto done;
	}

	/*
	 * This is the real PF# to which we're attaching.  Works from within PCI
	 * passthrough environments too, where pci_get_function() could return a
	 * different PF# depending on the passthrough configuration.  We need to
	 * use the real PF# in all our communication with the firmware.
	 */
	j = t4_read_reg(sc, A_PL_WHOAMI);
	sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
	sc->mbox = sc->pf;

	t4_init_devnames(sc);
	if (sc->names == NULL) {
		rc = ENOTSUP;
		goto done; /* error message displayed already */
	}

	/*
	 * Do this really early, with the memory windows set up even before the
	 * character device.  The userland tool's register i/o and mem read
	 * will work even in "recovery mode".
	 */
	setup_memwin(sc);
	if (t4_init_devlog_params(sc, 0) == 0)
		fixup_devlog_params(sc);
	make_dev_args_init(&mda);
	mda.mda_devsw = &t4_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (rc != 0)
		device_printf(dev, "failed to create nexus char device: %d.\n",
		    rc);

	/* Go no further if recovery mode has been requested. */
	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
		device_printf(dev, "recovery mode.\n");
		goto done;
	}

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/* Contact the firmware and try to become the master driver. */
	rc = contact_firmware(sc);
	if (rc != 0)
		goto done; /* error message displayed already */
	MPASS(sc->flags & FW_OK);

	rc = get_params__pre_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	if (sc->flags & MASTER_PF) {
		rc = partition_resources(sc);
		if (rc != 0)
			goto done; /* error message displayed already */
		t4_intr_clear(sc);
	}

	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.
	 */
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		/*
		 * XXX: vi[0] is special so we can't delay this allocation until
		 * pi->nvi's final value is known.
		 */
		pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		/*
		 * The MPS counter for FCS errors doesn't work correctly on the
		 * T6 so we use the MAC counter here.  Which MAC is in use
		 * depends on the link settings which will be known when the
		 * link comes up.
		 */
		if (is_t6(sc)) {
			pi->fcs_reg = -1;
		} else if (is_t4(sc)) {
			pi->fcs_reg = PORT_REG(pi->tx_chan,
			    A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L);
		} else {
			pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
			    A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L);
		}
		pi->fcs_base = 0;

		/* All VIs on this port share this media. */
		ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
		    cxgbe_media_status);

		PORT_LOCK(pi);
		init_link_config(pi);
		fixup_link_config(pi);
		build_medialist(pi);
		if (fixed_ifmedia(pi))
			pi->flags |= FIXED_IFMEDIA;
		PORT_UNLOCK(pi);

		pi->dev = device_add_child(dev, sc->names->ifnet_name,
		    t4_ifnet_unit(sc, pi));
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	nports = sc->params.nports;
	rc = cfg_itype_and_nqueues(sc, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */

	num_vis = iaq.num_vis;
	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = nports * iaq.nrxq;
	s->ntxq = nports * iaq.ntxq;
	if (num_vis > 1) {
		s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
		s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
	}
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += nports;		/* ctrl queues: 1 per port */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	if (is_offload(sc) || is_ethoffload(sc)) {
		s->nofldtxq = nports * iaq.nofldtxq;
		if (num_vis > 1)
			s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
		s->neq += s->nofldtxq;

		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_ofld_txq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		s->nofldrxq = nports * iaq.nofldrxq;
		if (num_vis > 1)
			s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
		s->neq += s->nofldrxq;	/* free list */
		s->niq += s->nofldrxq;

		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef DEV_NETMAP
	s->nnmrxq = 0;
	s->nnmtxq = 0;
	if (t4_native_netmap & NN_MAIN_VI) {
		s->nnmrxq += nports * iaq.nnmrxq;
		s->nnmtxq += nports * iaq.nnmtxq;
	}
	if (num_vis > 1 && t4_native_netmap & NN_EXTRA_VI) {
		s->nnmrxq += nports * (num_vis - 1) * iaq.nnmrxq_vi;
		s->nnmtxq += nports * (num_vis - 1) * iaq.nnmtxq_vi;
	}
	s->neq += s->nnmtxq + s->nnmrxq;
	s->niq += s->nnmrxq;

	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
	    M_CXGBE, M_ZERO | M_WAITOK);
	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
	    M_CXGBE, M_ZERO | M_WAITOK);
#endif
	MPASS(s->niq <= s->iqmap_sz);
	MPASS(s->neq <= s->eqmap_sz);

	s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->iqmap_sz * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->eqmap_sz * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	t4_init_l2t(sc, M_WAITOK);
	t4_init_smt(sc, M_WAITOK);
	t4_init_tx_sched(sc);
	t4_init_atid_table(sc);
#ifdef RATELIMIT
	t4_init_etid_table(sc);
#endif
#ifdef INET6
	t4_init_clip_table(sc);
#endif
	if (sc->vres.key.size != 0)
		sc->key_map = vmem_create("T4TLS key map", sc->vres.key.start,
		    sc->vres.key.size, 32, 0, M_FIRSTFIT | M_WAITOK);
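
	/*
	 * Note: the vmem arena above hands out addresses in the card's TLS
	 * key memory region in 32-byte quanta; key material written there is
	 * referenced by work requests instead of being sent inline (see the
	 * hw.cxgbe.tls.inline_keys tunable).
	 */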
1406
1407	/*
1408	 * Second pass over the ports.  This time we know the number of rx and
1409	 * tx queues that each port should get.
1410	 */
1411	rqidx = tqidx = 0;
1412#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1413	ofld_tqidx = 0;
1414#endif
1415#ifdef TCP_OFFLOAD
1416	ofld_rqidx = 0;
1417#endif
1418#ifdef DEV_NETMAP
1419	nm_rqidx = nm_tqidx = 0;
1420#endif
1421	for_each_port(sc, i) {
1422		struct port_info *pi = sc->port[i];
1423		struct vi_info *vi;
1424
1425		if (pi == NULL)
1426			continue;
1427
1428		pi->nvi = num_vis;
1429		for_each_vi(pi, j, vi) {
1430			vi->pi = pi;
1431			vi->adapter = sc;
1432			vi->qsize_rxq = t4_qsize_rxq;
1433			vi->qsize_txq = t4_qsize_txq;
1434
1435			vi->first_rxq = rqidx;
1436			vi->first_txq = tqidx;
1437			vi->tmr_idx = t4_tmr_idx;
1438			vi->pktc_idx = t4_pktc_idx;
1439			vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
1440			vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;
1441
1442			rqidx += vi->nrxq;
1443			tqidx += vi->ntxq;
1444
1445			if (j == 0 && vi->ntxq > 1)
1446				vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
1447			else
1448				vi->rsrv_noflowq = 0;
1449
1450#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1451			vi->first_ofld_txq = ofld_tqidx;
1452			vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;
1453			ofld_tqidx += vi->nofldtxq;
1454#endif
1455#ifdef TCP_OFFLOAD
1456			vi->ofld_tmr_idx = t4_tmr_idx_ofld;
1457			vi->ofld_pktc_idx = t4_pktc_idx_ofld;
1458			vi->first_ofld_rxq = ofld_rqidx;
1459			vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
1460
1461			ofld_rqidx += vi->nofldrxq;
1462#endif
1463#ifdef DEV_NETMAP
1464			vi->first_nm_rxq = nm_rqidx;
1465			vi->first_nm_txq = nm_tqidx;
1466			if (j == 0) {
1467				vi->nnmrxq = iaq.nnmrxq;
1468				vi->nnmtxq = iaq.nnmtxq;
1469			} else {
1470				vi->nnmrxq = iaq.nnmrxq_vi;
1471				vi->nnmtxq = iaq.nnmtxq_vi;
1472			}
1473			nm_rqidx += vi->nnmrxq;
1474			nm_tqidx += vi->nnmtxq;
1475#endif
1476		}
1477	}
1478
1479	rc = t4_setup_intr_handlers(sc);
1480	if (rc != 0) {
1481		device_printf(dev,
1482		    "failed to setup interrupt handlers: %d\n", rc);
1483		goto done;
1484	}
1485
1486	rc = bus_generic_probe(dev);
1487	if (rc != 0) {
1488		device_printf(dev, "failed to probe child drivers: %d\n", rc);
1489		goto done;
1490	}
1491
1492	/*
1493	 * Ensure thread-safe mailbox access (in debug builds).
1494	 *
1495	 * So far this was the only thread accessing the mailbox but various
1496	 * ifnets and sysctls are about to be created and their handlers/ioctls
1497	 * will access the mailbox from different threads.
1498	 */
1499	sc->flags |= CHK_MBOX_ACCESS;
1500
1501	rc = bus_generic_attach(dev);
1502	if (rc != 0) {
1503		device_printf(dev,
1504		    "failed to attach all child ports: %d\n", rc);
1505		goto done;
1506	}
1507
1508	device_printf(dev,
1509	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
1510	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
1511	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
1512	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
1513	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
1514
1515	t4_set_desc(sc);
1516
1517	notify_siblings(dev, 0);
1518
1519done:
1520	if (rc != 0 && sc->cdev) {
1521		/* cdev was created and so cxgbetool works; recover that way. */
1522		device_printf(dev,
1523		    "error during attach, adapter is now in recovery mode.\n");
1524		rc = 0;
1525	}
1526
1527	if (rc != 0)
1528		t4_detach_common(dev);
1529	else
1530		t4_sysctls(sc);
1531
1532	return (rc);
1533}
1534
1535static int
1536t4_child_location_str(device_t bus, device_t dev, char *buf, size_t buflen)
1537{
1538	struct adapter *sc;
1539	struct port_info *pi;
1540	int i;
1541
1542	sc = device_get_softc(bus);
1543	buf[0] = '\0';
1544	for_each_port(sc, i) {
1545		pi = sc->port[i];
1546		if (pi != NULL && pi->dev == dev) {
1547			snprintf(buf, buflen, "port=%d", pi->port_id);
1548			break;
1549		}
1550	}
1551	return (0);
1552}
1553
1554static int
1555t4_ready(device_t dev)
1556{
1557	struct adapter *sc;
1558
1559	sc = device_get_softc(dev);
1560	if (sc->flags & FW_OK)
1561		return (0);
1562	return (ENXIO);
1563}
1564
1565static int
1566t4_read_port_device(device_t dev, int port, device_t *child)
1567{
1568	struct adapter *sc;
1569	struct port_info *pi;
1570
1571	sc = device_get_softc(dev);
1572	if (port < 0 || port >= MAX_NPORTS)
1573		return (EINVAL);
1574	pi = sc->port[port];
1575	if (pi == NULL || pi->dev == NULL)
1576		return (ENXIO);
1577	*child = pi->dev;
1578	return (0);
1579}
1580
1581static int
1582notify_siblings(device_t dev, int detaching)
1583{
1584	device_t sibling;
1585	int error, i;
1586
1587	error = 0;
1588	for (i = 0; i < PCI_FUNCMAX; i++) {
1589		if (i == pci_get_function(dev))
1590			continue;
1591		sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
1592		    pci_get_slot(dev), i);
1593		if (sibling == NULL || !device_is_attached(sibling))
1594			continue;
1595		if (detaching)
1596			error = T4_DETACH_CHILD(sibling);
1597		else
1598			(void)T4_ATTACH_CHILD(sibling);
1599		if (error)
1600			break;
1601	}
1602	return (error);
1603}
1604
1605/*
1606 * Idempotent
1607 */
1608static int
1609t4_detach(device_t dev)
1610{
1611	struct adapter *sc;
1612	int rc;
1613
1614	sc = device_get_softc(dev);
1615
1616	rc = notify_siblings(dev, 1);
1617	if (rc) {
1618		device_printf(dev,
1619		    "failed to detach sibling devices: %d\n", rc);
1620		return (rc);
1621	}
1622
1623	return (t4_detach_common(dev));
1624}
1625
1626int
1627t4_detach_common(device_t dev)
1628{
1629	struct adapter *sc;
1630	struct port_info *pi;
1631	int i, rc;
1632
1633	sc = device_get_softc(dev);
1634
1635	if (sc->cdev) {
1636		destroy_dev(sc->cdev);
1637		sc->cdev = NULL;
1638	}
1639
1640	sx_xlock(&t4_list_lock);
1641	SLIST_REMOVE(&t4_list, sc, adapter, link);
1642	sx_xunlock(&t4_list_lock);
1643
1644	sc->flags &= ~CHK_MBOX_ACCESS;
1645	if (sc->flags & FULL_INIT_DONE) {
1646		if (!(sc->flags & IS_VF))
1647			t4_intr_disable(sc);
1648	}
1649
1650	if (device_is_attached(dev)) {
1651		rc = bus_generic_detach(dev);
1652		if (rc) {
1653			device_printf(dev,
1654			    "failed to detach child devices: %d\n", rc);
1655			return (rc);
1656		}
1657	}
1658
1659#ifdef TCP_OFFLOAD
1660	taskqueue_drain(taskqueue_thread, &sc->async_event_task);
1661#endif
1662
1663	for (i = 0; i < sc->intr_count; i++)
1664		t4_free_irq(sc, &sc->irq[i]);
1665
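	/*
	 * (IS_VF | FW_OK) == FW_OK means the firmware is usable and this is
	 * not a VF; only then is there tx scheduler state to release.
	 */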
1666	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1667		t4_free_tx_sched(sc);
1668
1669	for (i = 0; i < MAX_NPORTS; i++) {
1670		pi = sc->port[i];
1671		if (pi) {
1672			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
1673			if (pi->dev)
1674				device_delete_child(dev, pi->dev);
1675
1676			mtx_destroy(&pi->pi_lock);
1677			free(pi->vi, M_CXGBE);
1678			free(pi, M_CXGBE);
1679		}
1680	}
1681
1682	device_delete_children(dev);
1683
1684	if (sc->flags & FULL_INIT_DONE)
1685		adapter_full_uninit(sc);
1686
1687	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1688		t4_fw_bye(sc, sc->mbox);
1689
1690	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
1691		pci_release_msi(dev);
1692
1693	if (sc->regs_res)
1694		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
1695		    sc->regs_res);
1696
1697	if (sc->udbs_res)
1698		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
1699		    sc->udbs_res);
1700
1701	if (sc->msix_res)
1702		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
1703		    sc->msix_res);
1704
1705	if (sc->l2t)
1706		t4_free_l2t(sc->l2t);
1707	if (sc->smt)
1708		t4_free_smt(sc->smt);
1709	t4_free_atid_table(sc);
1710#ifdef RATELIMIT
1711	t4_free_etid_table(sc);
1712#endif
1713	if (sc->key_map)
1714		vmem_destroy(sc->key_map);
1715#ifdef INET6
1716	t4_destroy_clip_table(sc);
1717#endif
1718
1719#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1720	free(sc->sge.ofld_txq, M_CXGBE);
1721#endif
1722#ifdef TCP_OFFLOAD
1723	free(sc->sge.ofld_rxq, M_CXGBE);
1724#endif
1725#ifdef DEV_NETMAP
1726	free(sc->sge.nm_rxq, M_CXGBE);
1727	free(sc->sge.nm_txq, M_CXGBE);
1728#endif
1729	free(sc->irq, M_CXGBE);
1730	free(sc->sge.rxq, M_CXGBE);
1731	free(sc->sge.txq, M_CXGBE);
1732	free(sc->sge.ctrlq, M_CXGBE);
1733	free(sc->sge.iqmap, M_CXGBE);
1734	free(sc->sge.eqmap, M_CXGBE);
1735	free(sc->tids.ftid_tab, M_CXGBE);
1736	free(sc->tids.hpftid_tab, M_CXGBE);
1737	free_hftid_hash(&sc->tids);
1738	free(sc->tids.tid_tab, M_CXGBE);
1739	free(sc->tt.tls_rx_ports, M_CXGBE);
1740	t4_destroy_dma_tag(sc);
1741
1742	callout_drain(&sc->ktls_tick);
1743	callout_drain(&sc->sfl_callout);
1744	if (mtx_initialized(&sc->tids.ftid_lock)) {
1745		mtx_destroy(&sc->tids.ftid_lock);
1746		cv_destroy(&sc->tids.ftid_cv);
1747	}
1748	if (mtx_initialized(&sc->tids.atid_lock))
1749		mtx_destroy(&sc->tids.atid_lock);
1750	if (mtx_initialized(&sc->ifp_lock))
1751		mtx_destroy(&sc->ifp_lock);
1752
1753	if (rw_initialized(&sc->policy_lock)) {
1754		rw_destroy(&sc->policy_lock);
1755#ifdef TCP_OFFLOAD
1756		if (sc->policy != NULL)
1757			free_offload_policy(sc->policy);
1758#endif
1759	}
1760
1761	for (i = 0; i < NUM_MEMWIN; i++) {
1762		struct memwin *mw = &sc->memwin[i];
1763
1764		if (rw_initialized(&mw->mw_lock))
1765			rw_destroy(&mw->mw_lock);
1766	}
1767
1768	mtx_destroy(&sc->sfl_lock);
1769	mtx_destroy(&sc->reg_lock);
1770	mtx_destroy(&sc->sc_lock);
1771
1772	bzero(sc, sizeof(*sc));
1773
1774	return (0);
1775}
1776
1777static int
1778cxgbe_probe(device_t dev)
1779{
1780	char buf[128];
1781	struct port_info *pi = device_get_softc(dev);
1782
1783	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
1784	device_set_desc_copy(dev, buf);
1785
1786	return (BUS_PROBE_DEFAULT);
1787}
1788
1789#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
1790    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
1791    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS | \
1792    IFCAP_HWRXTSTMP | IFCAP_MEXTPG)
1793#define T4_CAP_ENABLE (T4_CAP)
1794
1795static int
1796cxgbe_vi_attach(device_t dev, struct vi_info *vi)
1797{
1798	struct ifnet *ifp;
1799	struct sbuf *sb;
1800	struct pfil_head_args pa;
1801	struct adapter *sc = vi->adapter;
1802
1803	vi->xact_addr_filt = -1;
1804	mtx_init(&vi->tick_mtx, "vi tick", NULL, MTX_DEF);
1805	callout_init_mtx(&vi->tick, &vi->tick_mtx, 0);
1806	if (sc->flags & IS_VF || t4_tx_vm_wr != 0)
1807		vi->flags |= TX_USES_VM_WR;
1808
1809	/* Allocate an ifnet and set it up */
1810	ifp = if_alloc_dev(IFT_ETHER, dev);
1811	if (ifp == NULL) {
1812		device_printf(dev, "Cannot allocate ifnet\n");
1813		return (ENOMEM);
1814	}
1815	vi->ifp = ifp;
1816	ifp->if_softc = vi;
1817
1818	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1819	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1820
1821	ifp->if_init = cxgbe_init;
1822	ifp->if_ioctl = cxgbe_ioctl;
1823	ifp->if_transmit = cxgbe_transmit;
1824	ifp->if_qflush = cxgbe_qflush;
1825	if (vi->pi->nvi > 1 || sc->flags & IS_VF)
1826		ifp->if_get_counter = vi_get_counter;
1827	else
1828		ifp->if_get_counter = cxgbe_get_counter;
1829#if defined(KERN_TLS) || defined(RATELIMIT)
1830	ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc;
1831	ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
1832	ifp->if_snd_tag_query = cxgbe_snd_tag_query;
1833	ifp->if_snd_tag_free = cxgbe_snd_tag_free;
1834#endif
1835#ifdef RATELIMIT
1836	ifp->if_ratelimit_query = cxgbe_ratelimit_query;
1837#endif
1838
1839	ifp->if_capabilities = T4_CAP;
1840	ifp->if_capenable = T4_CAP_ENABLE;
1841	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1842	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1843	if (chip_id(sc) >= CHELSIO_T6) {
1844		ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
1845		ifp->if_capenable |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
1846		ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
1847		    CSUM_INNER_IP6_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
1848		    CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN;
1849	}
1850
1851#ifdef TCP_OFFLOAD
1852	if (vi->nofldrxq != 0)
1853		ifp->if_capabilities |= IFCAP_TOE;
1854#endif
1855#ifdef RATELIMIT
1856	if (is_ethoffload(sc) && vi->nofldtxq != 0) {
1857		ifp->if_capabilities |= IFCAP_TXRTLMT;
1858		ifp->if_capenable |= IFCAP_TXRTLMT;
1859	}
1860#endif
1861
1862	ifp->if_hw_tsomax = IP_MAXPACKET;
1863	if (vi->flags & TX_USES_VM_WR)
1864		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
1865	else
1866		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
1867#ifdef RATELIMIT
1868	if (is_ethoffload(sc) && vi->nofldtxq != 0)
1869		ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
1870#endif
1871	ifp->if_hw_tsomaxsegsize = 65536;
1872#ifdef KERN_TLS
1873	if (is_ktls(sc)) {
1874		ifp->if_capabilities |= IFCAP_TXTLS;
1875		if (sc->flags & KERN_TLS_ON)
1876			ifp->if_capenable |= IFCAP_TXTLS;
1877	}
1878#endif
1879
1880	ether_ifattach(ifp, vi->hw_addr);
1881#ifdef DEV_NETMAP
1882	if (vi->nnmrxq != 0)
1883		cxgbe_nm_attach(vi);
1884#endif
1885	sb = sbuf_new_auto();
1886	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
1887#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1888	switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) {
1889	case IFCAP_TOE:
1890		sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq);
1891		break;
1892	case IFCAP_TOE | IFCAP_TXRTLMT:
1893		sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq);
1894		break;
1895	case IFCAP_TXRTLMT:
1896		sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq);
1897		break;
1898	}
1899#endif
1900#ifdef TCP_OFFLOAD
1901	if (ifp->if_capabilities & IFCAP_TOE)
1902		sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq);
1903#endif
1904#ifdef DEV_NETMAP
1905	if (ifp->if_capabilities & IFCAP_NETMAP)
1906		sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
1907		    vi->nnmtxq, vi->nnmrxq);
1908#endif
1909	sbuf_finish(sb);
1910	device_printf(dev, "%s\n", sbuf_data(sb));
1911	sbuf_delete(sb);
1912
1913	vi_sysctls(vi);
1914
1915	pa.pa_version = PFIL_VERSION;
1916	pa.pa_flags = PFIL_IN;
1917	pa.pa_type = PFIL_TYPE_ETHERNET;
1918	pa.pa_headname = ifp->if_xname;
1919	vi->pfil = pfil_head_register(&pa);
1920
1921	return (0);
1922}
1923
1924static int
1925cxgbe_attach(device_t dev)
1926{
1927	struct port_info *pi = device_get_softc(dev);
1928	struct adapter *sc = pi->adapter;
1929	struct vi_info *vi;
1930	int i, rc;
1931
1932	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
1933	if (rc)
1934		return (rc);
1935
1936	for_each_vi(pi, i, vi) {
1937		if (i == 0)
1938			continue;
1939		vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
1940		if (vi->dev == NULL) {
1941			device_printf(dev, "failed to add VI %d\n", i);
1942			continue;
1943		}
1944		device_set_softc(vi->dev, vi);
1945	}
1946
1947	cxgbe_sysctls(pi);
1948
1949	bus_generic_attach(dev);
1950
1951	return (0);
1952}
1953
1954static void
1955cxgbe_vi_detach(struct vi_info *vi)
1956{
1957	struct ifnet *ifp = vi->ifp;
1958
1959	if (vi->pfil != NULL) {
1960		pfil_head_unregister(vi->pfil);
1961		vi->pfil = NULL;
1962	}
1963
1964	ether_ifdetach(ifp);
1965
1966	/* Let detach proceed even if these fail. */
1967#ifdef DEV_NETMAP
1968	if (ifp->if_capabilities & IFCAP_NETMAP)
1969		cxgbe_nm_detach(vi);
1970#endif
1971	cxgbe_uninit_synchronized(vi);
1972	callout_drain(&vi->tick);
1973	vi_full_uninit(vi);
1974
1975	if_free(vi->ifp);
1976	vi->ifp = NULL;
1977}
1978
1979static int
1980cxgbe_detach(device_t dev)
1981{
1982	struct port_info *pi = device_get_softc(dev);
1983	struct adapter *sc = pi->adapter;
1984	int rc;
1985
1986	/* Detach the extra VIs first. */
1987	rc = bus_generic_detach(dev);
1988	if (rc)
1989		return (rc);
1990	device_delete_children(dev);
1991
1992	doom_vi(sc, &pi->vi[0]);
1993
1994	if (pi->flags & HAS_TRACEQ) {
1995		sc->traceq = -1;	/* cloner should not create ifnet */
1996		t4_tracer_port_detach(sc);
1997	}
1998
1999	cxgbe_vi_detach(&pi->vi[0]);
2000	ifmedia_removeall(&pi->media);
2001
2002	end_synchronized_op(sc, 0);
2003
2004	return (0);
2005}
2006
2007static void
2008cxgbe_init(void *arg)
2009{
2010	struct vi_info *vi = arg;
2011	struct adapter *sc = vi->adapter;
2012
2013	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
2014		return;
2015	cxgbe_init_synchronized(vi);
2016	end_synchronized_op(sc, 0);
2017}
2018
2019static int
2020cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
2021{
2022	int rc = 0, mtu, flags;
2023	struct vi_info *vi = ifp->if_softc;
2024	struct port_info *pi = vi->pi;
2025	struct adapter *sc = pi->adapter;
2026	struct ifreq *ifr = (struct ifreq *)data;
2027	uint32_t mask;
2028
2029	switch (cmd) {
2030	case SIOCSIFMTU:
2031		mtu = ifr->ifr_mtu;
2032		if (mtu < ETHERMIN || mtu > MAX_MTU)
2033			return (EINVAL);
2034
2035		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
2036		if (rc)
2037			return (rc);
2038		ifp->if_mtu = mtu;
2039		if (vi->flags & VI_INIT_DONE) {
2040			t4_update_fl_bufsize(ifp);
2041			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2042				rc = update_mac_settings(ifp, XGMAC_MTU);
2043		}
2044		end_synchronized_op(sc, 0);
2045		break;
2046
2047	case SIOCSIFFLAGS:
2048		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg");
2049		if (rc)
2050			return (rc);
2051
2052		if (ifp->if_flags & IFF_UP) {
2053			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2054				flags = vi->if_flags;
2055				if ((ifp->if_flags ^ flags) &
2056				    (IFF_PROMISC | IFF_ALLMULTI)) {
2057					rc = update_mac_settings(ifp,
2058					    XGMAC_PROMISC | XGMAC_ALLMULTI);
2059				}
2060			} else {
2061				rc = cxgbe_init_synchronized(vi);
2062			}
2063			vi->if_flags = ifp->if_flags;
2064		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2065			rc = cxgbe_uninit_synchronized(vi);
2066		}
2067		end_synchronized_op(sc, 0);
2068		break;
2069
2070	case SIOCADDMULTI:
2071	case SIOCDELMULTI:
2072		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi");
2073		if (rc)
2074			return (rc);
2075		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2076			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
2077		end_synchronized_op(sc, 0);
2078		break;
2079
2080	case SIOCSIFCAP:
2081		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
2082		if (rc)
2083			return (rc);
2084
2085		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2086		if (mask & IFCAP_TXCSUM) {
2087			ifp->if_capenable ^= IFCAP_TXCSUM;
2088			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2089
2090			if (IFCAP_TSO4 & ifp->if_capenable &&
2091			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2092				mask &= ~IFCAP_TSO4;
2093				ifp->if_capenable &= ~IFCAP_TSO4;
2094				if_printf(ifp,
2095				    "tso4 disabled due to -txcsum.\n");
2096			}
2097		}
2098		if (mask & IFCAP_TXCSUM_IPV6) {
2099			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2100			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2101
2102			if (IFCAP_TSO6 & ifp->if_capenable &&
2103			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2104				mask &= ~IFCAP_TSO6;
2105				ifp->if_capenable &= ~IFCAP_TSO6;
2106				if_printf(ifp,
2107				    "tso6 disabled due to -txcsum6.\n");
2108			}
2109		}
2110		if (mask & IFCAP_RXCSUM)
2111			ifp->if_capenable ^= IFCAP_RXCSUM;
2112		if (mask & IFCAP_RXCSUM_IPV6)
2113			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2114
2115		/*
2116		 * Note that we leave CSUM_TSO alone (it is always set).  The
2117		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
2118		 * sending a TSO request our way, so it's sufficient to toggle
2119		 * IFCAP_TSOx only.
2120		 */
2121		if (mask & IFCAP_TSO4) {
2122			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2123			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2124				if_printf(ifp, "enable txcsum first.\n");
2125				rc = EAGAIN;
2126				goto fail;
2127			}
2128			ifp->if_capenable ^= IFCAP_TSO4;
2129		}
2130		if (mask & IFCAP_TSO6) {
2131			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2132			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2133				if_printf(ifp, "enable txcsum6 first.\n");
2134				rc = EAGAIN;
2135				goto fail;
2136			}
2137			ifp->if_capenable ^= IFCAP_TSO6;
2138		}
2139		if (mask & IFCAP_LRO) {
2140#if defined(INET) || defined(INET6)
2141			int i;
2142			struct sge_rxq *rxq;
2143
2144			ifp->if_capenable ^= IFCAP_LRO;
2145			for_each_rxq(vi, i, rxq) {
2146				if (ifp->if_capenable & IFCAP_LRO)
2147					rxq->iq.flags |= IQ_LRO_ENABLED;
2148				else
2149					rxq->iq.flags &= ~IQ_LRO_ENABLED;
2150			}
2151#endif
2152		}
2153#ifdef TCP_OFFLOAD
2154		if (mask & IFCAP_TOE) {
2155			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
2156
2157			rc = toe_capability(vi, enable);
2158			if (rc != 0)
2159				goto fail;
2160
2161			ifp->if_capenable ^= mask;
2162		}
2163#endif
2164		if (mask & IFCAP_VLAN_HWTAGGING) {
2165			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2166			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2167				rc = update_mac_settings(ifp, XGMAC_VLANEX);
2168		}
2169		if (mask & IFCAP_VLAN_MTU) {
2170			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2171
2172			/* Need to find out how to disable auto-mtu-inflation */
2173		}
2174		if (mask & IFCAP_VLAN_HWTSO)
2175			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2176		if (mask & IFCAP_VLAN_HWCSUM)
2177			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2178#ifdef RATELIMIT
2179		if (mask & IFCAP_TXRTLMT)
2180			ifp->if_capenable ^= IFCAP_TXRTLMT;
2181#endif
2182		if (mask & IFCAP_HWRXTSTMP) {
2183			int i;
2184			struct sge_rxq *rxq;
2185
2186			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
2187			for_each_rxq(vi, i, rxq) {
2188				if (ifp->if_capenable & IFCAP_HWRXTSTMP)
2189					rxq->iq.flags |= IQ_RX_TIMESTAMP;
2190				else
2191					rxq->iq.flags &= ~IQ_RX_TIMESTAMP;
2192			}
2193		}
2194		if (mask & IFCAP_MEXTPG)
2195			ifp->if_capenable ^= IFCAP_MEXTPG;
2196
2197#ifdef KERN_TLS
2198		if (mask & IFCAP_TXTLS) {
2199			int enable = (ifp->if_capenable ^ mask) & IFCAP_TXTLS;
2200
2201			rc = ktls_capability(sc, enable);
2202			if (rc != 0)
2203				goto fail;
2204
2205			ifp->if_capenable ^= (mask & IFCAP_TXTLS);
2206		}
2207#endif
2208		if (mask & IFCAP_VXLAN_HWCSUM) {
2209			ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
2210			ifp->if_hwassist ^= CSUM_INNER_IP6_UDP |
2211			    CSUM_INNER_IP6_TCP | CSUM_INNER_IP |
2212			    CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP;
2213		}
2214		if (mask & IFCAP_VXLAN_HWTSO) {
2215			ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
2216			ifp->if_hwassist ^= CSUM_INNER_IP6_TSO |
2217			    CSUM_INNER_IP_TSO;
2218		}
2219
2220#ifdef VLAN_CAPABILITIES
2221		VLAN_CAPABILITIES(ifp);
2222#endif
2223fail:
2224		end_synchronized_op(sc, 0);
2225		break;
2226
2227	case SIOCSIFMEDIA:
2228	case SIOCGIFMEDIA:
2229	case SIOCGIFXMEDIA:
2230		ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
2231		break;
2232
2233	case SIOCGI2C: {
2234		struct ifi2creq i2c;
2235
2236		rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2237		if (rc != 0)
2238			break;
2239		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
2240			rc = EPERM;
2241			break;
2242		}
2243		if (i2c.len > sizeof(i2c.data)) {
2244			rc = EINVAL;
2245			break;
2246		}
2247		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
2248		if (rc)
2249			return (rc);
2250		rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
2251		    i2c.offset, i2c.len, &i2c.data[0]);
2252		end_synchronized_op(sc, 0);
2253		if (rc == 0)
2254			rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2255		break;
2256	}
2257
2258	default:
2259		rc = ether_ioctl(ifp, cmd, data);
2260	}
2261
2262	return (rc);
2263}
2264
2265static int
2266cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
2267{
2268	struct vi_info *vi = ifp->if_softc;
2269	struct port_info *pi = vi->pi;
2270	struct adapter *sc;
2271	struct sge_txq *txq;
2272	void *items[1];
2273	int rc;
2274
2275	M_ASSERTPKTHDR(m);
2276	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */
2277#if defined(KERN_TLS) || defined(RATELIMIT)
2278	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2279		MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2280#endif
2281
2282	if (__predict_false(pi->link_cfg.link_ok == false)) {
2283		m_freem(m);
2284		return (ENETDOWN);
2285	}
2286
2287	rc = parse_pkt(&m, vi->flags & TX_USES_VM_WR);
2288	if (__predict_false(rc != 0)) {
2289		MPASS(m == NULL);			/* was freed already */
2290		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
2291		return (rc);
2292	}
2293#ifdef RATELIMIT
2294	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
2295		if (m->m_pkthdr.snd_tag->type == IF_SND_TAG_TYPE_RATE_LIMIT)
2296			return (ethofld_transmit(ifp, m));
2297	}
2298#endif
2299
2300	/* Select a txq. */
2301	sc = vi->adapter;
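	/*
	 * Frames with an RSS hash are spread over the txqs that follow the
	 * first rsrv_noflowq reserved queues; unhashed traffic stays on the
	 * VI's first txq.
	 */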
2302	txq = &sc->sge.txq[vi->first_txq];
2303	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2304		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
2305		    vi->rsrv_noflowq);
2306
2307	items[0] = m;
2308	rc = mp_ring_enqueue(txq->r, items, 1, 256);
2309	if (__predict_false(rc != 0))
2310		m_freem(m);
2311
2312	return (rc);
2313}
2314
2315static void
2316cxgbe_qflush(struct ifnet *ifp)
2317{
2318	struct vi_info *vi = ifp->if_softc;
2319	struct sge_txq *txq;
2320	int i;
2321
2322	/* queues do not exist if !VI_INIT_DONE. */
2323	if (vi->flags & VI_INIT_DONE) {
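		/*
		 * EQ_QFLUSH makes the tx path discard frames instead of
		 * transmitting them, so each mp_ring is guaranteed to drain.
		 */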
2324		for_each_txq(vi, i, txq) {
2325			TXQ_LOCK(txq);
2326			txq->eq.flags |= EQ_QFLUSH;
2327			TXQ_UNLOCK(txq);
2328			while (!mp_ring_is_idle(txq->r)) {
2329				mp_ring_check_drainage(txq->r, 4096);
2330				pause("qflush", 1);
2331			}
2332			TXQ_LOCK(txq);
2333			txq->eq.flags &= ~EQ_QFLUSH;
2334			TXQ_UNLOCK(txq);
2335		}
2336	}
2337	if_qflush(ifp);
2338}
2339
2340static uint64_t
2341vi_get_counter(struct ifnet *ifp, ift_counter c)
2342{
2343	struct vi_info *vi = ifp->if_softc;
2344	struct fw_vi_stats_vf *s = &vi->stats;
2345
2346	mtx_lock(&vi->tick_mtx);
2347	vi_refresh_stats(vi);
2348	mtx_unlock(&vi->tick_mtx);
2349
2350	switch (c) {
2351	case IFCOUNTER_IPACKETS:
2352		return (s->rx_bcast_frames + s->rx_mcast_frames +
2353		    s->rx_ucast_frames);
2354	case IFCOUNTER_IERRORS:
2355		return (s->rx_err_frames);
2356	case IFCOUNTER_OPACKETS:
2357		return (s->tx_bcast_frames + s->tx_mcast_frames +
2358		    s->tx_ucast_frames + s->tx_offload_frames);
2359	case IFCOUNTER_OERRORS:
2360		return (s->tx_drop_frames);
2361	case IFCOUNTER_IBYTES:
2362		return (s->rx_bcast_bytes + s->rx_mcast_bytes +
2363		    s->rx_ucast_bytes);
2364	case IFCOUNTER_OBYTES:
2365		return (s->tx_bcast_bytes + s->tx_mcast_bytes +
2366		    s->tx_ucast_bytes + s->tx_offload_bytes);
2367	case IFCOUNTER_IMCASTS:
2368		return (s->rx_mcast_frames);
2369	case IFCOUNTER_OMCASTS:
2370		return (s->tx_mcast_frames);
2371	case IFCOUNTER_OQDROPS: {
2372		uint64_t drops;
2373
2374		drops = 0;
2375		if (vi->flags & VI_INIT_DONE) {
2376			int i;
2377			struct sge_txq *txq;
2378
2379			for_each_txq(vi, i, txq)
2380				drops += counter_u64_fetch(txq->r->dropped);
2381		}
2382
2383		return (drops);
2385	}
2386
2387	default:
2388		return (if_get_counter_default(ifp, c));
2389	}
2390}
2391
2392static uint64_t
2393cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
2394{
2395	struct vi_info *vi = ifp->if_softc;
2396	struct port_info *pi = vi->pi;
2397	struct port_stats *s = &pi->stats;
2398
2399	mtx_lock(&vi->tick_mtx);
2400	cxgbe_refresh_stats(vi);
2401	mtx_unlock(&vi->tick_mtx);
2402
2403	switch (c) {
2404	case IFCOUNTER_IPACKETS:
2405		return (s->rx_frames);
2406
2407	case IFCOUNTER_IERRORS:
2408		return (s->rx_jabber + s->rx_runt + s->rx_too_long +
2409		    s->rx_fcs_err + s->rx_len_err);
2410
2411	case IFCOUNTER_OPACKETS:
2412		return (s->tx_frames);
2413
2414	case IFCOUNTER_OERRORS:
2415		return (s->tx_error_frames);
2416
2417	case IFCOUNTER_IBYTES:
2418		return (s->rx_octets);
2419
2420	case IFCOUNTER_OBYTES:
2421		return (s->tx_octets);
2422
2423	case IFCOUNTER_IMCASTS:
2424		return (s->rx_mcast_frames);
2425
2426	case IFCOUNTER_OMCASTS:
2427		return (s->tx_mcast_frames);
2428
2429	case IFCOUNTER_IQDROPS:
2430		return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2431		    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2432		    s->rx_trunc3 + pi->tnl_cong_drops);
2433
2434	case IFCOUNTER_OQDROPS: {
2435		uint64_t drops;
2436
2437		drops = s->tx_drop;
2438		if (vi->flags & VI_INIT_DONE) {
2439			int i;
2440			struct sge_txq *txq;
2441
2442			for_each_txq(vi, i, txq)
2443				drops += counter_u64_fetch(txq->r->dropped);
2444		}
2445
2446		return (drops);
2448	}
2449
2450	default:
2451		return (if_get_counter_default(ifp, c));
2452	}
2453}
2454
2455#if defined(KERN_TLS) || defined(RATELIMIT)
2456static int
2457cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
2458    struct m_snd_tag **pt)
2459{
2460	int error;
2461
2462	switch (params->hdr.type) {
2463#ifdef RATELIMIT
2464	case IF_SND_TAG_TYPE_RATE_LIMIT:
2465		error = cxgbe_rate_tag_alloc(ifp, params, pt);
2466		break;
2467#endif
2468#ifdef KERN_TLS
2469	case IF_SND_TAG_TYPE_TLS:
2470		error = cxgbe_tls_tag_alloc(ifp, params, pt);
2471		break;
2472#endif
2473	default:
2474		error = EOPNOTSUPP;
2475	}
2476	return (error);
2477}
2478
2479static int
2480cxgbe_snd_tag_modify(struct m_snd_tag *mst,
2481    union if_snd_tag_modify_params *params)
2482{
2483
2484	switch (mst->type) {
2485#ifdef RATELIMIT
2486	case IF_SND_TAG_TYPE_RATE_LIMIT:
2487		return (cxgbe_rate_tag_modify(mst, params));
2488#endif
2489	default:
2490		return (EOPNOTSUPP);
2491	}
2492}
2493
2494static int
2495cxgbe_snd_tag_query(struct m_snd_tag *mst,
2496    union if_snd_tag_query_params *params)
2497{
2498
2499	switch (mst->type) {
2500#ifdef RATELIMIT
2501	case IF_SND_TAG_TYPE_RATE_LIMIT:
2502		return (cxgbe_rate_tag_query(mst, params));
2503#endif
2504	default:
2505		return (EOPNOTSUPP);
2506	}
2507}
2508
2509static void
2510cxgbe_snd_tag_free(struct m_snd_tag *mst)
2511{
2512
2513	switch (mst->type) {
2514#ifdef RATELIMIT
2515	case IF_SND_TAG_TYPE_RATE_LIMIT:
2516		cxgbe_rate_tag_free(mst);
2517		return;
2518#endif
2519#ifdef KERN_TLS
2520	case IF_SND_TAG_TYPE_TLS:
2521		cxgbe_tls_tag_free(mst);
2522		return;
2523#endif
2524	default:
2525		panic("shouldn't get here");
2526	}
2527}
2528#endif
2529
2530/*
 * The kernel picks a media from the list we have provided but we still
 * validate the request.
2533 */
2534int
2535cxgbe_media_change(struct ifnet *ifp)
2536{
2537	struct vi_info *vi = ifp->if_softc;
2538	struct port_info *pi = vi->pi;
2539	struct ifmedia *ifm = &pi->media;
2540	struct link_config *lc = &pi->link_cfg;
2541	struct adapter *sc = pi->adapter;
2542	int rc;
2543
2544	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
2545	if (rc != 0)
2546		return (rc);
2547	PORT_LOCK(pi);
2548	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
2549		/* ifconfig .. media autoselect */
2550		if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
2551			rc = ENOTSUP; /* AN not supported by transceiver */
2552			goto done;
2553		}
2554		lc->requested_aneg = AUTONEG_ENABLE;
2555		lc->requested_speed = 0;
2556		lc->requested_fc |= PAUSE_AUTONEG;
2557	} else {
2558		lc->requested_aneg = AUTONEG_DISABLE;
2559		lc->requested_speed =
2560		    ifmedia_baudrate(ifm->ifm_media) / 1000000;
2561		lc->requested_fc = 0;
2562		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
2563			lc->requested_fc |= PAUSE_RX;
2564		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
2565			lc->requested_fc |= PAUSE_TX;
2566	}
2567	if (pi->up_vis > 0) {
2568		fixup_link_config(pi);
2569		rc = apply_link_config(pi);
2570	}
2571done:
2572	PORT_UNLOCK(pi);
2573	end_synchronized_op(sc, 0);
2574	return (rc);
2575}
2576
2577/*
2578 * Base media word (without ETHER, pause, link active, etc.) for the port at the
2579 * given speed.
2580 */
2581static int
2582port_mword(struct port_info *pi, uint32_t speed)
2583{
2584
2585	MPASS(speed & M_FW_PORT_CAP32_SPEED);
2586	MPASS(powerof2(speed));
2587
	switch (pi->port_type) {
2589	case FW_PORT_TYPE_BT_SGMII:
2590	case FW_PORT_TYPE_BT_XFI:
2591	case FW_PORT_TYPE_BT_XAUI:
2592		/* BaseT */
2593		switch (speed) {
2594		case FW_PORT_CAP32_SPEED_100M:
2595			return (IFM_100_T);
2596		case FW_PORT_CAP32_SPEED_1G:
2597			return (IFM_1000_T);
2598		case FW_PORT_CAP32_SPEED_10G:
2599			return (IFM_10G_T);
2600		}
2601		break;
2602	case FW_PORT_TYPE_KX4:
2603		if (speed == FW_PORT_CAP32_SPEED_10G)
2604			return (IFM_10G_KX4);
2605		break;
2606	case FW_PORT_TYPE_CX4:
2607		if (speed == FW_PORT_CAP32_SPEED_10G)
2608			return (IFM_10G_CX4);
2609		break;
2610	case FW_PORT_TYPE_KX:
2611		if (speed == FW_PORT_CAP32_SPEED_1G)
2612			return (IFM_1000_KX);
2613		break;
2614	case FW_PORT_TYPE_KR:
2615	case FW_PORT_TYPE_BP_AP:
2616	case FW_PORT_TYPE_BP4_AP:
2617	case FW_PORT_TYPE_BP40_BA:
2618	case FW_PORT_TYPE_KR4_100G:
2619	case FW_PORT_TYPE_KR_SFP28:
2620	case FW_PORT_TYPE_KR_XLAUI:
2621		switch (speed) {
2622		case FW_PORT_CAP32_SPEED_1G:
2623			return (IFM_1000_KX);
2624		case FW_PORT_CAP32_SPEED_10G:
2625			return (IFM_10G_KR);
2626		case FW_PORT_CAP32_SPEED_25G:
2627			return (IFM_25G_KR);
2628		case FW_PORT_CAP32_SPEED_40G:
2629			return (IFM_40G_KR4);
2630		case FW_PORT_CAP32_SPEED_50G:
2631			return (IFM_50G_KR2);
2632		case FW_PORT_CAP32_SPEED_100G:
2633			return (IFM_100G_KR4);
2634		}
2635		break;
2636	case FW_PORT_TYPE_FIBER_XFI:
2637	case FW_PORT_TYPE_FIBER_XAUI:
2638	case FW_PORT_TYPE_SFP:
2639	case FW_PORT_TYPE_QSFP_10G:
2640	case FW_PORT_TYPE_QSA:
2641	case FW_PORT_TYPE_QSFP:
2642	case FW_PORT_TYPE_CR4_QSFP:
2643	case FW_PORT_TYPE_CR_QSFP:
2644	case FW_PORT_TYPE_CR2_QSFP:
2645	case FW_PORT_TYPE_SFP28:
2646		/* Pluggable transceiver */
2647		switch (pi->mod_type) {
2648		case FW_PORT_MOD_TYPE_LR:
2649			switch (speed) {
2650			case FW_PORT_CAP32_SPEED_1G:
2651				return (IFM_1000_LX);
2652			case FW_PORT_CAP32_SPEED_10G:
2653				return (IFM_10G_LR);
2654			case FW_PORT_CAP32_SPEED_25G:
2655				return (IFM_25G_LR);
2656			case FW_PORT_CAP32_SPEED_40G:
2657				return (IFM_40G_LR4);
2658			case FW_PORT_CAP32_SPEED_50G:
2659				return (IFM_50G_LR2);
2660			case FW_PORT_CAP32_SPEED_100G:
2661				return (IFM_100G_LR4);
2662			}
2663			break;
2664		case FW_PORT_MOD_TYPE_SR:
2665			switch (speed) {
2666			case FW_PORT_CAP32_SPEED_1G:
2667				return (IFM_1000_SX);
2668			case FW_PORT_CAP32_SPEED_10G:
2669				return (IFM_10G_SR);
2670			case FW_PORT_CAP32_SPEED_25G:
2671				return (IFM_25G_SR);
2672			case FW_PORT_CAP32_SPEED_40G:
2673				return (IFM_40G_SR4);
2674			case FW_PORT_CAP32_SPEED_50G:
2675				return (IFM_50G_SR2);
2676			case FW_PORT_CAP32_SPEED_100G:
2677				return (IFM_100G_SR4);
2678			}
2679			break;
2680		case FW_PORT_MOD_TYPE_ER:
2681			if (speed == FW_PORT_CAP32_SPEED_10G)
2682				return (IFM_10G_ER);
2683			break;
2684		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2685		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2686			switch (speed) {
2687			case FW_PORT_CAP32_SPEED_1G:
2688				return (IFM_1000_CX);
2689			case FW_PORT_CAP32_SPEED_10G:
2690				return (IFM_10G_TWINAX);
2691			case FW_PORT_CAP32_SPEED_25G:
2692				return (IFM_25G_CR);
2693			case FW_PORT_CAP32_SPEED_40G:
2694				return (IFM_40G_CR4);
2695			case FW_PORT_CAP32_SPEED_50G:
2696				return (IFM_50G_CR2);
2697			case FW_PORT_CAP32_SPEED_100G:
2698				return (IFM_100G_CR4);
2699			}
2700			break;
2701		case FW_PORT_MOD_TYPE_LRM:
2702			if (speed == FW_PORT_CAP32_SPEED_10G)
2703				return (IFM_10G_LRM);
2704			break;
2705		case FW_PORT_MOD_TYPE_NA:
2706			MPASS(0);	/* Not pluggable? */
			/* fall through */
2708		case FW_PORT_MOD_TYPE_ERROR:
2709		case FW_PORT_MOD_TYPE_UNKNOWN:
2710		case FW_PORT_MOD_TYPE_NOTSUPPORTED:
2711			break;
2712		case FW_PORT_MOD_TYPE_NONE:
2713			return (IFM_NONE);
2714		}
2715		break;
2716	case FW_PORT_TYPE_NONE:
2717		return (IFM_NONE);
2718	}
2719
2720	return (IFM_UNKNOWN);
2721}
2722
2723void
2724cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2725{
2726	struct vi_info *vi = ifp->if_softc;
2727	struct port_info *pi = vi->pi;
2728	struct adapter *sc = pi->adapter;
2729	struct link_config *lc = &pi->link_cfg;
2730
2731	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
2732		return;
2733	PORT_LOCK(pi);
2734
2735	if (pi->up_vis == 0) {
2736		/*
2737		 * If all the interfaces are administratively down the firmware
2738		 * does not report transceiver changes.  Refresh port info here
2739		 * so that ifconfig displays accurate ifmedia at all times.
2740		 * This is the only reason we have a synchronized op in this
2741		 * function.  Just PORT_LOCK would have been enough otherwise.
2742		 */
2743		t4_update_port_info(pi);
2744		build_medialist(pi);
2745	}
2746
2747	/* ifm_status */
2748	ifmr->ifm_status = IFM_AVALID;
2749	if (lc->link_ok == false)
2750		goto done;
2751	ifmr->ifm_status |= IFM_ACTIVE;
2752
2753	/* ifm_active */
2754	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2755	ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2756	if (lc->fc & PAUSE_RX)
2757		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2758	if (lc->fc & PAUSE_TX)
2759		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2760	ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
2761done:
2762	PORT_UNLOCK(pi);
2763	end_synchronized_op(sc, 0);
2764}
2765
2766static int
2767vcxgbe_probe(device_t dev)
2768{
2769	char buf[128];
2770	struct vi_info *vi = device_get_softc(dev);
2771
2772	snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2773	    vi - vi->pi->vi);
2774	device_set_desc_copy(dev, buf);
2775
2776	return (BUS_PROBE_DEFAULT);
2777}
2778
2779static int
2780alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2781{
2782	int func, index, rc;
2783	uint32_t param, val;
2784
2785	ASSERT_SYNCHRONIZED_OP(sc);
2786
2787	index = vi - pi->vi;
2788	MPASS(index > 0);	/* This function deals with _extra_ VIs only */
2789	KASSERT(index < nitems(vi_mac_funcs),
2790	    ("%s: VI %s doesn't have a MAC func", __func__,
2791	    device_get_nameunit(vi->dev)));
2792	func = vi_mac_funcs[index];
2793	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2794	    vi->hw_addr, &vi->rss_size, &vi->vfvld, &vi->vin, func, 0);
2795	if (rc < 0) {
		device_printf(vi->dev, "failed to allocate virtual interface "
		    "%d for port %d: %d\n", index, pi->port_id, -rc);
2798		return (-rc);
2799	}
2800	vi->viid = rc;
2801
2802	if (vi->rss_size == 1) {
2803		/*
2804		 * This VI didn't get a slice of the RSS table.  Reduce the
2805		 * number of VIs being created (hw.cxgbe.num_vis) or modify the
2806		 * configuration file (nvi, rssnvi for this PF) if this is a
2807		 * problem.
2808		 */
2809		device_printf(vi->dev, "RSS table not available.\n");
2810		vi->rss_base = 0xffff;
2811
2812		return (0);
2813	}
2814
2815	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2816	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2817	    V_FW_PARAMS_PARAM_YZ(vi->viid);
2818	rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2819	if (rc)
2820		vi->rss_base = 0xffff;
2821	else {
2822		MPASS((val >> 16) == vi->rss_size);
2823		vi->rss_base = val & 0xffff;
2824	}
2825
2826	return (0);
2827}
2828
2829static int
2830vcxgbe_attach(device_t dev)
2831{
2832	struct vi_info *vi;
2833	struct port_info *pi;
2834	struct adapter *sc;
2835	int rc;
2836
2837	vi = device_get_softc(dev);
2838	pi = vi->pi;
2839	sc = pi->adapter;
2840
2841	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2842	if (rc)
2843		return (rc);
2844	rc = alloc_extra_vi(sc, pi, vi);
2845	end_synchronized_op(sc, 0);
2846	if (rc)
2847		return (rc);
2848
2849	rc = cxgbe_vi_attach(dev, vi);
2850	if (rc) {
2851		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2852		return (rc);
2853	}
2854	return (0);
2855}
2856
2857static int
2858vcxgbe_detach(device_t dev)
2859{
2860	struct vi_info *vi;
2861	struct adapter *sc;
2862
2863	vi = device_get_softc(dev);
2864	sc = vi->adapter;
2865
2866	doom_vi(sc, vi);
2867
2868	cxgbe_vi_detach(vi);
2869	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2870
2871	end_synchronized_op(sc, 0);
2872
2873	return (0);
2874}
2875
2876static struct callout fatal_callout;
2877
2878static void
2879delayed_panic(void *arg)
2880{
2881	struct adapter *sc = arg;
2882
2883	panic("%s: panic on fatal error", device_get_nameunit(sc->dev));
2884}
2885
2886void
2887t4_fatal_err(struct adapter *sc, bool fw_error)
2888{
2889
2890	t4_shutdown_adapter(sc);
2891	log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
2892	    device_get_nameunit(sc->dev));
2893	if (fw_error) {
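		/*
		 * Firmware errors are reported from within a synchronized op
		 * (asserted below when mailbox checking is enabled), so it is
		 * safe to set ADAP_ERR without taking the adapter lock here.
		 */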
2894		if (sc->flags & CHK_MBOX_ACCESS)
2895			ASSERT_SYNCHRONIZED_OP(sc);
2896		sc->flags |= ADAP_ERR;
2897	} else {
2898		ADAPTER_LOCK(sc);
2899		sc->flags |= ADAP_ERR;
2900		ADAPTER_UNLOCK(sc);
2901	}
2902#ifdef TCP_OFFLOAD
2903	taskqueue_enqueue(taskqueue_thread, &sc->async_event_task);
2904#endif
2905
2906	if (t4_panic_on_fatal_err) {
		log(LOG_ALERT, "%s: panic on fatal error after 30s\n",
2908		    device_get_nameunit(sc->dev));
2909		callout_reset(&fatal_callout, hz * 30, delayed_panic, sc);
2910	}
2911}
2912
2913void
2914t4_add_adapter(struct adapter *sc)
2915{
2916	sx_xlock(&t4_list_lock);
2917	SLIST_INSERT_HEAD(&t4_list, sc, link);
2918	sx_xunlock(&t4_list_lock);
2919}
2920
2921int
2922t4_map_bars_0_and_4(struct adapter *sc)
2923{
2924	sc->regs_rid = PCIR_BAR(0);
2925	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2926	    &sc->regs_rid, RF_ACTIVE);
2927	if (sc->regs_res == NULL) {
2928		device_printf(sc->dev, "cannot map registers.\n");
2929		return (ENXIO);
2930	}
2931	sc->bt = rman_get_bustag(sc->regs_res);
2932	sc->bh = rman_get_bushandle(sc->regs_res);
2933	sc->mmio_len = rman_get_size(sc->regs_res);
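	/* Kernel (register-based) doorbells in BAR0 are always available. */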
2934	setbit(&sc->doorbells, DOORBELL_KDB);
2935
2936	sc->msix_rid = PCIR_BAR(4);
2937	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2938	    &sc->msix_rid, RF_ACTIVE);
2939	if (sc->msix_res == NULL) {
2940		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2941		return (ENXIO);
2942	}
2943
2944	return (0);
2945}
2946
2947int
2948t4_map_bar_2(struct adapter *sc)
2949{
2950
2951	/*
	 * T4: only the iWARP driver uses the userspace doorbells.  There is no
	 * need to map the BAR if RDMA is disabled.
2954	 */
2955	if (is_t4(sc) && sc->rdmacaps == 0)
2956		return (0);
2957
2958	sc->udbs_rid = PCIR_BAR(2);
2959	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2960	    &sc->udbs_rid, RF_ACTIVE);
2961	if (sc->udbs_res == NULL) {
2962		device_printf(sc->dev, "cannot map doorbell BAR.\n");
2963		return (ENXIO);
2964	}
2965	sc->udbs_base = rman_get_virtual(sc->udbs_res);
2966
2967	if (chip_id(sc) >= CHELSIO_T5) {
2968		setbit(&sc->doorbells, DOORBELL_UDB);
2969#if defined(__i386__) || defined(__amd64__)
2970		if (t5_write_combine) {
2971			int rc, mode;
2972
2973			/*
2974			 * Enable write combining on BAR2.  This is the
2975			 * userspace doorbell BAR and is split into 128B
2976			 * (UDBS_SEG_SIZE) doorbell regions, each associated
2977			 * with an egress queue.  The first 64B has the doorbell
2978			 * and the second 64B can be used to submit a tx work
2979			 * request with an implicit doorbell.
2980			 */
2981
2982			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2983			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2984			if (rc == 0) {
2985				clrbit(&sc->doorbells, DOORBELL_UDB);
2986				setbit(&sc->doorbells, DOORBELL_WCWR);
2987				setbit(&sc->doorbells, DOORBELL_UDBWC);
2988			} else {
2989				device_printf(sc->dev,
2990				    "couldn't enable write combining: %d\n",
2991				    rc);
2992			}
2993
2994			mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2995			t4_write_reg(sc, A_SGE_STAT_CFG,
2996			    V_STATSOURCE_T5(7) | mode);
2997		}
2998#endif
2999	}
3000	sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;
3001
3002	return (0);
3003}
3004
3005struct memwin_init {
3006	uint32_t base;
3007	uint32_t aperture;
3008};
3009
3010static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
3011	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
3012	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
3013	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
3014};
3015
3016static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
3017	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
3018	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
3019	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
3020};
3021
3022static void
3023setup_memwin(struct adapter *sc)
3024{
3025	const struct memwin_init *mw_init;
3026	struct memwin *mw;
3027	int i;
3028	uint32_t bar0;
3029
3030	if (is_t4(sc)) {
3031		/*
3032		 * Read low 32b of bar0 indirectly via the hardware backdoor
3033		 * mechanism.  Works from within PCI passthrough environments
3034		 * too, where rman_get_start() can return a different value.  We
3035		 * need to program the T4 memory window decoders with the actual
3036		 * addresses that will be coming across the PCIe link.
3037		 */
3038		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
3039		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
3040
3041		mw_init = &t4_memwin[0];
3042	} else {
3043		/* T5+ use the relative offset inside the PCIe BAR */
3044		bar0 = 0;
3045
3046		mw_init = &t5_memwin[0];
3047	}
3048
3049	for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
3050		if (!rw_initialized(&mw->mw_lock)) {
3051			rw_init(&mw->mw_lock, "memory window access");
3052			mw->mw_base = mw_init->base;
3053			mw->mw_aperture = mw_init->aperture;
3054			mw->mw_curpos = 0;
3055		}
3056		t4_write_reg(sc,
3057		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
3058		    (mw->mw_base + bar0) | V_BIR(0) |
3059		    V_WINDOW(ilog2(mw->mw_aperture) - 10));
3060		rw_wlock(&mw->mw_lock);
3061		position_memwin(sc, i, mw->mw_curpos);
3062		rw_wunlock(&mw->mw_lock);
3063	}
3064
3065	/* flush */
3066	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
3067}
3068
3069/*
3070 * Positions the memory window at the given address in the card's address space.
3071 * There are some alignment requirements and the actual position may be at an
3072 * address prior to the requested address.  mw->mw_curpos always has the actual
3073 * position of the window.
3074 */
3075static void
3076position_memwin(struct adapter *sc, int idx, uint32_t addr)
3077{
3078	struct memwin *mw;
3079	uint32_t pf;
3080	uint32_t reg;
3081
3082	MPASS(idx >= 0 && idx < NUM_MEMWIN);
3083	mw = &sc->memwin[idx];
3084	rw_assert(&mw->mw_lock, RA_WLOCKED);
3085
3086	if (is_t4(sc)) {
3087		pf = 0;
3088		mw->mw_curpos = addr & ~0xf;	/* start must be 16B aligned */
3089	} else {
3090		pf = V_PFNUM(sc->pf);
3091		mw->mw_curpos = addr & ~0x7f;	/* start must be 128B aligned */
3092	}
3093	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
3094	t4_write_reg(sc, reg, mw->mw_curpos | pf);
3095	t4_read_reg(sc, reg);	/* flush */
3096}
3097
3098int
3099rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
3100    int len, int rw)
3101{
3102	struct memwin *mw;
3103	uint32_t mw_end, v;
3104
3105	MPASS(idx >= 0 && idx < NUM_MEMWIN);
3106
3107	/* Memory can only be accessed in naturally aligned 4 byte units */
3108	if (addr & 3 || len & 3 || len <= 0)
3109		return (EINVAL);
3110
3111	mw = &sc->memwin[idx];
3112	while (len > 0) {
3113		rw_rlock(&mw->mw_lock);
3114		mw_end = mw->mw_curpos + mw->mw_aperture;
3115		if (addr >= mw_end || addr < mw->mw_curpos) {
3116			/* Will need to reposition the window */
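			/*
			 * Take the write lock to move the window: try an
			 * in-place upgrade first and fall back to dropping the
			 * read lock and reacquiring as a writer.  Either way
			 * the window is repositioned before downgrading back
			 * to a read lock for the actual access.
			 */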
3117			if (!rw_try_upgrade(&mw->mw_lock)) {
3118				rw_runlock(&mw->mw_lock);
3119				rw_wlock(&mw->mw_lock);
3120			}
3121			rw_assert(&mw->mw_lock, RA_WLOCKED);
3122			position_memwin(sc, idx, addr);
3123			rw_downgrade(&mw->mw_lock);
3124			mw_end = mw->mw_curpos + mw->mw_aperture;
3125		}
3126		rw_assert(&mw->mw_lock, RA_RLOCKED);
3127		while (addr < mw_end && len > 0) {
3128			if (rw == 0) {
3129				v = t4_read_reg(sc, mw->mw_base + addr -
3130				    mw->mw_curpos);
3131				*val++ = le32toh(v);
3132			} else {
3133				v = *val++;
3134				t4_write_reg(sc, mw->mw_base + addr -
3135				    mw->mw_curpos, htole32(v));
3136			}
3137			addr += 4;
3138			len -= 4;
3139		}
3140		rw_runlock(&mw->mw_lock);
3141	}
3142
3143	return (0);
3144}
3145
3146static void
3147t4_init_atid_table(struct adapter *sc)
3148{
3149	struct tid_info *t;
3150	int i;
3151
3152	t = &sc->tids;
3153	if (t->natids == 0)
3154		return;
3155
3156	MPASS(t->atid_tab == NULL);
3157
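	/*
	 * The table doubles as a free list: a free entry stores a pointer to
	 * the next free entry, and an allocated entry stores the caller's
	 * context instead (see union aopen_entry).
	 */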
3158	t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
3159	    M_ZERO | M_WAITOK);
3160	mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
3161	t->afree = t->atid_tab;
3162	t->atids_in_use = 0;
3163	for (i = 1; i < t->natids; i++)
3164		t->atid_tab[i - 1].next = &t->atid_tab[i];
3165	t->atid_tab[t->natids - 1].next = NULL;
3166}
3167
3168static void
3169t4_free_atid_table(struct adapter *sc)
3170{
3171	struct tid_info *t;
3172
3173	t = &sc->tids;
3174
3175	KASSERT(t->atids_in_use == 0,
3176	    ("%s: %d atids still in use.", __func__, t->atids_in_use));
3177
3178	if (mtx_initialized(&t->atid_lock))
3179		mtx_destroy(&t->atid_lock);
3180	free(t->atid_tab, M_CXGBE);
3181	t->atid_tab = NULL;
3182}
3183
3184int
3185alloc_atid(struct adapter *sc, void *ctx)
3186{
3187	struct tid_info *t = &sc->tids;
3188	int atid = -1;
3189
3190	mtx_lock(&t->atid_lock);
3191	if (t->afree) {
3192		union aopen_entry *p = t->afree;
3193
3194		atid = p - t->atid_tab;
3195		MPASS(atid <= M_TID_TID);
3196		t->afree = p->next;
3197		p->data = ctx;
3198		t->atids_in_use++;
3199	}
3200	mtx_unlock(&t->atid_lock);
3201	return (atid);
3202}
3203
3204void *
3205lookup_atid(struct adapter *sc, int atid)
3206{
3207	struct tid_info *t = &sc->tids;
3208
3209	return (t->atid_tab[atid].data);
3210}
3211
3212void
3213free_atid(struct adapter *sc, int atid)
3214{
3215	struct tid_info *t = &sc->tids;
3216	union aopen_entry *p = &t->atid_tab[atid];
3217
3218	mtx_lock(&t->atid_lock);
3219	p->next = t->afree;
3220	t->afree = p;
3221	t->atids_in_use--;
3222	mtx_unlock(&t->atid_lock);
3223}
3224
3225static void
3226queue_tid_release(struct adapter *sc, int tid)
3227{
3228
3229	CXGBE_UNIMPLEMENTED("deferred tid release");
3230}
3231
3232void
3233release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
3234{
3235	struct wrqe *wr;
3236	struct cpl_tid_release *req;
3237
3238	wr = alloc_wrqe(sizeof(*req), ctrlq);
3239	if (wr == NULL) {
3240		queue_tid_release(sc, tid);	/* defer */
3241		return;
3242	}
3243	req = wrtod(wr);
3244
3245	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
3246
3247	t4_wrq_tx(sc, wr);
3248}
3249
3250static int
3251t4_range_cmp(const void *a, const void *b)
3252{
3253	return ((const struct t4_range *)a)->start -
3254	       ((const struct t4_range *)b)->start;
3255}
3256
3257/*
3258 * Verify that the memory range specified by the addr/len pair is valid within
3259 * the card's address space.
3260 */
3261static int
3262validate_mem_range(struct adapter *sc, uint32_t addr, uint32_t len)
3263{
3264	struct t4_range mem_ranges[4], *r, *next;
3265	uint32_t em, addr_len;
3266	int i, n, remaining;
3267
3268	/* Memory can only be accessed in naturally aligned 4 byte units */
3269	if (addr & 3 || len & 3 || len == 0)
3270		return (EINVAL);
3271
3272	/* Enabled memories */
3273	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3274
3275	r = &mem_ranges[0];
3276	n = 0;
3277	bzero(r, sizeof(mem_ranges));
3278	if (em & F_EDRAM0_ENABLE) {
3279		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3280		r->size = G_EDRAM0_SIZE(addr_len) << 20;
3281		if (r->size > 0) {
3282			r->start = G_EDRAM0_BASE(addr_len) << 20;
3283			if (addr >= r->start &&
3284			    addr + len <= r->start + r->size)
3285				return (0);
3286			r++;
3287			n++;
3288		}
3289	}
3290	if (em & F_EDRAM1_ENABLE) {
3291		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3292		r->size = G_EDRAM1_SIZE(addr_len) << 20;
3293		if (r->size > 0) {
3294			r->start = G_EDRAM1_BASE(addr_len) << 20;
3295			if (addr >= r->start &&
3296			    addr + len <= r->start + r->size)
3297				return (0);
3298			r++;
3299			n++;
3300		}
3301	}
3302	if (em & F_EXT_MEM_ENABLE) {
3303		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3304		r->size = G_EXT_MEM_SIZE(addr_len) << 20;
3305		if (r->size > 0) {
3306			r->start = G_EXT_MEM_BASE(addr_len) << 20;
3307			if (addr >= r->start &&
3308			    addr + len <= r->start + r->size)
3309				return (0);
3310			r++;
3311			n++;
3312		}
3313	}
3314	if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
3315		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3316		r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
3317		if (r->size > 0) {
3318			r->start = G_EXT_MEM1_BASE(addr_len) << 20;
3319			if (addr >= r->start &&
3320			    addr + len <= r->start + r->size)
3321				return (0);
3322			r++;
3323			n++;
3324		}
3325	}
3326	MPASS(n <= nitems(mem_ranges));
3327
3328	if (n > 1) {
3329		/* Sort and merge the ranges. */
3330		qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
3331
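		/*
		 * Coalesce overlapping or abutting ranges so that a request
		 * spanning the boundary between two enabled memories still
		 * validates.
		 */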
3332		/* Start from index 0 and examine the next n - 1 entries. */
3333		r = &mem_ranges[0];
3334		for (remaining = n - 1; remaining > 0; remaining--, r++) {
3335
3336			MPASS(r->size > 0);	/* r is a valid entry. */
3337			next = r + 1;
3338			MPASS(next->size > 0);	/* and so is the next one. */
3339
3340			while (r->start + r->size >= next->start) {
3341				/* Merge the next one into the current entry. */
3342				r->size = max(r->start + r->size,
3343				    next->start + next->size) - r->start;
3344				n--;	/* One fewer entry in total. */
3345				if (--remaining == 0)
3346					goto done;	/* short circuit */
3347				next++;
3348			}
3349			if (next != r + 1) {
3350				/*
3351				 * Some entries were merged into r and next
3352				 * points to the first valid entry that couldn't
3353				 * be merged.
3354				 */
3355				MPASS(next->size > 0);	/* must be valid */
3356				memcpy(r + 1, next, remaining * sizeof(*r));
3357#ifdef INVARIANTS
3358				/*
				 * This is so that the ->size assertions in the
				 * next iteration of the loop do the right
				 * thing for entries that were pulled up and
				 * are no longer valid.
3363				 */
3364				MPASS(n < nitems(mem_ranges));
3365				bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
3366				    sizeof(struct t4_range));
3367#endif
3368			}
3369		}
3370done:
3371		/* Done merging the ranges. */
3372		MPASS(n > 0);
3373		r = &mem_ranges[0];
3374		for (i = 0; i < n; i++, r++) {
3375			if (addr >= r->start &&
3376			    addr + len <= r->start + r->size)
3377				return (0);
3378		}
3379	}
3380
3381	return (EFAULT);
3382}
3383
3384static int
3385fwmtype_to_hwmtype(int mtype)
3386{
3387
3388	switch (mtype) {
3389	case FW_MEMTYPE_EDC0:
3390		return (MEM_EDC0);
3391	case FW_MEMTYPE_EDC1:
3392		return (MEM_EDC1);
3393	case FW_MEMTYPE_EXTMEM:
3394		return (MEM_MC0);
3395	case FW_MEMTYPE_EXTMEM1:
3396		return (MEM_MC1);
3397	default:
3398		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
3399	}
3400}
3401
3402/*
3403 * Verify that the memory range specified by the memtype/offset/len pair is
3404 * valid and lies entirely within the memtype specified.  The global address of
3405 * the start of the range is returned in addr.
3406 */
3407static int
3408validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, uint32_t len,
3409    uint32_t *addr)
3410{
3411	uint32_t em, addr_len, maddr;
3412
3413	/* Memory can only be accessed in naturally aligned 4 byte units */
3414	if (off & 3 || len & 3 || len == 0)
3415		return (EINVAL);
3416
3417	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3418	switch (fwmtype_to_hwmtype(mtype)) {
3419	case MEM_EDC0:
3420		if (!(em & F_EDRAM0_ENABLE))
3421			return (EINVAL);
3422		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3423		maddr = G_EDRAM0_BASE(addr_len) << 20;
3424		break;
3425	case MEM_EDC1:
3426		if (!(em & F_EDRAM1_ENABLE))
3427			return (EINVAL);
3428		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3429		maddr = G_EDRAM1_BASE(addr_len) << 20;
3430		break;
3431	case MEM_MC:
3432		if (!(em & F_EXT_MEM_ENABLE))
3433			return (EINVAL);
3434		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3435		maddr = G_EXT_MEM_BASE(addr_len) << 20;
3436		break;
3437	case MEM_MC1:
3438		if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
3439			return (EINVAL);
3440		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3441		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
3442		break;
3443	default:
3444		return (EINVAL);
3445	}
3446
3447	*addr = maddr + off;	/* global address */
3448	return (validate_mem_range(sc, *addr, len));
3449}
3450
3451static int
3452fixup_devlog_params(struct adapter *sc)
3453{
3454	struct devlog_params *dparams = &sc->params.devlog;
3455	int rc;
3456
3457	rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
3458	    dparams->size, &dparams->addr);
3459
3460	return (rc);
3461}
3462
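/*
 * Total number of interrupt vectors needed: the adapter-wide extras (e.g. the
 * firmware event queue) plus, per port, vectors for the rx queues of the main
 * VI and of each extra VI.  NIC and netmap rx queues can share vectors (hence
 * the max()); offload rx queues always get their own.
 */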
3463static void
3464update_nirq(struct intrs_and_queues *iaq, int nports)
3465{
3466
3467	iaq->nirq = T4_EXTRA_INTR;
3468	iaq->nirq += nports * max(iaq->nrxq, iaq->nnmrxq);
3469	iaq->nirq += nports * iaq->nofldrxq;
3470	iaq->nirq += nports * (iaq->num_vis - 1) *
3471	    max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3472	iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3473}
3474
3475/*
3476 * Adjust requirements to fit the number of interrupts available.
3477 */
3478static void
3479calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
3480    int navail)
3481{
3482	int old_nirq;
3483	const int nports = sc->params.nports;
3484
3485	MPASS(nports > 0);
3486	MPASS(navail > 0);
3487
3488	bzero(iaq, sizeof(*iaq));
3489	iaq->intr_type = itype;
3490	iaq->num_vis = t4_num_vis;
3491	iaq->ntxq = t4_ntxq;
3492	iaq->ntxq_vi = t4_ntxq_vi;
3493	iaq->nrxq = t4_nrxq;
3494	iaq->nrxq_vi = t4_nrxq_vi;
3495#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
3496	if (is_offload(sc) || is_ethoffload(sc)) {
3497		iaq->nofldtxq = t4_nofldtxq;
3498		iaq->nofldtxq_vi = t4_nofldtxq_vi;
3499	}
3500#endif
3501#ifdef TCP_OFFLOAD
3502	if (is_offload(sc)) {
3503		iaq->nofldrxq = t4_nofldrxq;
3504		iaq->nofldrxq_vi = t4_nofldrxq_vi;
3505	}
3506#endif
3507#ifdef DEV_NETMAP
3508	if (t4_native_netmap & NN_MAIN_VI) {
3509		iaq->nnmtxq = t4_nnmtxq;
3510		iaq->nnmrxq = t4_nnmrxq;
3511	}
3512	if (t4_native_netmap & NN_EXTRA_VI) {
3513		iaq->nnmtxq_vi = t4_nnmtxq_vi;
3514		iaq->nnmrxq_vi = t4_nnmrxq_vi;
3515	}
3516#endif
3517
3518	update_nirq(iaq, nports);
3519	if (iaq->nirq <= navail &&
3520	    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3521		/*
3522		 * This is the normal case -- there are enough interrupts for
3523		 * everything.
3524		 */
3525		goto done;
3526	}
3527
3528	/*
	 * If extra VIs have been configured, try reducing their count and see
	 * if that works.
3531	 */
3532	while (iaq->num_vis > 1) {
3533		iaq->num_vis--;
3534		update_nirq(iaq, nports);
3535		if (iaq->nirq <= navail &&
3536		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3537			device_printf(sc->dev, "virtual interfaces per port "
3538			    "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
3539			    "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
3540			    "itype %d, navail %u, nirq %d.\n",
3541			    iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
3542			    iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
3543			    itype, navail, iaq->nirq);
3544			goto done;
3545		}
3546	}
3547
3548	/*
3549	 * Extra VIs will not be created.  Log a message if they were requested.
3550	 */
3551	MPASS(iaq->num_vis == 1);
3552	iaq->ntxq_vi = iaq->nrxq_vi = 0;
3553	iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
3554	iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
3555	if (iaq->num_vis != t4_num_vis) {
3556		device_printf(sc->dev, "extra virtual interfaces disabled.  "
3557		    "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
3558		    "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
3559		    iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
3560		    iaq->nnmrxq_vi, itype, navail, iaq->nirq);
3561	}
3562
3563	/*
	 * Keep reducing the number of NIC rx queues to the next lower power of
	 * 2 (for even RSS distribution), halving the TOE rx queues as well,
	 * and see if that works.
3567	 */
3568	do {
3569		if (iaq->nrxq > 1) {
3570			do {
3571				iaq->nrxq--;
3572			} while (!powerof2(iaq->nrxq));
3573			if (iaq->nnmrxq > iaq->nrxq)
3574				iaq->nnmrxq = iaq->nrxq;
3575		}
3576		if (iaq->nofldrxq > 1)
3577			iaq->nofldrxq >>= 1;
3578
3579		old_nirq = iaq->nirq;
3580		update_nirq(iaq, nports);
3581		if (iaq->nirq <= navail &&
3582		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
3583			device_printf(sc->dev, "running with reduced number of "
3584			    "rx queues because of shortage of interrupts.  "
3585			    "nrxq=%u, nofldrxq=%u.  "
3586			    "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
3587			    iaq->nofldrxq, itype, navail, iaq->nirq);
3588			goto done;
3589		}
3590	} while (old_nirq != iaq->nirq);
3591
3592	/* One interrupt for everything.  Ugh. */
3593	device_printf(sc->dev, "running with minimal number of queues.  "
3594	    "itype %d, navail %u.\n", itype, navail);
3595	iaq->nirq = 1;
3596	iaq->nrxq = 1;
3597	iaq->ntxq = 1;
3598	if (iaq->nofldrxq > 0) {
3599		iaq->nofldrxq = 1;
3600		iaq->nofldtxq = 1;
3601	}
3602	iaq->nnmtxq = 0;
3603	iaq->nnmrxq = 0;
3604done:
3605	MPASS(iaq->num_vis > 0);
3606	if (iaq->num_vis > 1) {
3607		MPASS(iaq->nrxq_vi > 0);
3608		MPASS(iaq->ntxq_vi > 0);
3609	}
3610	MPASS(iaq->nirq > 0);
3611	MPASS(iaq->nrxq > 0);
3612	MPASS(iaq->ntxq > 0);
3613	if (itype == INTR_MSI) {
3614		MPASS(powerof2(iaq->nirq));
3615	}
3616}
3617
3618static int
3619cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3620{
3621	int rc, itype, navail, nalloc;
3622
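	/* Try the interrupt types in order of preference: MSI-X, MSI, INTx. */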
3623	for (itype = INTR_MSIX; itype; itype >>= 1) {
3624
3625		if ((itype & t4_intr_types) == 0)
3626			continue;	/* not allowed */
3627
3628		if (itype == INTR_MSIX)
3629			navail = pci_msix_count(sc->dev);
3630		else if (itype == INTR_MSI)
3631			navail = pci_msi_count(sc->dev);
3632		else
3633			navail = 1;
3634restart:
3635		if (navail == 0)
3636			continue;
3637
3638		calculate_iaq(sc, iaq, itype, navail);
3639		nalloc = iaq->nirq;
3640		rc = 0;
3641		if (itype == INTR_MSIX)
3642			rc = pci_alloc_msix(sc->dev, &nalloc);
3643		else if (itype == INTR_MSI)
3644			rc = pci_alloc_msi(sc->dev, &nalloc);
3645
3646		if (rc == 0 && nalloc > 0) {
3647			if (nalloc == iaq->nirq)
3648				return (0);
3649
3650			/*
3651			 * Didn't get the number requested.  Use whatever number
3652			 * the kernel is willing to allocate.
3653			 */
3654			device_printf(sc->dev, "fewer vectors than requested, "
3655			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3656			    itype, iaq->nirq, nalloc);
3657			pci_release_msi(sc->dev);
3658			navail = nalloc;
3659			goto restart;
3660		}
3661
3662		device_printf(sc->dev,
3663		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
3664		    itype, rc, iaq->nirq, nalloc);
3665	}
3666
3667	device_printf(sc->dev,
3668	    "failed to find a usable interrupt type.  "
3669	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
3670	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3671
3672	return (ENXIO);
3673}
3674
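/*
 * FW_VERSION packs the version of the firmware bundled with the driver for
 * the given chip (major.minor.micro.build) into a single 32-bit word.
 * FW_INTFVER expands to the bundled interface version for the given chip
 * and interface type.
 */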
3675#define FW_VERSION(chip) ( \
3676    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
3677    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
3678    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
3679    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
3680#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
3681
3682/* Just enough of fw_hdr to cover all version info. */
3683struct fw_h {
3684	__u8	ver;
3685	__u8	chip;
3686	__be16	len512;
3687	__be32	fw_ver;
3688	__be32	tp_microcode_ver;
3689	__u8	intfver_nic;
3690	__u8	intfver_vnic;
3691	__u8	intfver_ofld;
3692	__u8	intfver_ri;
3693	__u8	intfver_iscsipdu;
3694	__u8	intfver_iscsi;
3695	__u8	intfver_fcoepdu;
3696	__u8	intfver_fcoe;
3697};
3698/* Spot check a couple of fields. */
3699CTASSERT(offsetof(struct fw_h, fw_ver) == offsetof(struct fw_hdr, fw_ver));
3700CTASSERT(offsetof(struct fw_h, intfver_nic) == offsetof(struct fw_hdr, intfver_nic));
3701CTASSERT(offsetof(struct fw_h, intfver_fcoe) == offsetof(struct fw_hdr, intfver_fcoe));
3702
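/*
 * Per-chip firmware information: the KLD names of the config and firmware
 * modules, and the version info of the firmware the driver was built with.
 */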
3703struct fw_info {
3704	uint8_t chip;
3705	char *kld_name;
3706	char *fw_mod_name;
3707	struct fw_h fw_h;
3708} fw_info[] = {
3709	{
3710		.chip = CHELSIO_T4,
3711		.kld_name = "t4fw_cfg",
3712		.fw_mod_name = "t4fw",
3713		.fw_h = {
3714			.chip = FW_HDR_CHIP_T4,
3715			.fw_ver = htobe32(FW_VERSION(T4)),
3716			.intfver_nic = FW_INTFVER(T4, NIC),
3717			.intfver_vnic = FW_INTFVER(T4, VNIC),
3718			.intfver_ofld = FW_INTFVER(T4, OFLD),
3719			.intfver_ri = FW_INTFVER(T4, RI),
3720			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
3721			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
3722			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
3723			.intfver_fcoe = FW_INTFVER(T4, FCOE),
3724		},
3725	}, {
3726		.chip = CHELSIO_T5,
3727		.kld_name = "t5fw_cfg",
3728		.fw_mod_name = "t5fw",
3729		.fw_h = {
3730			.chip = FW_HDR_CHIP_T5,
3731			.fw_ver = htobe32(FW_VERSION(T5)),
3732			.intfver_nic = FW_INTFVER(T5, NIC),
3733			.intfver_vnic = FW_INTFVER(T5, VNIC),
3734			.intfver_ofld = FW_INTFVER(T5, OFLD),
3735			.intfver_ri = FW_INTFVER(T5, RI),
3736			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
3737			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
3738			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
3739			.intfver_fcoe = FW_INTFVER(T5, FCOE),
3740		},
3741	}, {
3742		.chip = CHELSIO_T6,
3743		.kld_name = "t6fw_cfg",
3744		.fw_mod_name = "t6fw",
3745		.fw_h = {
3746			.chip = FW_HDR_CHIP_T6,
3747			.fw_ver = htobe32(FW_VERSION(T6)),
3748			.intfver_nic = FW_INTFVER(T6, NIC),
3749			.intfver_vnic = FW_INTFVER(T6, VNIC),
3750			.intfver_ofld = FW_INTFVER(T6, OFLD),
3751			.intfver_ri = FW_INTFVER(T6, RI),
3752			.intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
3753			.intfver_iscsi = FW_INTFVER(T6, ISCSI),
3754			.intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
3755			.intfver_fcoe = FW_INTFVER(T6, FCOE),
3756		},
3757	}
3758};
3759
3760static struct fw_info *
3761find_fw_info(int chip)
3762{
3763	int i;
3764
3765	for (i = 0; i < nitems(fw_info); i++) {
3766		if (fw_info[i].chip == chip)
3767			return (&fw_info[i]);
3768	}
3769	return (NULL);
3770}
3771
3772/*
3773 * Is the given firmware API compatible with the one the driver was compiled
3774 * with?
3775 */
3776static int
3777fw_compatible(const struct fw_h *hdr1, const struct fw_h *hdr2)
3778{
3779
3780	/* short circuit if it's the exact same firmware version */
3781	if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3782		return (1);
3783
3784	/*
3785	 * XXX: Is this too conservative?  Perhaps I should limit this to the
3786	 * features that are supported in the driver.
3787	 */
3788#define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3789	if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3790	    SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3791	    SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3792		return (1);
3793#undef SAME_INTF
3794
3795	return (0);
3796}
3797
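/*
 * Acquire references to the KLDs that provide the config file (*dcfg) and,
 * if fw is not NULL, the firmware image (*fw) for this chip.  Release both
 * with unload_fw_module.
 */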
3798static int
3799load_fw_module(struct adapter *sc, const struct firmware **dcfg,
3800    const struct firmware **fw)
3801{
3802	struct fw_info *fw_info;
3803
3804	*dcfg = NULL;
3805	if (fw != NULL)
3806		*fw = NULL;
3807
3808	fw_info = find_fw_info(chip_id(sc));
3809	if (fw_info == NULL) {
3810		device_printf(sc->dev,
3811		    "unable to look up firmware information for chip %d.\n",
3812		    chip_id(sc));
3813		return (EINVAL);
3814	}
3815
3816	*dcfg = firmware_get(fw_info->kld_name);
3817	if (*dcfg != NULL) {
3818		if (fw != NULL)
3819			*fw = firmware_get(fw_info->fw_mod_name);
3820		return (0);
3821	}
3822
3823	return (ENOENT);
3824}
3825
3826static void
3827unload_fw_module(struct adapter *sc, const struct firmware *dcfg,
3828    const struct firmware *fw)
3829{
3830
3831	if (fw != NULL)
3832		firmware_put(fw, FIRMWARE_UNLOAD);
3833	if (dcfg != NULL)
3834		firmware_put(dcfg, FIRMWARE_UNLOAD);
3835}
3836
3837/*
3838 * Return values:
3839 * 0 means no firmware install attempted.
3840 * ERESTART means a firmware install was attempted and was successful.
3841 * +ve errno means a firmware install was attempted but failed.
3842 */
3843static int
3844install_kld_firmware(struct adapter *sc, struct fw_h *card_fw,
3845    const struct fw_h *drv_fw, const char *reason, int *already)
3846{
3847	const struct firmware *cfg, *fw;
3848	const uint32_t c = be32toh(card_fw->fw_ver);
3849	uint32_t d, k;
3850	int rc, fw_install;
3851	struct fw_h bundled_fw;
3852	bool load_attempted;
3853
3854	cfg = fw = NULL;
3855	load_attempted = false;
3856	fw_install = t4_fw_install < 0 ? -t4_fw_install : t4_fw_install;
3857
3858	memcpy(&bundled_fw, drv_fw, sizeof(bundled_fw));
3859	if (t4_fw_install < 0) {
3860		rc = load_fw_module(sc, &cfg, &fw);
3861		if (rc != 0 || fw == NULL) {
3862			device_printf(sc->dev,
3863			    "failed to load firmware module: %d. cfg %p, fw %p;"
3864			    " will use compiled-in firmware version for"
3865			    "hw.cxgbe.fw_install checks.\n",
3866			    rc, cfg, fw);
3867		} else {
3868			memcpy(&bundled_fw, fw->data, sizeof(bundled_fw));
3869		}
3870		load_attempted = true;
3871	}
3872	d = be32toh(bundled_fw.fw_ver);
3873
3874	if (reason != NULL)
3875		goto install;
3876
3877	if ((sc->flags & FW_OK) == 0) {
3878
3879		if (c == 0xffffffff) {
3880			reason = "missing";
3881			goto install;
3882		}
3883
3884		rc = 0;
3885		goto done;
3886	}
3887
3888	if (!fw_compatible(card_fw, &bundled_fw)) {
3889		reason = "incompatible or unusable";
3890		goto install;
3891	}
3892
3893	if (d > c) {
3894		reason = "older than the version bundled with this driver";
3895		goto install;
3896	}
3897
3898	if (fw_install == 2 && d != c) {
3899		reason = "different than the version bundled with this driver";
3900		goto install;
3901	}
3902
3903	/* No reason to do anything to the firmware already on the card. */
3904	rc = 0;
3905	goto done;
3906
3907install:
3908	rc = 0;
3909	if ((*already)++)
3910		goto done;
3911
3912	if (fw_install == 0) {
3913		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3914		    "but the driver is prohibited from installing a firmware "
3915		    "on the card.\n",
3916		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3917		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3918
3919		goto done;
3920	}
3921
3922	/*
3923	 * We'll attempt to install a firmware.  Load the module first (if it
3924	 * hasn't been loaded already).
3925	 */
3926	if (!load_attempted) {
3927		rc = load_fw_module(sc, &cfg, &fw);
3928		if (rc != 0 || fw == NULL) {
3929			device_printf(sc->dev,
3930			    "failed to load firmware module: %d. cfg %p, fw %p\n",
3931			    rc, cfg, fw);
3932			/* carry on */
3933		}
3934	}
3935	if (fw == NULL) {
3936		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3937		    "but the driver cannot take corrective action because it "
3938		    "is unable to load the firmware module.\n",
3939		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3940		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3941		rc = sc->flags & FW_OK ? 0 : ENOENT;
3942		goto done;
3943	}
3944	k = be32toh(((const struct fw_hdr *)fw->data)->fw_ver);
3945	if (k != d) {
3946		MPASS(t4_fw_install > 0);
3947		device_printf(sc->dev,
3948		    "firmware in KLD (%u.%u.%u.%u) is not what the driver was "
3949		    "expecting (%u.%u.%u.%u) and will not be used.\n",
3950		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3951		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k),
3952		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3953		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3954		rc = sc->flags & FW_OK ? 0 : EINVAL;
3955		goto done;
3956	}
3957
3958	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3959	    "installing firmware %u.%u.%u.%u on card.\n",
3960	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3961	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3962	    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3963	    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3964
3965	rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3966	if (rc != 0) {
3967		device_printf(sc->dev, "failed to install firmware: %d\n", rc);
3968	} else {
3969		/* Installed successfully, update the cached header too. */
3970		rc = ERESTART;
3971		memcpy(card_fw, fw->data, sizeof(*card_fw));
3972	}
3973done:
3974	unload_fw_module(sc, cfg, fw);
3975
3976	return (rc);
3977}
3978
3979/*
3980 * Establish contact with the firmware and attempt to become the master driver.
3981 *
3982 * A firmware will be installed to the card if needed (if the driver is allowed
3983 * to do so).
3984 */
3985static int
3986contact_firmware(struct adapter *sc)
3987{
3988	int rc, already = 0;
3989	enum dev_state state;
3990	struct fw_info *fw_info;
3991	struct fw_hdr *card_fw;		/* fw on the card */
3992	const struct fw_h *drv_fw;
3993
3994	fw_info = find_fw_info(chip_id(sc));
3995	if (fw_info == NULL) {
3996		device_printf(sc->dev,
3997		    "unable to look up firmware information for chip %d.\n",
3998		    chip_id(sc));
3999		return (EINVAL);
4000	}
4001	drv_fw = &fw_info->fw_h;
4002
4003	/* Read the header of the firmware on the card */
4004	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
4005restart:
4006	rc = -t4_get_fw_hdr(sc, card_fw);
4007	if (rc != 0) {
4008		device_printf(sc->dev,
4009		    "unable to read firmware header from card's flash: %d\n",
4010		    rc);
4011		goto done;
4012	}
4013
4014	rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw, NULL,
4015	    &already);
4016	if (rc == ERESTART)
4017		goto restart;
4018	if (rc != 0)
4019		goto done;
4020
4021	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
4022	if (rc < 0 || state == DEV_STATE_ERR) {
4023		rc = -rc;
4024		device_printf(sc->dev,
4025		    "failed to connect to the firmware: %d, %d.  "
4026		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4027#if 0
4028		if (install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
4029		    "not responding properly to HELLO", &already) == ERESTART)
4030			goto restart;
4031#endif
4032		goto done;
4033	}
4034	MPASS(be32toh(card_fw->flags) & FW_HDR_FLAGS_RESET_HALT);
4035	sc->flags |= FW_OK;	/* The firmware responded to the FW_HELLO. */
4036
4037	if (rc == sc->pf) {
4038		sc->flags |= MASTER_PF;
4039		rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
4040		    NULL, &already);
4041		if (rc == ERESTART)
4042			rc = 0;
4043		else if (rc != 0)
4044			goto done;
4045	} else if (state == DEV_STATE_UNINIT) {
4046		/*
4047		 * We didn't get to be the master so we definitely won't be
4048		 * configuring the chip.  It's a bug if someone else hasn't
4049		 * configured it already.
4050		 */
4051		device_printf(sc->dev, "couldn't be master(%d), "
4052		    "device not already initialized either(%d).  "
4053		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4054		rc = EPROTO;
4055		goto done;
4056	} else {
4057		/*
4058		 * Some other PF is the master and has configured the chip.
4059		 * This is allowed but untested.
4060		 */
4061		device_printf(sc->dev, "PF%d is master, device state %d.  "
4062		    "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4063		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", rc);
4064		sc->cfcsum = 0;
4065		rc = 0;
4066	}
4067done:
4068	if (rc != 0 && sc->flags & FW_OK) {
4069		t4_fw_bye(sc, sc->mbox);
4070		sc->flags &= ~FW_OK;
4071	}
4072	free(card_fw, M_CXGBE);
4073	return (rc);
4074}
4075
4076static int
4077copy_cfg_file_to_card(struct adapter *sc, char *cfg_file,
4078    uint32_t mtype, uint32_t moff)
4079{
4080	struct fw_info *fw_info;
4081	const struct firmware *dcfg, *rcfg = NULL;
4082	const uint32_t *cfdata;
4083	uint32_t cflen, addr;
4084	int rc;
4085
4086	load_fw_module(sc, &dcfg, NULL);
4087
4088	/* Card specific interpretation of "default". */
4089	if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
4090		if (pci_get_device(sc->dev) == 0x440a)
4091			snprintf(cfg_file, sizeof(t4_cfg_file), UWIRE_CF);
4092		if (is_fpga(sc))
4093			snprintf(cfg_file, sizeof(t4_cfg_file), FPGA_CF);
4094	}
4095
4096	if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
4097		if (dcfg == NULL) {
4098			device_printf(sc->dev,
4099			    "KLD with default config is not available.\n");
4100			rc = ENOENT;
4101			goto done;
4102		}
4103		cfdata = dcfg->data;
4104		cflen = dcfg->datasize & ~3;
4105	} else {
4106		char s[32];
4107
4108		fw_info = find_fw_info(chip_id(sc));
4109		if (fw_info == NULL) {
4110			device_printf(sc->dev,
4111			    "unable to look up firmware information for chip %d.\n",
4112			    chip_id(sc));
4113			rc = EINVAL;
4114			goto done;
4115		}
4116		snprintf(s, sizeof(s), "%s_%s", fw_info->kld_name, cfg_file);
4117
4118		rcfg = firmware_get(s);
4119		if (rcfg == NULL) {
4120			device_printf(sc->dev,
4121			    "unable to load module \"%s\" for configuration "
4122			    "profile \"%s\".\n", s, cfg_file);
4123			rc = ENOENT;
4124			goto done;
4125		}
4126		cfdata = rcfg->data;
4127		cflen = rcfg->datasize & ~3;
4128	}
4129
4130	if (cflen > FLASH_CFG_MAX_SIZE) {
4131		device_printf(sc->dev,
4132		    "config file too long (%d, max allowed is %d).\n",
4133		    cflen, FLASH_CFG_MAX_SIZE);
4134		rc = EINVAL;
4135		goto done;
4136	}
4137
4138	rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
4139	if (rc != 0) {
4140		device_printf(sc->dev,
4141		    "%s: addr (%d/0x%x) or len %d is not valid: %d.\n",
4142		    __func__, mtype, moff, cflen, rc);
4143		rc = EINVAL;
4144		goto done;
4145	}
	rc = write_via_memwin(sc, 2, addr, cfdata, cflen);
4147done:
4148	if (rcfg != NULL)
4149		firmware_put(rcfg, FIRMWARE_UNLOAD);
4150	unload_fw_module(sc, dcfg, NULL);
4151	return (rc);
4152}
4153
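/*
 * Capabilities the driver is willing to let the firmware enable.  Anything
 * the config file asks for beyond these is masked off before the caps are
 * written back (see LIMIT_CAPS below).
 */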
4154struct caps_allowed {
4155	uint16_t nbmcaps;
4156	uint16_t linkcaps;
4157	uint16_t switchcaps;
4158	uint16_t niccaps;
4159	uint16_t toecaps;
4160	uint16_t rdmacaps;
4161	uint16_t cryptocaps;
4162	uint16_t iscsicaps;
4163	uint16_t fcoecaps;
4164};
4165
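/*
 * Shorthand for fully specified firmware parameter IDs: FW_PARAM_DEV for
 * device-wide parameters and FW_PARAM_PFVF for per-PF/VF parameters.
 */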
4166#define FW_PARAM_DEV(param) \
4167	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
4168	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
4169#define FW_PARAM_PFVF(param) \
4170	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
4171	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
4172
4173/*
4174 * Provide a configuration profile to the firmware and have it initialize the
4175 * chip accordingly.  This may involve uploading a configuration file to the
4176 * card.
4177 */
4178static int
4179apply_cfg_and_initialize(struct adapter *sc, char *cfg_file,
4180    const struct caps_allowed *caps_allowed)
4181{
4182	int rc;
4183	struct fw_caps_config_cmd caps;
4184	uint32_t mtype, moff, finicsum, cfcsum, param, val;
4185
4186	rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
4187	if (rc != 0) {
4188		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
4189		return (rc);
4190	}
4191
4192	bzero(&caps, sizeof(caps));
4193	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4194	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
4195	if (strncmp(cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) {
4196		mtype = 0;
4197		moff = 0;
4198		caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4199	} else if (strncmp(cfg_file, FLASH_CF, sizeof(t4_cfg_file)) == 0) {
4200		mtype = FW_MEMTYPE_FLASH;
4201		moff = t4_flash_cfg_addr(sc);
4202		caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
4203		    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
4204		    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
4205		    FW_LEN16(caps));
4206	} else {
4207		/*
4208		 * Ask the firmware where it wants us to upload the config file.
4209		 */
4210		param = FW_PARAM_DEV(CF);
4211		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4212		if (rc != 0) {
4213			/* No support for config file?  Shouldn't happen. */
4214			device_printf(sc->dev,
4215			    "failed to query config file location: %d.\n", rc);
4216			goto done;
4217		}
4218		mtype = G_FW_PARAMS_PARAM_Y(val);
4219		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
4220		caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
4221		    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
4222		    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
4223		    FW_LEN16(caps));
4224
4225		rc = copy_cfg_file_to_card(sc, cfg_file, mtype, moff);
4226		if (rc != 0) {
4227			device_printf(sc->dev,
4228			    "failed to upload config file to card: %d.\n", rc);
4229			goto done;
4230		}
4231	}
4232	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4233	if (rc != 0) {
4234		device_printf(sc->dev, "failed to pre-process config file: %d "
4235		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
4236		goto done;
4237	}
4238
4239	finicsum = be32toh(caps.finicsum);
4240	cfcsum = be32toh(caps.cfcsum);	/* actual */
4241	if (finicsum != cfcsum) {
4242		device_printf(sc->dev,
4243		    "WARNING: config file checksum mismatch: %08x %08x\n",
4244		    finicsum, cfcsum);
4245	}
4246	sc->cfcsum = cfcsum;
4247	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", cfg_file);
4248
4249	/*
4250	 * Let the firmware know what features will (not) be used so it can tune
4251	 * things accordingly.
4252	 */
4253#define LIMIT_CAPS(x) do { \
4254	caps.x##caps &= htobe16(caps_allowed->x##caps); \
4255} while (0)
4256	LIMIT_CAPS(nbm);
4257	LIMIT_CAPS(link);
4258	LIMIT_CAPS(switch);
4259	LIMIT_CAPS(nic);
4260	LIMIT_CAPS(toe);
4261	LIMIT_CAPS(rdma);
4262	LIMIT_CAPS(crypto);
4263	LIMIT_CAPS(iscsi);
4264	LIMIT_CAPS(fcoe);
4265#undef LIMIT_CAPS
4266	if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
4267		/*
4268		 * TOE and hashfilters are mutually exclusive.  It is a config
4269		 * file or firmware bug if both are reported as available.  Try
4270		 * to cope with the situation in non-debug builds by disabling
4271		 * TOE.
4272		 */
4273		MPASS(caps.toecaps == 0);
4274
4275		caps.toecaps = 0;
4276		caps.rdmacaps = 0;
4277		caps.iscsicaps = 0;
4278	}
4279
4280	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4281	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
4282	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4283	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
4284	if (rc != 0) {
4285		device_printf(sc->dev,
4286		    "failed to process config file: %d.\n", rc);
4287		goto done;
4288	}
4289
4290	t4_tweak_chip_settings(sc);
4291	set_params__pre_init(sc);
4292
4293	/* get basic stuff going */
4294	rc = -t4_fw_initialize(sc, sc->mbox);
4295	if (rc != 0) {
4296		device_printf(sc->dev, "fw_initialize failed: %d.\n", rc);
4297		goto done;
4298	}
4299done:
4300	return (rc);
4301}
4302
4303/*
4304 * Partition chip resources for use between various PFs, VFs, etc.
4305 */
4306static int
4307partition_resources(struct adapter *sc)
4308{
4309	char cfg_file[sizeof(t4_cfg_file)];
4310	struct caps_allowed caps_allowed;
4311	int rc;
4312	bool fallback;
4313
4314	/* Only the master driver gets to configure the chip resources. */
4315	MPASS(sc->flags & MASTER_PF);
4316
4317#define COPY_CAPS(x) do { \
4318	caps_allowed.x##caps = t4_##x##caps_allowed; \
4319} while (0)
4320	bzero(&caps_allowed, sizeof(caps_allowed));
4321	COPY_CAPS(nbm);
4322	COPY_CAPS(link);
4323	COPY_CAPS(switch);
4324	COPY_CAPS(nic);
4325	COPY_CAPS(toe);
4326	COPY_CAPS(rdma);
4327	COPY_CAPS(crypto);
4328	COPY_CAPS(iscsi);
4329	COPY_CAPS(fcoe);
4330	fallback = sc->debug_flags & DF_DISABLE_CFG_RETRY ? false : true;
4331	snprintf(cfg_file, sizeof(cfg_file), "%s", t4_cfg_file);
4332retry:
4333	rc = apply_cfg_and_initialize(sc, cfg_file, &caps_allowed);
4334	if (rc != 0 && fallback) {
4335		device_printf(sc->dev,
4336		    "failed (%d) to configure card with \"%s\" profile, "
4337		    "will fall back to a basic configuration and retry.\n",
4338		    rc, cfg_file);
4339		snprintf(cfg_file, sizeof(cfg_file), "%s", BUILTIN_CF);
4340		bzero(&caps_allowed, sizeof(caps_allowed));
4341		COPY_CAPS(switch);
4342		caps_allowed.niccaps = FW_CAPS_CONFIG_NIC;
4343		fallback = false;
4344		goto retry;
4345	}
4346#undef COPY_CAPS
4347	return (rc);
4348}
4349
4350/*
4351 * Retrieve parameters that are needed (or nice to have) very early.
4352 */
4353static int
4354get_params__pre_init(struct adapter *sc)
4355{
4356	int rc;
4357	uint32_t param[2], val[2];
4358
4359	t4_get_version_info(sc);
4360
4361	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
4362	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
4363	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
4364	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
4365	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
4366
4367	snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
4368	    G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
4369	    G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
4370	    G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
4371	    G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
4372
4373	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
4374	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
4375	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
4376	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
4377	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
4378
4379	snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
4380	    G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
4381	    G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
4382	    G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
4383	    G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
4384
4385	param[0] = FW_PARAM_DEV(PORTVEC);
4386	param[1] = FW_PARAM_DEV(CCLK);
4387	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4388	if (rc != 0) {
4389		device_printf(sc->dev,
4390		    "failed to query parameters (pre_init): %d.\n", rc);
4391		return (rc);
4392	}
4393
4394	sc->params.portvec = val[0];
4395	sc->params.nports = bitcount32(val[0]);
4396	sc->params.vpd.cclk = val[1];
4397
4398	/* Read device log parameters. */
4399	rc = -t4_init_devlog_params(sc, 1);
4400	if (rc == 0)
4401		fixup_devlog_params(sc);
4402	else {
4403		device_printf(sc->dev,
4404		    "failed to get devlog parameters: %d.\n", rc);
4405		rc = 0;	/* devlog isn't critical for device operation */
4406	}
4407
4408	return (rc);
4409}
4410
4411/*
4412 * Any params that need to be set before FW_INITIALIZE.
4413 */
4414static int
4415set_params__pre_init(struct adapter *sc)
4416{
4417	int rc = 0;
4418	uint32_t param, val;
4419
4420	if (chip_id(sc) >= CHELSIO_T6) {
4421		param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT);
4422		val = 1;
4423		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4424		/* firmwares < 1.20.1.0 do not have this param. */
4425		if (rc == FW_EINVAL &&
4426		    sc->params.fw_vers < FW_VERSION32(1, 20, 1, 0)) {
4427			rc = 0;
4428		}
4429		if (rc != 0) {
4430			device_printf(sc->dev,
4431			    "failed to enable high priority filters :%d.\n",
4432			    rc);
4433		}
4434	}
4435
	/* Enable opaque VIIDs on firmwares that support them. */
4437	param = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
4438	val = 1;
4439	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4440	if (rc == 0 && val == 1)
4441		sc->params.viid_smt_extn_support = true;
4442	else
4443		sc->params.viid_smt_extn_support = false;
4444
4445	return (rc);
4446}
4447
4448/*
4449 * Retrieve various parameters that are of interest to the driver.  The device
4450 * has been initialized by the firmware at this point.
4451 */
4452static int
4453get_params__post_init(struct adapter *sc)
4454{
4455	int rc;
4456	uint32_t param[7], val[7];
4457	struct fw_caps_config_cmd caps;
4458
4459	param[0] = FW_PARAM_PFVF(IQFLINT_START);
4460	param[1] = FW_PARAM_PFVF(EQ_START);
4461	param[2] = FW_PARAM_PFVF(FILTER_START);
4462	param[3] = FW_PARAM_PFVF(FILTER_END);
4463	param[4] = FW_PARAM_PFVF(L2T_START);
4464	param[5] = FW_PARAM_PFVF(L2T_END);
4465	param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4466	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
4467	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
4468	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
4469	if (rc != 0) {
4470		device_printf(sc->dev,
4471		    "failed to query parameters (post_init): %d.\n", rc);
4472		return (rc);
4473	}
4474
4475	sc->sge.iq_start = val[0];
4476	sc->sge.eq_start = val[1];
4477	if ((int)val[3] > (int)val[2]) {
4478		sc->tids.ftid_base = val[2];
4479		sc->tids.ftid_end = val[3];
4480		sc->tids.nftids = val[3] - val[2] + 1;
4481	}
4482	sc->vres.l2t.start = val[4];
4483	sc->vres.l2t.size = val[5] - val[4] + 1;
4484	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
4485	    ("%s: L2 table size (%u) larger than expected (%u)",
4486	    __func__, sc->vres.l2t.size, L2T_SIZE));
4487	sc->params.core_vdd = val[6];
4488
4489	param[0] = FW_PARAM_PFVF(IQFLINT_END);
4490	param[1] = FW_PARAM_PFVF(EQ_END);
4491	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4492	if (rc != 0) {
4493		device_printf(sc->dev,
4494		    "failed to query parameters (post_init2): %d.\n", rc);
4495		return (rc);
4496	}
4497	MPASS((int)val[0] >= sc->sge.iq_start);
4498	sc->sge.iqmap_sz = val[0] - sc->sge.iq_start + 1;
4499	MPASS((int)val[1] >= sc->sge.eq_start);
4500	sc->sge.eqmap_sz = val[1] - sc->sge.eq_start + 1;
4501
4502	if (chip_id(sc) >= CHELSIO_T6) {
4503
4504		sc->tids.tid_base = t4_read_reg(sc,
4505		    A_LE_DB_ACTIVE_TABLE_START_INDEX);
4506
4507		param[0] = FW_PARAM_PFVF(HPFILTER_START);
4508		param[1] = FW_PARAM_PFVF(HPFILTER_END);
4509		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4510		if (rc != 0) {
4511			device_printf(sc->dev,
4512			   "failed to query hpfilter parameters: %d.\n", rc);
4513			return (rc);
4514		}
4515		if ((int)val[1] > (int)val[0]) {
4516			sc->tids.hpftid_base = val[0];
4517			sc->tids.hpftid_end = val[1];
4518			sc->tids.nhpftids = val[1] - val[0] + 1;
4519
4520			/*
4521			 * These should go off if the layout changes and the
4522			 * driver needs to catch up.
4523			 */
4524			MPASS(sc->tids.hpftid_base == 0);
4525			MPASS(sc->tids.tid_base == sc->tids.nhpftids);
4526		}
4527
4528		param[0] = FW_PARAM_PFVF(RAWF_START);
4529		param[1] = FW_PARAM_PFVF(RAWF_END);
4530		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4531		if (rc != 0) {
4532			device_printf(sc->dev,
4533			   "failed to query rawf parameters: %d.\n", rc);
4534			return (rc);
4535		}
4536		if ((int)val[1] > (int)val[0]) {
4537			sc->rawf_base = val[0];
4538			sc->nrawf = val[1] - val[0] + 1;
4539		}
4540	}
4541
4542	/*
4543	 * MPSBGMAP is queried separately because only recent firmwares support
4544	 * it as a parameter and we don't want the compound query above to fail
4545	 * on older firmwares.
4546	 */
4547	param[0] = FW_PARAM_DEV(MPSBGMAP);
4548	val[0] = 0;
4549	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4550	if (rc == 0)
4551		sc->params.mps_bg_map = val[0];
4552	else
4553		sc->params.mps_bg_map = 0;
4554
4555	/*
4556	 * Determine whether the firmware supports the filter2 work request.
4557	 * This is queried separately for the same reason as MPSBGMAP above.
4558	 */
4559	param[0] = FW_PARAM_DEV(FILTER2_WR);
4560	val[0] = 0;
4561	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4562	if (rc == 0)
4563		sc->params.filter2_wr_support = val[0] != 0;
4564	else
4565		sc->params.filter2_wr_support = 0;
4566
4567	/*
4568	 * Find out whether we're allowed to use the ULPTX MEMWRITE DSGL.
4569	 * This is queried separately for the same reason as other params above.
4570	 */
4571	param[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4572	val[0] = 0;
4573	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4574	if (rc == 0)
4575		sc->params.ulptx_memwrite_dsgl = val[0] != 0;
4576	else
4577		sc->params.ulptx_memwrite_dsgl = false;
4578
4579	/* FW_RI_FR_NSMR_TPTE_WR support */
4580	param[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR);
4581	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4582	if (rc == 0)
4583		sc->params.fr_nsmr_tpte_wr_support = val[0] != 0;
4584	else
4585		sc->params.fr_nsmr_tpte_wr_support = false;
4586
4587	param[0] = FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR);
4588	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4589	if (rc == 0)
4590		sc->params.max_pkts_per_eth_tx_pkts_wr = val[0];
4591	else
4592		sc->params.max_pkts_per_eth_tx_pkts_wr = 15;
4593
	/* get capabilities */
4595	bzero(&caps, sizeof(caps));
4596	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4597	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
4598	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4599	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4600	if (rc != 0) {
4601		device_printf(sc->dev,
4602		    "failed to get card capabilities: %d.\n", rc);
4603		return (rc);
4604	}
4605
4606#define READ_CAPS(x) do { \
	sc->x = be16toh(caps.x); \
4608} while (0)
4609	READ_CAPS(nbmcaps);
4610	READ_CAPS(linkcaps);
4611	READ_CAPS(switchcaps);
4612	READ_CAPS(niccaps);
4613	READ_CAPS(toecaps);
4614	READ_CAPS(rdmacaps);
4615	READ_CAPS(cryptocaps);
4616	READ_CAPS(iscsicaps);
4617	READ_CAPS(fcoecaps);
4618
4619	if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
4620		MPASS(chip_id(sc) > CHELSIO_T4);
4621		MPASS(sc->toecaps == 0);
4622		sc->toecaps = 0;
4623
4624		param[0] = FW_PARAM_DEV(NTID);
4625		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4626		if (rc != 0) {
4627			device_printf(sc->dev,
4628			    "failed to query HASHFILTER parameters: %d.\n", rc);
4629			return (rc);
4630		}
4631		sc->tids.ntids = val[0];
4632		if (sc->params.fw_vers < FW_VERSION32(1, 20, 5, 0)) {
4633			MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4634			sc->tids.ntids -= sc->tids.nhpftids;
4635		}
4636		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4637		sc->params.hash_filter = 1;
4638	}
4639	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
4640		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
4641		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
4642		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4643		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
4644		if (rc != 0) {
4645			device_printf(sc->dev,
4646			    "failed to query NIC parameters: %d.\n", rc);
4647			return (rc);
4648		}
4649		if ((int)val[1] > (int)val[0]) {
4650			sc->tids.etid_base = val[0];
4651			sc->tids.etid_end = val[1];
4652			sc->tids.netids = val[1] - val[0] + 1;
4653			sc->params.eo_wr_cred = val[2];
4654			sc->params.ethoffload = 1;
4655		}
4656	}
4657	if (sc->toecaps) {
4658		/* query offload-related parameters */
4659		param[0] = FW_PARAM_DEV(NTID);
4660		param[1] = FW_PARAM_PFVF(SERVER_START);
4661		param[2] = FW_PARAM_PFVF(SERVER_END);
4662		param[3] = FW_PARAM_PFVF(TDDP_START);
4663		param[4] = FW_PARAM_PFVF(TDDP_END);
4664		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4665		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4666		if (rc != 0) {
4667			device_printf(sc->dev,
4668			    "failed to query TOE parameters: %d.\n", rc);
4669			return (rc);
4670		}
4671		sc->tids.ntids = val[0];
4672		if (sc->params.fw_vers < FW_VERSION32(1, 20, 5, 0)) {
4673			MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4674			sc->tids.ntids -= sc->tids.nhpftids;
4675		}
4676		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4677		if ((int)val[2] > (int)val[1]) {
4678			sc->tids.stid_base = val[1];
4679			sc->tids.nstids = val[2] - val[1] + 1;
4680		}
4681		sc->vres.ddp.start = val[3];
4682		sc->vres.ddp.size = val[4] - val[3] + 1;
4683		sc->params.ofldq_wr_cred = val[5];
4684		sc->params.offload = 1;
4685	} else {
4686		/*
4687		 * The firmware attempts memfree TOE configuration for -SO cards
4688		 * and will report toecaps=0 if it runs out of resources (this
4689		 * depends on the config file).  It may not report 0 for other
4690		 * capabilities dependent on the TOE in this case.  Set them to
4691		 * 0 here so that the driver doesn't bother tracking resources
4692		 * that will never be used.
4693		 */
4694		sc->iscsicaps = 0;
4695		sc->rdmacaps = 0;
4696	}
4697	if (sc->rdmacaps) {
4698		param[0] = FW_PARAM_PFVF(STAG_START);
4699		param[1] = FW_PARAM_PFVF(STAG_END);
4700		param[2] = FW_PARAM_PFVF(RQ_START);
4701		param[3] = FW_PARAM_PFVF(RQ_END);
4702		param[4] = FW_PARAM_PFVF(PBL_START);
4703		param[5] = FW_PARAM_PFVF(PBL_END);
4704		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4705		if (rc != 0) {
4706			device_printf(sc->dev,
4707			    "failed to query RDMA parameters(1): %d.\n", rc);
4708			return (rc);
4709		}
4710		sc->vres.stag.start = val[0];
4711		sc->vres.stag.size = val[1] - val[0] + 1;
4712		sc->vres.rq.start = val[2];
4713		sc->vres.rq.size = val[3] - val[2] + 1;
4714		sc->vres.pbl.start = val[4];
4715		sc->vres.pbl.size = val[5] - val[4] + 1;
4716
4717		param[0] = FW_PARAM_PFVF(SQRQ_START);
4718		param[1] = FW_PARAM_PFVF(SQRQ_END);
4719		param[2] = FW_PARAM_PFVF(CQ_START);
4720		param[3] = FW_PARAM_PFVF(CQ_END);
4721		param[4] = FW_PARAM_PFVF(OCQ_START);
4722		param[5] = FW_PARAM_PFVF(OCQ_END);
4723		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4724		if (rc != 0) {
4725			device_printf(sc->dev,
4726			    "failed to query RDMA parameters(2): %d.\n", rc);
4727			return (rc);
4728		}
4729		sc->vres.qp.start = val[0];
4730		sc->vres.qp.size = val[1] - val[0] + 1;
4731		sc->vres.cq.start = val[2];
4732		sc->vres.cq.size = val[3] - val[2] + 1;
4733		sc->vres.ocq.start = val[4];
4734		sc->vres.ocq.size = val[5] - val[4] + 1;
4735
4736		param[0] = FW_PARAM_PFVF(SRQ_START);
4737		param[1] = FW_PARAM_PFVF(SRQ_END);
4738		param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4739		param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4740		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4741		if (rc != 0) {
4742			device_printf(sc->dev,
4743			    "failed to query RDMA parameters(3): %d.\n", rc);
4744			return (rc);
4745		}
4746		sc->vres.srq.start = val[0];
4747		sc->vres.srq.size = val[1] - val[0] + 1;
4748		sc->params.max_ordird_qp = val[2];
4749		sc->params.max_ird_adapter = val[3];
4750	}
4751	if (sc->iscsicaps) {
4752		param[0] = FW_PARAM_PFVF(ISCSI_START);
4753		param[1] = FW_PARAM_PFVF(ISCSI_END);
4754		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4755		if (rc != 0) {
4756			device_printf(sc->dev,
4757			    "failed to query iSCSI parameters: %d.\n", rc);
4758			return (rc);
4759		}
4760		sc->vres.iscsi.start = val[0];
4761		sc->vres.iscsi.size = val[1] - val[0] + 1;
4762	}
4763	if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4764		param[0] = FW_PARAM_PFVF(TLS_START);
4765		param[1] = FW_PARAM_PFVF(TLS_END);
4766		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4767		if (rc != 0) {
4768			device_printf(sc->dev,
4769			    "failed to query TLS parameters: %d.\n", rc);
4770			return (rc);
4771		}
4772		sc->vres.key.start = val[0];
4773		sc->vres.key.size = val[1] - val[0] + 1;
4774	}
4775
4776	/*
4777	 * We've got the params we wanted to query directly from the firmware.
4778	 * Grab some others via other means.
4779	 */
4780	t4_init_sge_params(sc);
4781	t4_init_tp_params(sc);
4782	t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
4783	t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd);
4784
4785	rc = t4_verify_chip_settings(sc);
4786	if (rc != 0)
4787		return (rc);
4788	t4_init_rx_buf_info(sc);
4789
4790	return (rc);
4791}
4792
4793#ifdef KERN_TLS
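/*
 * Periodic (1ms) callout that keeps the chip's TP sync-time registers
 * tracking the host's TCP timestamp clock while NIC TLS is on.
 */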
4794static void
4795ktls_tick(void *arg)
4796{
4797	struct adapter *sc;
4798	uint32_t tstamp;
4799
4800	sc = arg;
4801	if (sc->flags & KERN_TLS_ON) {
4802		tstamp = tcp_ts_getticks();
4803		t4_write_reg(sc, A_TP_SYNC_TIME_HI, tstamp >> 1);
4804		t4_write_reg(sc, A_TP_SYNC_TIME_LO, tstamp << 31);
4805	}
4806	callout_schedule_sbt(&sc->ktls_tick, SBT_1MS, 0, C_HARDCLOCK);
4807}
4808
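/*
 * Ask the firmware to enable or disable hardware KTLS and update the
 * adapter's KERN_TLS_ON flag to match.
 */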
4809static int
4810t4_config_kern_tls(struct adapter *sc, bool enable)
4811{
4812	int rc;
4813	uint32_t param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4814	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_KTLS_HW) |
4815	    V_FW_PARAMS_PARAM_Y(enable ? 1 : 0) |
4816	    V_FW_PARAMS_PARAM_Z(FW_PARAMS_PARAM_DEV_KTLS_HW_USER_ENABLE);
4817
4818	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &param);
4819	if (rc != 0) {
4820		CH_ERR(sc, "failed to %s NIC TLS: %d\n",
4821		    enable ?  "enable" : "disable", rc);
4822		return (rc);
4823	}
4824
4825	if (enable)
4826		sc->flags |= KERN_TLS_ON;
4827	else
4828		sc->flags &= ~KERN_TLS_ON;
4829
4830	return (rc);
4831}
4832#endif
4833
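/*
 * Any params or registers that need to be set after FW_INITIALIZE:
 * encapsulated CPLs, 32b port caps, filter RSS steering, TP error drops,
 * and the optional TOE timer and KTLS tunables.
 */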
4834static int
4835set_params__post_init(struct adapter *sc)
4836{
4837	uint32_t mask, param, val;
4838#ifdef TCP_OFFLOAD
4839	int i, v, shift;
4840#endif
4841
4842	/* ask for encapsulated CPLs */
4843	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4844	val = 1;
4845	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4846
4847	/* Enable 32b port caps if the firmware supports it. */
4848	param = FW_PARAM_PFVF(PORT_CAPS32);
4849	val = 1;
4850	if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
4851		sc->params.port_caps32 = 1;
4852
4853	/* Let filter + maskhash steer to a part of the VI's RSS region. */
4854	val = 1 << (G_MASKSIZE(t4_read_reg(sc, A_TP_RSS_CONFIG_TNL)) - 1);
4855	t4_set_reg_field(sc, A_TP_RSS_CONFIG_TNL, V_MASKFILTER(M_MASKFILTER),
4856	    V_MASKFILTER(val - 1));
4857
4858	mask = F_DROPERRORANY | F_DROPERRORMAC | F_DROPERRORIPVER |
4859	    F_DROPERRORFRAG | F_DROPERRORATTACK | F_DROPERRORETHHDRLEN |
4860	    F_DROPERRORIPHDRLEN | F_DROPERRORTCPHDRLEN | F_DROPERRORPKTLEN |
4861	    F_DROPERRORTCPOPT | F_DROPERRORCSUMIP | F_DROPERRORCSUM;
4862	val = 0;
4863	if (chip_id(sc) < CHELSIO_T6 && t4_attack_filter != 0) {
4864		t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, F_ATTACKFILTERENABLE,
4865		    F_ATTACKFILTERENABLE);
4866		val |= F_DROPERRORATTACK;
4867	}
4868	if (t4_drop_ip_fragments != 0) {
4869		t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, F_FRAGMENTDROP,
4870		    F_FRAGMENTDROP);
4871		val |= F_DROPERRORFRAG;
4872	}
4873	if (t4_drop_pkts_with_l2_errors != 0)
4874		val |= F_DROPERRORMAC | F_DROPERRORETHHDRLEN;
4875	if (t4_drop_pkts_with_l3_errors != 0) {
4876		val |= F_DROPERRORIPVER | F_DROPERRORIPHDRLEN |
4877		    F_DROPERRORCSUMIP;
4878	}
4879	if (t4_drop_pkts_with_l4_errors != 0) {
4880		val |= F_DROPERRORTCPHDRLEN | F_DROPERRORPKTLEN |
4881		    F_DROPERRORTCPOPT | F_DROPERRORCSUM;
4882	}
4883	t4_set_reg_field(sc, A_TP_ERR_CONFIG, mask, val);
4884
4885#ifdef TCP_OFFLOAD
4886	/*
4887	 * Override the TOE timers with user provided tunables.  This is not the
4888	 * recommended way to change the timers (the firmware config file is) so
4889	 * these tunables are not documented.
4890	 *
4891	 * All the timer tunables are in microseconds.
4892	 */
4893	if (t4_toe_keepalive_idle != 0) {
4894		v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
4895		v &= M_KEEPALIVEIDLE;
4896		t4_set_reg_field(sc, A_TP_KEEP_IDLE,
4897		    V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
4898	}
4899	if (t4_toe_keepalive_interval != 0) {
4900		v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
4901		v &= M_KEEPALIVEINTVL;
4902		t4_set_reg_field(sc, A_TP_KEEP_INTVL,
4903		    V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
4904	}
4905	if (t4_toe_keepalive_count != 0) {
4906		v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
4907		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4908		    V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
4909		    V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
4910		    V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
4911	}
4912	if (t4_toe_rexmt_min != 0) {
4913		v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
4914		v &= M_RXTMIN;
4915		t4_set_reg_field(sc, A_TP_RXT_MIN,
4916		    V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
4917	}
4918	if (t4_toe_rexmt_max != 0) {
4919		v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
4920		v &= M_RXTMAX;
4921		t4_set_reg_field(sc, A_TP_RXT_MAX,
4922		    V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
4923	}
4924	if (t4_toe_rexmt_count != 0) {
4925		v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
4926		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4927		    V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
4928		    V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
4929		    V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
4930	}
4931	for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
4932		if (t4_toe_rexmt_backoff[i] != -1) {
4933			v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
4934			shift = (i & 3) << 3;
4935			t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
4936			    M_TIMERBACKOFFINDEX0 << shift, v << shift);
4937		}
4938	}
4939#endif
4940
4941#ifdef KERN_TLS
4942	if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
4943	    sc->toecaps & FW_CAPS_CONFIG_TOE) {
4944		/*
4945		 * Limit TOE connections to 2 reassembly "islands".  This is
4946		 * required for TOE TLS connections to downgrade to plain TOE
4947		 * connections if an unsupported TLS version or ciphersuite is
4948		 * used.
4949		 */
4950		t4_tp_wr_bits_indirect(sc, A_TP_FRAG_CONFIG,
4951		    V_PASSMODE(M_PASSMODE), V_PASSMODE(2));
4952		if (is_ktls(sc)) {
4953			sc->tlst.inline_keys = t4_tls_inline_keys;
4954			sc->tlst.combo_wrs = t4_tls_combo_wrs;
4955			if (t4_kern_tls != 0)
4956				t4_config_kern_tls(sc, true);
4957		}
4958	}
4959#endif
4960	return (0);
4961}
4962
4963#undef FW_PARAM_PFVF
4964#undef FW_PARAM_DEV
4965
4966static void
4967t4_set_desc(struct adapter *sc)
4968{
4969	char buf[128];
4970	struct adapter_params *p = &sc->params;
4971
4972	snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4973
4974	device_set_desc_copy(sc->dev, buf);
4975}
4976
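/* Add a media word along with all combinations of the TX/RX pause flags. */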
4977static inline void
4978ifmedia_add4(struct ifmedia *ifm, int m)
4979{
4980
4981	ifmedia_add(ifm, m, 0, NULL);
4982	ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4983	ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4984	ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4985}
4986
4987/*
4988 * This is the selected media, which is not quite the same as the active media.
4989 * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4990 * and active are not the same, and "media: Ethernet selected" otherwise.
4991 */
4992static void
4993set_current_media(struct port_info *pi)
4994{
4995	struct link_config *lc;
4996	struct ifmedia *ifm;
4997	int mword;
4998	u_int speed;
4999
5000	PORT_LOCK_ASSERT_OWNED(pi);
5001
5002	/* Leave current media alone if it's already set to IFM_NONE. */
5003	ifm = &pi->media;
5004	if (ifm->ifm_cur != NULL &&
5005	    IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
5006		return;
5007
5008	lc = &pi->link_cfg;
5009	if (lc->requested_aneg != AUTONEG_DISABLE &&
5010	    lc->pcaps & FW_PORT_CAP32_ANEG) {
5011		ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
5012		return;
5013	}
5014	mword = IFM_ETHER | IFM_FDX;
5015	if (lc->requested_fc & PAUSE_TX)
5016		mword |= IFM_ETH_TXPAUSE;
5017	if (lc->requested_fc & PAUSE_RX)
5018		mword |= IFM_ETH_RXPAUSE;
5019	if (lc->requested_speed == 0)
5020		speed = port_top_speed(pi) * 1000;	/* Gbps -> Mbps */
5021	else
5022		speed = lc->requested_speed;
5023	mword |= port_mword(pi, speed_to_fwcap(speed));
5024	ifmedia_set(ifm, mword);
5025}
5026
5027/*
5028 * Returns true if the ifmedia list for the port cannot change.
5029 */
5030static bool
5031fixed_ifmedia(struct port_info *pi)
5032{
5033
5034	return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
5035	    pi->port_type == FW_PORT_TYPE_BT_XFI ||
5036	    pi->port_type == FW_PORT_TYPE_BT_XAUI ||
5037	    pi->port_type == FW_PORT_TYPE_KX4 ||
5038	    pi->port_type == FW_PORT_TYPE_KX ||
5039	    pi->port_type == FW_PORT_TYPE_KR ||
5040	    pi->port_type == FW_PORT_TYPE_BP_AP ||
5041	    pi->port_type == FW_PORT_TYPE_BP4_AP ||
5042	    pi->port_type == FW_PORT_TYPE_BP40_BA ||
5043	    pi->port_type == FW_PORT_TYPE_KR4_100G ||
5044	    pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
5045	    pi->port_type == FW_PORT_TYPE_KR_XLAUI);
5046}
5047
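/*
 * Rebuild the port's ifmedia list from the supported-speed bits in its
 * port capabilities.  No-op for port types whose media list is fixed.
 */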
5048static void
5049build_medialist(struct port_info *pi)
5050{
5051	uint32_t ss, speed;
5052	int unknown, mword, bit;
5053	struct link_config *lc;
5054	struct ifmedia *ifm;
5055
5056	PORT_LOCK_ASSERT_OWNED(pi);
5057
5058	if (pi->flags & FIXED_IFMEDIA)
5059		return;
5060
5061	/*
5062	 * Rebuild the ifmedia list.
5063	 */
5064	ifm = &pi->media;
5065	ifmedia_removeall(ifm);
5066	lc = &pi->link_cfg;
5067	ss = G_FW_PORT_CAP32_SPEED(lc->pcaps); /* Supported Speeds */
5068	if (__predict_false(ss == 0)) {	/* not supposed to happen. */
5069		MPASS(ss != 0);
5070no_media:
5071		MPASS(LIST_EMPTY(&ifm->ifm_list));
5072		ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
5073		ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
5074		return;
5075	}
5076
5077	unknown = 0;
5078	for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
5079		speed = 1 << bit;
5080		MPASS(speed & M_FW_PORT_CAP32_SPEED);
5081		if (ss & speed) {
5082			mword = port_mword(pi, speed);
5083			if (mword == IFM_NONE) {
5084				goto no_media;
5085			} else if (mword == IFM_UNKNOWN)
5086				unknown++;
5087			else
5088				ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
5089		}
5090	}
5091	if (unknown > 0) /* Add one unknown for all unknown media types. */
5092		ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
5093	if (lc->pcaps & FW_PORT_CAP32_ANEG)
5094		ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
5095
5096	set_current_media(pi);
5097}
5098
5099/*
5100 * Initialize the requested fields in the link config based on driver tunables.
5101 */
5102static void
5103init_link_config(struct port_info *pi)
5104{
5105	struct link_config *lc = &pi->link_cfg;
5106
5107	PORT_LOCK_ASSERT_OWNED(pi);
5108
5109	lc->requested_speed = 0;
5110
5111	if (t4_autoneg == 0)
5112		lc->requested_aneg = AUTONEG_DISABLE;
5113	else if (t4_autoneg == 1)
5114		lc->requested_aneg = AUTONEG_ENABLE;
5115	else
5116		lc->requested_aneg = AUTONEG_AUTO;
5117
5118	lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
5119	    PAUSE_AUTONEG);
5120
5121	if (t4_fec & FEC_AUTO)
5122		lc->requested_fec = FEC_AUTO;
5123	else if (t4_fec == 0)
5124		lc->requested_fec = FEC_NONE;
5125	else {
5126		/* -1 is handled by the FEC_AUTO block above and not here. */
5127		lc->requested_fec = t4_fec &
5128		    (FEC_RS | FEC_BASER_RS | FEC_NONE | FEC_MODULE);
5129		if (lc->requested_fec == 0)
5130			lc->requested_fec = FEC_AUTO;
5131	}
5132}
5133
5134/*
5135 * Makes sure that all requested settings comply with what's supported by the
5136 * port.  Returns the number of settings that were invalid and had to be fixed.
5137 */
5138static int
5139fixup_link_config(struct port_info *pi)
5140{
5141	int n = 0;
5142	struct link_config *lc = &pi->link_cfg;
5143	uint32_t fwspeed;
5144
5145	PORT_LOCK_ASSERT_OWNED(pi);
5146
5147	/* Speed (when not autonegotiating) */
5148	if (lc->requested_speed != 0) {
5149		fwspeed = speed_to_fwcap(lc->requested_speed);
5150		if ((fwspeed & lc->pcaps) == 0) {
5151			n++;
5152			lc->requested_speed = 0;
5153		}
5154	}
5155
5156	/* Link autonegotiation */
5157	MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
5158	    lc->requested_aneg == AUTONEG_DISABLE ||
5159	    lc->requested_aneg == AUTONEG_AUTO);
5160	if (lc->requested_aneg == AUTONEG_ENABLE &&
5161	    !(lc->pcaps & FW_PORT_CAP32_ANEG)) {
5162		n++;
5163		lc->requested_aneg = AUTONEG_AUTO;
5164	}
5165
5166	/* Flow control */
5167	MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
5168	if (lc->requested_fc & PAUSE_TX &&
5169	    !(lc->pcaps & FW_PORT_CAP32_FC_TX)) {
5170		n++;
5171		lc->requested_fc &= ~PAUSE_TX;
5172	}
5173	if (lc->requested_fc & PAUSE_RX &&
5174	    !(lc->pcaps & FW_PORT_CAP32_FC_RX)) {
5175		n++;
5176		lc->requested_fc &= ~PAUSE_RX;
5177	}
5178	if (!(lc->requested_fc & PAUSE_AUTONEG) &&
5179	    !(lc->pcaps & FW_PORT_CAP32_FORCE_PAUSE)) {
5180		n++;
5181		lc->requested_fc |= PAUSE_AUTONEG;
5182	}
5183
5184	/* FEC */
5185	if ((lc->requested_fec & FEC_RS &&
5186	    !(lc->pcaps & FW_PORT_CAP32_FEC_RS)) ||
5187	    (lc->requested_fec & FEC_BASER_RS &&
5188	    !(lc->pcaps & FW_PORT_CAP32_FEC_BASER_RS))) {
5189		n++;
5190		lc->requested_fec = FEC_AUTO;
5191	}
5192
5193	return (n);
5194}
5195
5196/*
5197 * Apply the requested L1 settings, which are expected to be valid, to the
5198 * hardware.
5199 */
5200static int
5201apply_link_config(struct port_info *pi)
5202{
5203	struct adapter *sc = pi->adapter;
5204	struct link_config *lc = &pi->link_cfg;
5205	int rc;
5206
5207#ifdef INVARIANTS
5208	ASSERT_SYNCHRONIZED_OP(sc);
5209	PORT_LOCK_ASSERT_OWNED(pi);
5210
5211	if (lc->requested_aneg == AUTONEG_ENABLE)
5212		MPASS(lc->pcaps & FW_PORT_CAP32_ANEG);
5213	if (!(lc->requested_fc & PAUSE_AUTONEG))
5214		MPASS(lc->pcaps & FW_PORT_CAP32_FORCE_PAUSE);
5215	if (lc->requested_fc & PAUSE_TX)
5216		MPASS(lc->pcaps & FW_PORT_CAP32_FC_TX);
5217	if (lc->requested_fc & PAUSE_RX)
5218		MPASS(lc->pcaps & FW_PORT_CAP32_FC_RX);
5219	if (lc->requested_fec & FEC_RS)
5220		MPASS(lc->pcaps & FW_PORT_CAP32_FEC_RS);
5221	if (lc->requested_fec & FEC_BASER_RS)
5222		MPASS(lc->pcaps & FW_PORT_CAP32_FEC_BASER_RS);
5223#endif
5224	rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5225	if (rc != 0) {
5226		/* Don't complain if the VF driver gets back an EPERM. */
5227		if (!(sc->flags & IS_VF) || rc != FW_EPERM)
5228			device_printf(pi->dev, "l1cfg failed: %d\n", rc);
5229	} else {
5230		/*
5231		 * An L1_CFG will almost always result in a link-change event if
5232		 * the link is up, and the driver will refresh the actual
5233		 * fec/fc/etc. when the notification is processed.  If the link
5234		 * is down then the actual settings are meaningless.
5235		 *
5236		 * This takes care of the case where a change in the L1 settings
5237		 * may not result in a notification.
5238		 */
5239		if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
5240			lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
5241	}
5242	return (rc);
5243}
5244
5245#define FW_MAC_EXACT_CHUNK	7
5246struct mcaddr_ctx {
5247	struct ifnet *ifp;
5248	const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
5249	uint64_t hash;
5250	int i;
5251	int del;
5252	int rc;
5253};
5254
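/*
 * if_foreach_llmaddr callback: collects multicast addresses and programs
 * them into the MPS TCAM in chunks of FW_MAC_EXACT_CHUNK.  Addresses that
 * don't get an exact-match entry end up in the hash that is written later.
 */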
5255static u_int
5256add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
5257{
5258	struct mcaddr_ctx *ctx = arg;
5259	struct vi_info *vi = ctx->ifp->if_softc;
5260	struct port_info *pi = vi->pi;
5261	struct adapter *sc = pi->adapter;
5262
5263	if (ctx->rc < 0)
5264		return (0);
5265
5266	ctx->mcaddr[ctx->i] = LLADDR(sdl);
5267	MPASS(ETHER_IS_MULTICAST(ctx->mcaddr[ctx->i]));
5268	ctx->i++;
5269
5270	if (ctx->i == FW_MAC_EXACT_CHUNK) {
5271		ctx->rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, ctx->del,
5272		    ctx->i, ctx->mcaddr, NULL, &ctx->hash, 0);
5273		if (ctx->rc < 0) {
5274			int j;
5275
5276			for (j = 0; j < ctx->i; j++) {
5277				if_printf(ctx->ifp,
5278				    "failed to add mc address"
5279				    " %02x:%02x:%02x:"
5280				    "%02x:%02x:%02x rc=%d\n",
5281				    ctx->mcaddr[j][0], ctx->mcaddr[j][1],
5282				    ctx->mcaddr[j][2], ctx->mcaddr[j][3],
5283				    ctx->mcaddr[j][4], ctx->mcaddr[j][5],
5284				    -ctx->rc);
5285			}
5286			return (0);
5287		}
5288		ctx->del = 0;
5289		ctx->i = 0;
5290	}
5291
5292	return (1);
5293}
5294
5295/*
5296 * Program the port's XGMAC based on parameters in ifnet.  The caller also
5297 * indicates which parameters should be programmed (the rest are left alone).
5298 */
5299int
5300update_mac_settings(struct ifnet *ifp, int flags)
5301{
5302	int rc = 0;
5303	struct vi_info *vi = ifp->if_softc;
5304	struct port_info *pi = vi->pi;
5305	struct adapter *sc = pi->adapter;
5306	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
5307	uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
5308
5309	ASSERT_SYNCHRONIZED_OP(sc);
5310	KASSERT(flags, ("%s: not told what to update.", __func__));
5311
5312	if (flags & XGMAC_MTU)
5313		mtu = ifp->if_mtu;
5314
5315	if (flags & XGMAC_PROMISC)
5316		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
5317
5318	if (flags & XGMAC_ALLMULTI)
5319		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
5320
5321	if (flags & XGMAC_VLANEX)
5322		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
5323
5324	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
5325		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
5326		    allmulti, 1, vlanex, false);
5327		if (rc) {
5328			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
5329			    rc);
5330			return (rc);
5331		}
5332	}
5333
5334	if (flags & XGMAC_UCADDR) {
5335		uint8_t ucaddr[ETHER_ADDR_LEN];
5336
5337		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
5338		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
5339		    ucaddr, true, &vi->smt_idx);
5340		if (rc < 0) {
5341			rc = -rc;
5342			if_printf(ifp, "change_mac failed: %d\n", rc);
5343			return (rc);
5344		} else {
5345			vi->xact_addr_filt = rc;
5346			rc = 0;
5347		}
5348	}
5349
5350	if (flags & XGMAC_MCADDRS) {
5351		struct epoch_tracker et;
5352		struct mcaddr_ctx ctx;
5353		int j;
5354
5355		ctx.ifp = ifp;
5356		ctx.hash = 0;
5357		ctx.i = 0;
5358		ctx.del = 1;
5359		ctx.rc = 0;
		/*
		 * Unlike other drivers, we accumulate a list of pointers into
		 * the interface's address lists, and we need to keep it safe
		 * even after if_foreach_llmaddr() returns, so we must enter
		 * the network epoch.
		 */
5366		NET_EPOCH_ENTER(et);
5367		if_foreach_llmaddr(ifp, add_maddr, &ctx);
5368		if (ctx.rc < 0) {
5369			NET_EPOCH_EXIT(et);
5370			rc = -ctx.rc;
5371			return (rc);
5372		}
5373		if (ctx.i > 0) {
5374			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
5375			    ctx.del, ctx.i, ctx.mcaddr, NULL, &ctx.hash, 0);
5376			NET_EPOCH_EXIT(et);
5377			if (rc < 0) {
5378				rc = -rc;
5379				for (j = 0; j < ctx.i; j++) {
5380					if_printf(ifp,
5381					    "failed to add mcast address"
5382					    " %02x:%02x:%02x:"
5383					    "%02x:%02x:%02x rc=%d\n",
5384					    ctx.mcaddr[j][0], ctx.mcaddr[j][1],
5385					    ctx.mcaddr[j][2], ctx.mcaddr[j][3],
5386					    ctx.mcaddr[j][4], ctx.mcaddr[j][5],
5387					    rc);
5388				}
5389				return (rc);
5390			}
5391			ctx.del = 0;
5392		} else
5393			NET_EPOCH_EXIT(et);
5394
5395		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, ctx.hash, 0);
5396		if (rc != 0)
5397			if_printf(ifp, "failed to set mcast address hash: %d\n",
5398			    rc);
5399		if (ctx.del == 0) {
5400			/* We clobbered the VXLAN entry if there was one. */
5401			pi->vxlan_tcam_entry = false;
5402		}
5403	}
5404
5405	if (IS_MAIN_VI(vi) && sc->vxlan_refcount > 0 &&
5406	    pi->vxlan_tcam_entry == false) {
5407		rc = t4_alloc_raw_mac_filt(sc, vi->viid, match_all_mac,
5408		    match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
5409		    true);
5410		if (rc < 0) {
5411			rc = -rc;
5412			if_printf(ifp, "failed to add VXLAN TCAM entry: %d.\n",
5413			    rc);
5414		} else {
5415			MPASS(rc == sc->rawf_base + pi->port_id);
5416			rc = 0;
5417			pi->vxlan_tcam_entry = true;
5418		}
5419	}
5420
5421	return (rc);
5422}
5423
5424/*
5425 * {begin|end}_synchronized_op must be called from the same thread.
5426 */
5427int
5428begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
5429    char *wmesg)
5430{
5431	int rc, pri;
5432
5433#ifdef WITNESS
5434	/* the caller thinks it's ok to sleep, but is it really? */
5435	if (flags & SLEEP_OK)
5436		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
5437		    "begin_synchronized_op");
5438#endif
5439
	if (flags & INTR_OK)
5441		pri = PCATCH;
5442	else
5443		pri = 0;
5444
5445	ADAPTER_LOCK(sc);
5446	for (;;) {
5447
5448		if (vi && IS_DOOMED(vi)) {
5449			rc = ENXIO;
5450			goto done;
5451		}
5452
5453		if (!IS_BUSY(sc)) {
5454			rc = 0;
5455			break;
5456		}
5457
5458		if (!(flags & SLEEP_OK)) {
5459			rc = EBUSY;
5460			goto done;
5461		}
5462
5463		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
5464			rc = EINTR;
5465			goto done;
5466		}
5467	}
5468
5469	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
5470	SET_BUSY(sc);
5471#ifdef INVARIANTS
5472	sc->last_op = wmesg;
5473	sc->last_op_thr = curthread;
5474	sc->last_op_flags = flags;
5475#endif
5476
5477done:
5478	if (!(flags & HOLD_LOCK) || rc)
5479		ADAPTER_UNLOCK(sc);
5480
5481	return (rc);
5482}
5483
5484/*
 * Tell if_ioctl and if_init that the VI is going away.  This is a
 * special variant of begin_synchronized_op and must be paired with a
5487 * call to end_synchronized_op.
5488 */
5489void
5490doom_vi(struct adapter *sc, struct vi_info *vi)
5491{
5492
5493	ADAPTER_LOCK(sc);
5494	SET_DOOMED(vi);
5495	wakeup(&sc->flags);
5496	while (IS_BUSY(sc))
5497		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
5498	SET_BUSY(sc);
5499#ifdef INVARIANTS
5500	sc->last_op = "t4detach";
5501	sc->last_op_thr = curthread;
5502	sc->last_op_flags = 0;
5503#endif
5504	ADAPTER_UNLOCK(sc);
5505}
5506
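/*
 * Sketch of the detach-side pairing (the actual callers are the detach
 * paths elsewhere in this file):
 *
 *	doom_vi(sc, vi);
 *	... tear the VI down; concurrent ioctls now fail with ENXIO ...
 *	end_synchronized_op(sc, 0);
 */
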
5507/*
5508 * {begin|end}_synchronized_op must be called from the same thread.
5509 */
5510void
5511end_synchronized_op(struct adapter *sc, int flags)
5512{
5513
5514	if (flags & LOCK_HELD)
5515		ADAPTER_LOCK_ASSERT_OWNED(sc);
5516	else
5517		ADAPTER_LOCK(sc);
5518
5519	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
5520	CLR_BUSY(sc);
5521	wakeup(&sc->flags);
5522	ADAPTER_UNLOCK(sc);
5523}
5524
5525static int
5526cxgbe_init_synchronized(struct vi_info *vi)
5527{
5528	struct port_info *pi = vi->pi;
5529	struct adapter *sc = pi->adapter;
5530	struct ifnet *ifp = vi->ifp;
5531	int rc = 0, i;
5532	struct sge_txq *txq;
5533
5534	ASSERT_SYNCHRONIZED_OP(sc);
5535
5536	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
5537		return (0);	/* already running */
5538
5539	if (!(sc->flags & FULL_INIT_DONE) &&
5540	    ((rc = adapter_full_init(sc)) != 0))
5541		return (rc);	/* error message displayed already */
5542
5543	if (!(vi->flags & VI_INIT_DONE) &&
5544	    ((rc = vi_full_init(vi)) != 0))
5545		return (rc); /* error message displayed already */
5546
5547	rc = update_mac_settings(ifp, XGMAC_ALL);
5548	if (rc)
5549		goto done;	/* error message displayed already */
5550
5551	PORT_LOCK(pi);
5552	if (pi->up_vis == 0) {
5553		t4_update_port_info(pi);
5554		fixup_link_config(pi);
5555		build_medialist(pi);
5556		apply_link_config(pi);
5557	}
5558
5559	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
5560	if (rc != 0) {
5561		if_printf(ifp, "enable_vi failed: %d\n", rc);
5562		PORT_UNLOCK(pi);
5563		goto done;
5564	}
5565
5566	/*
5567	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
5568	 * if this changes.
5569	 */
5570
5571	for_each_txq(vi, i, txq) {
5572		TXQ_LOCK(txq);
5573		txq->eq.flags |= EQ_ENABLED;
5574		TXQ_UNLOCK(txq);
5575	}
5576
5577	/*
5578	 * The first iq of the first port to come up is used for tracing.
5579	 */
5580	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
5581		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
5582		t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL :
5583		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
5584		    V_QUEUENUMBER(sc->traceq));
5585		pi->flags |= HAS_TRACEQ;
5586	}
5587
5588	/* all ok */
5589	pi->up_vis++;
5590	ifp->if_drv_flags |= IFF_DRV_RUNNING;
5591	if (pi->link_cfg.link_ok)
5592		t4_os_link_changed(pi);
5593	PORT_UNLOCK(pi);
5594
5595	mtx_lock(&vi->tick_mtx);
5596	if (ifp->if_get_counter == vi_get_counter)
5597		callout_reset(&vi->tick, hz, vi_tick, vi);
5598	else
5599		callout_reset(&vi->tick, hz, cxgbe_tick, vi);
5600	mtx_unlock(&vi->tick_mtx);
5601done:
5602	if (rc != 0)
5603		cxgbe_uninit_synchronized(vi);
5604
5605	return (rc);
5606}
5607
5608/*
5609 * Idempotent.
5610 */
5611static int
5612cxgbe_uninit_synchronized(struct vi_info *vi)
5613{
5614	struct port_info *pi = vi->pi;
5615	struct adapter *sc = pi->adapter;
5616	struct ifnet *ifp = vi->ifp;
5617	int rc, i;
5618	struct sge_txq *txq;
5619
5620	ASSERT_SYNCHRONIZED_OP(sc);
5621
5622	if (!(vi->flags & VI_INIT_DONE)) {
5623		if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5624			KASSERT(0, ("uninited VI is running"));
5625			if_printf(ifp, "uninited VI with running ifnet.  "
5626			    "vi->flags 0x%016lx, if_flags 0x%08x, "
5627			    "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
5628			    ifp->if_drv_flags);
5629		}
5630		return (0);
5631	}
5632
5633	/*
5634	 * Disable the VI so that all its data in either direction is discarded
5635	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
5636	 * tick) intact as the TP can deliver negative advice or data that it's
5637	 * holding in its RAM (for an offloaded connection) even after the VI is
5638	 * disabled.
5639	 */
5640	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
5641	if (rc) {
5642		if_printf(ifp, "disable_vi failed: %d\n", rc);
5643		return (rc);
5644	}
5645
5646	for_each_txq(vi, i, txq) {
5647		TXQ_LOCK(txq);
5648		txq->eq.flags &= ~EQ_ENABLED;
5649		TXQ_UNLOCK(txq);
5650	}
5651
5652	mtx_lock(&vi->tick_mtx);
5653	callout_stop(&vi->tick);
5654	mtx_unlock(&vi->tick_mtx);
5655
5656	PORT_LOCK(pi);
5657	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5658		PORT_UNLOCK(pi);
5659		return (0);
5660	}
5661	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
5662	pi->up_vis--;
5663	if (pi->up_vis > 0) {
5664		PORT_UNLOCK(pi);
5665		return (0);
5666	}
5667
5668	pi->link_cfg.link_ok = false;
5669	pi->link_cfg.speed = 0;
5670	pi->link_cfg.link_down_rc = 255;
5671	t4_os_link_changed(pi);
5672	PORT_UNLOCK(pi);
5673
5674	return (0);
5675}
5676
5677/*
5678 * It is ok for this function to fail midway and return right away.  t4_detach
5679 * will walk the entire sc->irq list and clean up whatever is valid.
5680 */
5681int
5682t4_setup_intr_handlers(struct adapter *sc)
5683{
5684	int rc, rid, p, q, v;
5685	char s[8];
5686	struct irq *irq;
5687	struct port_info *pi;
5688	struct vi_info *vi;
5689	struct sge *sge = &sc->sge;
5690	struct sge_rxq *rxq;
5691#ifdef TCP_OFFLOAD
5692	struct sge_ofld_rxq *ofld_rxq;
5693#endif
5694#ifdef DEV_NETMAP
5695	struct sge_nm_rxq *nm_rxq;
5696#endif
5697#ifdef RSS
5698	int nbuckets = rss_getnumbuckets();
5699#endif
5700
5701	/*
5702	 * Setup interrupts.
5703	 */
5704	irq = &sc->irq[0];
5705	rid = sc->intr_type == INTR_INTX ? 0 : 1;
5706	if (forwarding_intr_to_fwq(sc))
5707		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
5708
5709	/* Multiple interrupts. */
5710	if (sc->flags & IS_VF)
5711		KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
5712		    ("%s: too few intr.", __func__));
5713	else
5714		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
5715		    ("%s: too few intr.", __func__));
5716
5717	/* The first one is always error intr on PFs */
5718	if (!(sc->flags & IS_VF)) {
5719		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
5720		if (rc != 0)
5721			return (rc);
5722		irq++;
5723		rid++;
5724	}
5725
5726	/* The second one is always the firmware event queue (first on VFs) */
5727	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
5728	if (rc != 0)
5729		return (rc);
5730	irq++;
5731	rid++;
5732
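	/*
	 * The remaining vectors service rx queues.  Each gets a short name
	 * of the form "<port><vi><queue>", e.g. "0a1" is rx queue 1 of the
	 * first VI on port 0 (TOE queues use an uppercase VI letter, as in
	 * "0A1").
	 */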
5733	for_each_port(sc, p) {
5734		pi = sc->port[p];
5735		for_each_vi(pi, v, vi) {
5736			vi->first_intr = rid - 1;
5737
5738			if (vi->nnmrxq > 0) {
5739				int n = max(vi->nrxq, vi->nnmrxq);
5740
5741				rxq = &sge->rxq[vi->first_rxq];
5742#ifdef DEV_NETMAP
5743				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
5744#endif
5745				for (q = 0; q < n; q++) {
5746					snprintf(s, sizeof(s), "%x%c%x", p,
5747					    'a' + v, q);
5748					if (q < vi->nrxq)
5749						irq->rxq = rxq++;
5750#ifdef DEV_NETMAP
5751					if (q < vi->nnmrxq)
5752						irq->nm_rxq = nm_rxq++;
5753
5754					if (irq->nm_rxq != NULL &&
5755					    irq->rxq == NULL) {
5756						/* Netmap rx only */
5757						rc = t4_alloc_irq(sc, irq, rid,
5758						    t4_nm_intr, irq->nm_rxq, s);
5759					}
5760					if (irq->nm_rxq != NULL &&
5761					    irq->rxq != NULL) {
5762						/* NIC and Netmap rx */
5763						rc = t4_alloc_irq(sc, irq, rid,
5764						    t4_vi_intr, irq, s);
5765					}
5766#endif
5767					if (irq->rxq != NULL &&
5768					    irq->nm_rxq == NULL) {
5769						/* NIC rx only */
5770						rc = t4_alloc_irq(sc, irq, rid,
5771						    t4_intr, irq->rxq, s);
5772					}
5773					if (rc != 0)
5774						return (rc);
5775#ifdef RSS
5776					if (q < vi->nrxq) {
5777						bus_bind_intr(sc->dev, irq->res,
5778						    rss_getcpu(q % nbuckets));
5779					}
5780#endif
5781					irq++;
5782					rid++;
5783					vi->nintr++;
5784				}
5785			} else {
5786				for_each_rxq(vi, q, rxq) {
5787					snprintf(s, sizeof(s), "%x%c%x", p,
5788					    'a' + v, q);
5789					rc = t4_alloc_irq(sc, irq, rid,
5790					    t4_intr, rxq, s);
5791					if (rc != 0)
5792						return (rc);
5793#ifdef RSS
5794					bus_bind_intr(sc->dev, irq->res,
5795					    rss_getcpu(q % nbuckets));
5796#endif
5797					irq++;
5798					rid++;
5799					vi->nintr++;
5800				}
5801			}
5802#ifdef TCP_OFFLOAD
5803			for_each_ofld_rxq(vi, q, ofld_rxq) {
5804				snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
5805				rc = t4_alloc_irq(sc, irq, rid, t4_intr,
5806				    ofld_rxq, s);
5807				if (rc != 0)
5808					return (rc);
5809				irq++;
5810				rid++;
5811				vi->nintr++;
5812			}
5813#endif
5814		}
5815	}
5816	MPASS(irq == &sc->irq[sc->intr_count]);
5817
5818	return (0);
5819}
5820
5821static void
5822write_global_rss_key(struct adapter *sc)
5823{
5824#ifdef RSS
5825	int i;
5826	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5827	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5828
5829	CTASSERT(RSS_KEYSIZE == 40);
5830
5831	rss_getkey((void *)&raw_rss_key[0]);
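	/*
	 * The key is handed to t4_write_rss_key() with its 32-bit words in
	 * reverse order and byte-swapped relative to the layout that
	 * rss_getkey() provides.
	 */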
5832	for (i = 0; i < nitems(rss_key); i++) {
5833		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
5834	}
5835	t4_write_rss_key(sc, &rss_key[0], -1, 1);
5836#endif
5837}
5838
5839int
5840adapter_full_init(struct adapter *sc)
5841{
5842	int rc, i;
5843
5844	ASSERT_SYNCHRONIZED_OP(sc);
5845	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5846	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
5847	    ("%s: FULL_INIT_DONE already", __func__));
5848
5849	/*
5850	 * Set up the queues that belong to the adapter (not any one port).
5851	 */
5852	rc = t4_setup_adapter_queues(sc);
5853	if (rc != 0)
5854		goto done;
5855
5856	for (i = 0; i < nitems(sc->tq); i++) {
5857		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
5858		    taskqueue_thread_enqueue, &sc->tq[i]);
5859		if (sc->tq[i] == NULL) {
5860			device_printf(sc->dev,
5861			    "failed to allocate task queue %d\n", i);
5862			rc = ENOMEM;
5863			goto done;
5864		}
5865		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
5866		    device_get_nameunit(sc->dev), i);
5867	}
5868
5869	if (!(sc->flags & IS_VF)) {
5870		write_global_rss_key(sc);
5871		t4_intr_enable(sc);
5872	}
5873#ifdef KERN_TLS
5874	if (is_ktls(sc))
5875		callout_reset_sbt(&sc->ktls_tick, SBT_1MS, 0, ktls_tick, sc,
5876		    C_HARDCLOCK);
5877#endif
5878	sc->flags |= FULL_INIT_DONE;
5879done:
5880	if (rc != 0)
5881		adapter_full_uninit(sc);
5882
5883	return (rc);
5884}
5885
5886int
5887adapter_full_uninit(struct adapter *sc)
5888{
5889	int i;
5890
5891	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5892
5893	t4_teardown_adapter_queues(sc);
5894
5895	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5896		taskqueue_free(sc->tq[i]);
5897		sc->tq[i] = NULL;
5898	}
5899
5900	sc->flags &= ~FULL_INIT_DONE;
5901
5902	return (0);
5903}
5904
5905#ifdef RSS
5906#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
5907    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
5908    RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
5909    RSS_HASHTYPE_RSS_UDP_IPV6)
5910
5911/* Translates kernel hash types to hardware. */
5912static int
5913hashconfig_to_hashen(int hashconfig)
5914{
5915	int hashen = 0;
5916
5917	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5918		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5919	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5920		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5921	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5922		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5923		    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5924	}
5925	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5926		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5927		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5928	}
5929	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5930		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5931	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5932		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5933
5934	return (hashen);
5935}
5936
5937/* Translates hardware hash types to kernel. */
5938static int
5939hashen_to_hashconfig(int hashen)
5940{
5941	int hashconfig = 0;
5942
5943	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
5944		/*
5945		 * If UDP hashing was enabled it must have been enabled for
5946		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
5947		 * enabling any 4-tuple hash is nonsense configuration.
5948		 */
5949		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5950		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
5951
5952		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5953			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
5954		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5955			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
5956	}
5957	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5958		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
5959	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5960		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
5961	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
5962		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
5963	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
5964		hashconfig |= RSS_HASHTYPE_RSS_IPV6;
5965
5966	return (hashconfig);
5967}
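
/*
 * The two translations above are not inverses: the hardware has a single
 * 4-tuple enable per IP version, so requesting (say) UDP/IPv4 hashing
 * turns on IP4FOURTUPEN, and translating that back reports TCP/IPv4 as
 * enabled too.  vi_full_init() relies on this to detect and report hash
 * types that were forced on beyond the kernel's global configuration.
 */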
5968#endif
5969
5970int
5971vi_full_init(struct vi_info *vi)
5972{
5973	struct adapter *sc = vi->adapter;
5974	struct ifnet *ifp = vi->ifp;
5975	uint16_t *rss;
5976	struct sge_rxq *rxq;
5977	int rc, i, j;
5978#ifdef RSS
5979	int nbuckets = rss_getnumbuckets();
5980	int hashconfig = rss_gethashconfig();
5981	int extra;
5982#endif
5983
5984	ASSERT_SYNCHRONIZED_OP(sc);
5985	KASSERT((vi->flags & VI_INIT_DONE) == 0,
5986	    ("%s: VI_INIT_DONE already", __func__));
5987
5988	sysctl_ctx_init(&vi->ctx);
5989	vi->flags |= VI_SYSCTL_CTX;
5990
5991	/*
5992	 * Allocate tx/rx/fl queues for this VI.
5993	 */
5994	rc = t4_setup_vi_queues(vi);
5995	if (rc != 0)
5996		goto done;	/* error message displayed already */
5997
5998	/*
5999	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
6000	 */
6001	if (vi->nrxq > vi->rss_size) {
6002		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
6003		    "some queues will never receive traffic.\n", vi->nrxq,
6004		    vi->rss_size);
6005	} else if (vi->rss_size % vi->nrxq) {
6006		if_printf(ifp, "hw RSS table size (%d) is not a multiple of "
6007		    "nrxq (%d); expect uneven traffic distribution.\n",
6008		    vi->rss_size, vi->nrxq);
6009	}
6010#ifdef RSS
6011	if (vi->nrxq != nbuckets) {
6012		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d); "
6013		    "performance will be impacted.\n", vi->nrxq, nbuckets);
6014	}
6015#endif
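	/*
	 * For example, with rss_size 64 and nrxq 4 the loop below repeats
	 * queues 0-3 sixteen times; with the RSS option the entry for slot i
	 * follows the kernel's bucket for i instead, so software and
	 * hardware agree on which queue a given hash selects.
	 */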
6016	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
6017	for (i = 0; i < vi->rss_size;) {
6018#ifdef RSS
6019		j = rss_get_indirection_to_bucket(i);
6020		j %= vi->nrxq;
6021		rxq = &sc->sge.rxq[vi->first_rxq + j];
6022		rss[i++] = rxq->iq.abs_id;
6023#else
6024		for_each_rxq(vi, j, rxq) {
6025			rss[i++] = rxq->iq.abs_id;
6026			if (i == vi->rss_size)
6027				break;
6028		}
6029#endif
6030	}
6031
6032	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
6033	    vi->rss_size);
6034	if (rc != 0) {
6035		free(rss, M_CXGBE);
6036		if_printf(ifp, "rss_config failed: %d\n", rc);
6037		goto done;
6038	}
6039
6040#ifdef RSS
6041	vi->hashen = hashconfig_to_hashen(hashconfig);
6042
6043	/*
6044	 * We may have had to enable some hashes even though the global config
6045	 * wants them disabled.  This is a potential problem that must be
6046	 * reported to the user.
6047	 */
6048	extra = hashen_to_hashconfig(vi->hashen) ^ hashconfig;
6049
6050	/*
6051	 * If we consider only the supported hash types, then the enabled hashes
6052	 * are a superset of the requested hashes.  In other words, there cannot
6053	 * be any supported hash that was requested but not enabled, but there
6054	 * can be hashes that were not requested but had to be enabled.
6055	 */
6056	extra &= SUPPORTED_RSS_HASHTYPES;
6057	MPASS((extra & hashconfig) == 0);
6058
6059	if (extra) {
6060		if_printf(ifp,
6061		    "global RSS config (0x%x) cannot be accommodated.\n",
6062		    hashconfig);
6063	}
6064	if (extra & RSS_HASHTYPE_RSS_IPV4)
6065		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
6066	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
6067		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
6068	if (extra & RSS_HASHTYPE_RSS_IPV6)
6069		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
6070	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
6071		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
6072	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
6073		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
6074	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
6075		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
6076#else
6077	vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
6078	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
6079	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
6080	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
6081#endif
6082	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
6083	if (rc != 0) {
6084		free(rss, M_CXGBE);
6085		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
6086		goto done;
6087	}
6088
6089	vi->rss = rss;
6090	vi->flags |= VI_INIT_DONE;
6091done:
6092	if (rc != 0)
6093		vi_full_uninit(vi);
6094
6095	return (rc);
6096}
6097
6098/*
6099 * Idempotent.
6100 */
6101int
6102vi_full_uninit(struct vi_info *vi)
6103{
6104	struct port_info *pi = vi->pi;
6105	struct adapter *sc = pi->adapter;
6106	int i;
6107	struct sge_rxq *rxq;
6108	struct sge_txq *txq;
6109#ifdef TCP_OFFLOAD
6110	struct sge_ofld_rxq *ofld_rxq;
6111#endif
6112#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6113	struct sge_ofld_txq *ofld_txq;
6114#endif
6115
6116	if (vi->flags & VI_INIT_DONE) {
6117
6118		/* Need to quiesce queues.  */
6119
6120		/* XXX: Only for the first VI? */
6121		if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
6122			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
6123
6124		for_each_txq(vi, i, txq) {
6125			quiesce_txq(sc, txq);
6126		}
6127
6128#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6129		for_each_ofld_txq(vi, i, ofld_txq) {
6130			quiesce_wrq(sc, &ofld_txq->wrq);
6131		}
6132#endif
6133
6134		for_each_rxq(vi, i, rxq) {
6135			quiesce_iq(sc, &rxq->iq);
6136			quiesce_fl(sc, &rxq->fl);
6137		}
6138
6139#ifdef TCP_OFFLOAD
6140		for_each_ofld_rxq(vi, i, ofld_rxq) {
6141			quiesce_iq(sc, &ofld_rxq->iq);
6142			quiesce_fl(sc, &ofld_rxq->fl);
6143		}
6144#endif
6145		free(vi->rss, M_CXGBE);
6146		free(vi->nm_rss, M_CXGBE);
6147	}
6148
6149	t4_teardown_vi_queues(vi);
6150	vi->flags &= ~VI_INIT_DONE;
6151
6152	return (0);
6153}
6154
6155static void
6156quiesce_txq(struct adapter *sc, struct sge_txq *txq)
6157{
6158	struct sge_eq *eq = &txq->eq;
6159	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
6160
6161	(void) sc;	/* unused */
6162
6163#ifdef INVARIANTS
6164	TXQ_LOCK(txq);
6165	MPASS((eq->flags & EQ_ENABLED) == 0);
6166	TXQ_UNLOCK(txq);
6167#endif
6168
6169	/* Wait for the mp_ring to empty. */
6170	while (!mp_ring_is_idle(txq->r)) {
6171		mp_ring_check_drainage(txq->r, 4096);
6172		pause("rquiesce", 1);
6173	}
6174
6175	/* Then wait for the hardware to finish. */
6176	while (spg->cidx != htobe16(eq->pidx))
6177		pause("equiesce", 1);
6178
6179	/* Finally, wait for the driver to reclaim all descriptors. */
6180	while (eq->cidx != eq->pidx)
6181		pause("dquiesce", 1);
6182}
6183
6184static void
6185quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
6186{
6187
6188	/* XXXTX */
6189}
6190
6191static void
6192quiesce_iq(struct adapter *sc, struct sge_iq *iq)
6193{
6194	(void) sc;	/* unused */
6195
6196	/* Synchronize with the interrupt handler */
6197	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
6198		pause("iqfree", 1);
6199}
6200
6201static void
6202quiesce_fl(struct adapter *sc, struct sge_fl *fl)
6203{
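	/*
	 * Doom the fl and stop the starvation callout while holding both
	 * locks so that the fl is not refilled behind our back while it is
	 * being torn down.
	 */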
6204	mtx_lock(&sc->sfl_lock);
6205	FL_LOCK(fl);
6206	fl->flags |= FL_DOOMED;
6207	FL_UNLOCK(fl);
6208	callout_stop(&sc->sfl_callout);
6209	mtx_unlock(&sc->sfl_lock);
6210
6211	KASSERT((fl->flags & FL_STARVING) == 0,
6212	    ("%s: still starving", __func__));
6213}
6214
6215static int
6216t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
6217    driver_intr_t *handler, void *arg, char *name)
6218{
6219	int rc;
6220
6221	irq->rid = rid;
6222	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
6223	    RF_SHAREABLE | RF_ACTIVE);
6224	if (irq->res == NULL) {
6225		device_printf(sc->dev,
6226		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
6227		return (ENOMEM);
6228	}
6229
6230	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
6231	    NULL, handler, arg, &irq->tag);
6232	if (rc != 0) {
6233		device_printf(sc->dev,
6234		    "failed to setup interrupt for rid %d, name %s: %d\n",
6235		    rid, name, rc);
6236	} else if (name)
6237		bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
6238
6239	return (rc);
6240}
6241
6242static int
6243t4_free_irq(struct adapter *sc, struct irq *irq)
6244{
6245	if (irq->tag)
6246		bus_teardown_intr(sc->dev, irq->res, irq->tag);
6247	if (irq->res)
6248		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
6249
6250	bzero(irq, sizeof(*irq));
6251
6252	return (0);
6253}
6254
6255static void
6256get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
6257{
6258
6259	regs->version = chip_id(sc) | chip_rev(sc) << 10;
6260	t4_get_regs(sc, buf, regs->len);
6261}
6262
6263#define	A_PL_INDIR_CMD	0x1f8
6264
6265#define	S_PL_AUTOINC	31
6266#define	M_PL_AUTOINC	0x1U
6267#define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
6268#define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
6269
6270#define	S_PL_VFID	20
6271#define	M_PL_VFID	0xffU
6272#define	V_PL_VFID(x)	((x) << S_PL_VFID)
6273#define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
6274
6275#define	S_PL_ADDR	0
6276#define	M_PL_ADDR	0xfffffU
6277#define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
6278#define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
6279
6280#define	A_PL_INDIR_DATA	0x1fc
6281
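/*
 * The PF reads a VF's MPS statistics through the PL indirect window:
 * A_PL_INDIR_CMD selects the VF and the register, A_PL_INDIR_DATA returns
 * the contents, and V_PL_AUTOINC(1) advances the address after each data
 * read so that both halves of a 64-bit counter come from back-to-back
 * reads (see read_vf_stat() below).
 */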
6282static uint64_t
6283read_vf_stat(struct adapter *sc, u_int vin, int reg)
6284{
6285	u32 stats[2];
6286
6287	if (sc->flags & IS_VF) {
6288		stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
6289		stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
6290	} else {
6291		mtx_assert(&sc->reg_lock, MA_OWNED);
6292		t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
6293		    V_PL_VFID(vin) | V_PL_ADDR(VF_MPS_REG(reg)));
6294		stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
6295		stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
6296	}
6297	return (((uint64_t)stats[1]) << 32 | stats[0]);
6298}
6299
6300static void
6301t4_get_vi_stats(struct adapter *sc, u_int vin, struct fw_vi_stats_vf *stats)
6302{
6303
6304#define GET_STAT(name) \
6305	read_vf_stat(sc, vin, A_MPS_VF_STAT_##name##_L)
6306
6307	if (!(sc->flags & IS_VF))
6308		mtx_lock(&sc->reg_lock);
6309	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
6310	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
6311	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
6312	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
6313	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
6314	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
6315	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
6316	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
6317	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
6318	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
6319	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
6320	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
6321	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
6322	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
6323	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
6324	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
6325	if (!(sc->flags & IS_VF))
6326		mtx_unlock(&sc->reg_lock);
6327
6328#undef GET_STAT
6329}
6330
6331static void
6332t4_clr_vi_stats(struct adapter *sc, u_int vin)
6333{
6334	int reg;
6335
6336	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(vin) |
6337	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
6338	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
6339	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
6340		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
6341}
6342
6343static void
6344vi_refresh_stats(struct vi_info *vi)
6345{
6346	struct timeval tv;
6347	const struct timeval interval = {0, 250000};	/* 250ms */
6348
6349	mtx_assert(&vi->tick_mtx, MA_OWNED);
6350
6351	if (!(vi->flags & VI_INIT_DONE) || vi->flags & VI_SKIP_STATS)
6352		return;
6353
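	/*
	 * Rate-limit the register reads: do nothing if the stats were
	 * refreshed within the last 250ms.
	 */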
6354	getmicrotime(&tv);
6355	timevalsub(&tv, &interval);
6356	if (timevalcmp(&tv, &vi->last_refreshed, <))
6357		return;
6358
6359	t4_get_vi_stats(vi->adapter, vi->vin, &vi->stats);
6360	getmicrotime(&vi->last_refreshed);
6361}
6362
6363static void
6364cxgbe_refresh_stats(struct vi_info *vi)
6365{
6366	u_int i, v, tnl_cong_drops, chan_map;
6367	struct timeval tv;
6368	const struct timeval interval = {0, 250000};	/* 250ms */
6369	struct port_info *pi;
6370	struct adapter *sc;
6371
6372	mtx_assert(&vi->tick_mtx, MA_OWNED);
6373
6374	if (vi->flags & VI_SKIP_STATS)
6375		return;
6376
6377	getmicrotime(&tv);
6378	timevalsub(&tv, &interval);
6379	if (timevalcmp(&tv, &vi->last_refreshed, <))
6380		return;
6381
6382	pi = vi->pi;
6383	sc = vi->adapter;
6384	tnl_cong_drops = 0;
6385	t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
6386	chan_map = pi->rx_e_chan_map;
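	/* Walk every set bit: one TP MIB read per rx e-channel. */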
6387	while (chan_map) {
6388		i = ffs(chan_map) - 1;
6389		mtx_lock(&sc->reg_lock);
6390		t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
6391		    A_TP_MIB_TNL_CNG_DROP_0 + i);
6392		mtx_unlock(&sc->reg_lock);
6393		tnl_cong_drops += v;
6394		chan_map &= ~(1 << i);
6395	}
6396	pi->tnl_cong_drops = tnl_cong_drops;
6397	getmicrotime(&vi->last_refreshed);
6398}
6399
6400static void
6401cxgbe_tick(void *arg)
6402{
6403	struct vi_info *vi = arg;
6404
6405	MPASS(IS_MAIN_VI(vi));
6406	mtx_assert(&vi->tick_mtx, MA_OWNED);
6407
6408	cxgbe_refresh_stats(vi);
6409	callout_schedule(&vi->tick, hz);
6410}
6411
6412static void
6413vi_tick(void *arg)
6414{
6415	struct vi_info *vi = arg;
6416
6417	mtx_assert(&vi->tick_mtx, MA_OWNED);
6418
6419	vi_refresh_stats(vi);
6420	callout_schedule(&vi->tick, hz);
6421}
6422
6423/*
6424 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
6425 */
6426static char *caps_decoder[] = {
6427	"\20\001IPMI\002NCSI",				/* 0: NBM */
6428	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
6429	"\20\001INGRESS\002EGRESS",			/* 2: switch */
6430	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
6431	    "\006HASHFILTER\007ETHOFLD",
6432	"\20\001TOE",					/* 4: TOE */
6433	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
6434	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
6435	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
6436	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
6437	    "\007T10DIF"
6438	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
6439	"\20\001LOOKASIDE\002TLSKEYS\003IPSEC_INLINE"	/* 7: Crypto */
6440	    "\004TLS_HW",
6441	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
6442		    "\004PO_INITIATOR\005PO_TARGET",
6443};
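
/*
 * Each string above is a "%b" bitfield description as consumed by
 * sysctl_bitfield_16b(): "\20" prints the raw value in hex, and each
 * octal "\0NN" byte followed by a name labels bit NN (1-based).  For
 * example, a niccaps value of 0x21 renders as "0x21<NIC,HASHFILTER>".
 */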
6444
6445void
6446t4_sysctls(struct adapter *sc)
6447{
6448	struct sysctl_ctx_list *ctx;
6449	struct sysctl_oid *oid;
6450	struct sysctl_oid_list *children, *c0;
6451	static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
6452
6453	ctx = device_get_sysctl_ctx(sc->dev);
6454
6455	/*
6456	 * dev.t4nex.X.
6457	 */
6458	oid = device_get_sysctl_tree(sc->dev);
6459	c0 = children = SYSCTL_CHILDREN(oid);
6460
6461	sc->sc_do_rxcopy = 1;
6462	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
6463	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
6464
6465	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
6466	    sc->params.nports, "# of ports");
6467
6468	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
6469	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, doorbells,
6470	    (uintptr_t)&sc->doorbells, sysctl_bitfield_8b, "A",
6471	    "available doorbells");
6472
6473	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
6474	    sc->params.vpd.cclk, "core clock frequency (in kHz)");
6475
6476	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
6477	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
6478	    sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val),
6479	    sysctl_int_array, "A", "interrupt holdoff timer values (us)");
6480
6481	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
6482	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
6483	    sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val),
6484	    sysctl_int_array, "A", "interrupt holdoff packet counter values");
6485
6486	t4_sge_sysctls(sc, ctx, children);
6487
6488	sc->lro_timeout = 100;
6489	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
6490	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
6491
6492	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
6493	    &sc->debug_flags, 0, "flags to enable runtime debugging");
6494
6495	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
6496	    CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
6497
6498	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
6499	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
6500
6501	if (sc->flags & IS_VF)
6502		return;
6503
6504	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
6505	    NULL, chip_rev(sc), "chip hardware revision");
6506
6507	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
6508	    CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
6509
6510	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
6511	    CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
6512
6513	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
6514	    CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
6515
6516	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
6517	    CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
6518
6519	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
6520	    CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
6521
6522	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
6523	    sc->er_version, 0, "expansion ROM version");
6524
6525	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
6526	    sc->bs_version, 0, "bootstrap firmware version");
6527
6528	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
6529	    NULL, sc->params.scfg_vers, "serial config version");
6530
6531	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
6532	    NULL, sc->params.vpd_vers, "VPD version");
6533
6534	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
6535	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
6536
6537	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
6538	    sc->cfcsum, "config file checksum");
6539
6540#define SYSCTL_CAP(name, n, text) \
6541	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
6542	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, caps_decoder[n], \
6543	    (uintptr_t)&sc->name, sysctl_bitfield_16b, "A", \
6544	    "available " text " capabilities")
6545
6546	SYSCTL_CAP(nbmcaps, 0, "NBM");
6547	SYSCTL_CAP(linkcaps, 1, "link");
6548	SYSCTL_CAP(switchcaps, 2, "switch");
6549	SYSCTL_CAP(niccaps, 3, "NIC");
6550	SYSCTL_CAP(toecaps, 4, "TCP offload");
6551	SYSCTL_CAP(rdmacaps, 5, "RDMA");
6552	SYSCTL_CAP(iscsicaps, 6, "iSCSI");
6553	SYSCTL_CAP(cryptocaps, 7, "crypto");
6554	SYSCTL_CAP(fcoecaps, 8, "FCoE");
6555#undef SYSCTL_CAP
6556
6557	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
6558	    NULL, sc->tids.nftids, "number of filters");
6559
6560	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6561	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6562	    sysctl_temperature, "I", "chip temperature (in Celsius)");
6563	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "reset_sensor",
6564	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6565	    sysctl_reset_sensor, "I", "reset the chip's temperature sensor.");
6566
6567	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg",
6568	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6569	    sysctl_loadavg, "A",
6570	    "microprocessor load averages (debug firmware only)");
6571
6572	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "core_vdd",
6573	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, sysctl_vdd,
6574	    "I", "core Vdd (in mV)");
6575
6576	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
6577	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, LOCAL_CPUS,
6578	    sysctl_cpus, "A", "local CPUs");
6579
6580	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
6581	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, INTR_CPUS,
6582	    sysctl_cpus, "A", "preferred CPUs for interrupts");
6583
6584	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "swintr", CTLFLAG_RW,
6585	    &sc->swintr, 0, "software triggered interrupts");
6586
6587	/*
6588	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
6589	 */
6590	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
6591	    CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL,
6592	    "logs and miscellaneous information");
6593	children = SYSCTL_CHILDREN(oid);
6594
6595	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
6596	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6597	    sysctl_cctrl, "A", "congestion control");
6598
6599	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
6600	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6601	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
6602
6603	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
6604	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1,
6605	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
6606
6607	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
6608	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2,
6609	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
6610
6611	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
6612	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 3,
6613	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
6614
6615	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
6616	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 4,
6617	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
6618
6619	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
6620	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 5,
6621	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
6622
6623	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
6624	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6625	    sysctl_cim_la, "A", "CIM logic analyzer");
6626
6627	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
6628	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6629	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
6630
6631	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
6632	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6633	    0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
6634
6635	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
6636	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6637	    1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
6638
6639	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
6640	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6641	    2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
6642
6643	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
6644	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6645	    3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
6646
6647	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
6648	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6649	    4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
6650
6651	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
6652	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6653	    5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
6654
6655	if (chip_id(sc) > CHELSIO_T4) {
6656		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
6657		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6658		    6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A",
6659		    "CIM OBQ 6 (SGE0-RX)");
6660
6661		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
6662		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6663		    7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A",
6664		    "CIM OBQ 7 (SGE1-RX)");
6665	}
6666
6667	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
6668	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6669	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
6670
6671	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
6672	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6673	    sysctl_cim_qcfg, "A", "CIM queue configuration");
6674
6675	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
6676	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6677	    sysctl_cpl_stats, "A", "CPL statistics");
6678
6679	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
6680	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6681	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
6682
6683	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tid_stats",
6684	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6685	    sysctl_tid_stats, "A", "tid stats");
6686
6687	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
6688	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6689	    sysctl_devlog, "A", "firmware's device log");
6690
6691	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
6692	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6693	    sysctl_fcoe_stats, "A", "FCoE statistics");
6694
6695	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
6696	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6697	    sysctl_hw_sched, "A", "hardware scheduler");
6698
6699	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
6700	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6701	    sysctl_l2t, "A", "hardware L2 table");
6702
6703	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
6704	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6705	    sysctl_smt, "A", "hardware source MAC table");
6706
6707#ifdef INET6
6708	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "clip",
6709	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6710	    sysctl_clip, "A", "active CLIP table entries");
6711#endif
6712
6713	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
6714	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6715	    sysctl_lb_stats, "A", "loopback statistics");
6716
6717	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
6718	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6719	    sysctl_meminfo, "A", "memory regions");
6720
6721	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
6722	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6723	    chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
6724	    "A", "MPS TCAM entries");
6725
6726	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
6727	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6728	    sysctl_path_mtus, "A", "path MTUs");
6729
6730	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
6731	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6732	    sysctl_pm_stats, "A", "PM statistics");
6733
6734	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
6735	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6736	    sysctl_rdma_stats, "A", "RDMA statistics");
6737
6738	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
6739	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6740	    sysctl_tcp_stats, "A", "TCP statistics");
6741
6742	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
6743	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6744	    sysctl_tids, "A", "TID information");
6745
6746	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
6747	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6748	    sysctl_tp_err_stats, "A", "TP error statistics");
6749
6750	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tnl_stats",
6751	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6752	    sysctl_tnl_stats, "A", "TP tunnel statistics");
6753
6754	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
6755	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6756	    sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask");
6757
6758	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
6759	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6760	    sysctl_tp_la, "A", "TP logic analyzer");
6761
6762	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
6763	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6764	    sysctl_tx_rate, "A", "Tx rate");
6765
6766	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
6767	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6768	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
6769
6770	if (chip_id(sc) >= CHELSIO_T5) {
6771		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
6772		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6773		    sysctl_wcwr_stats, "A", "write combined work requests");
6774	}
6775
6776#ifdef KERN_TLS
6777	if (is_ktls(sc)) {
6778		/*
6779		 * dev.t4nex.0.tls.
6780		 */
6781		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "tls",
6782		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "KERN_TLS parameters");
6783		children = SYSCTL_CHILDREN(oid);
6784
6785		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "inline_keys",
6786		    CTLFLAG_RW, &sc->tlst.inline_keys, 0, "Always pass TLS "
6787		    "keys in work requests (1) or attempt to store TLS keys "
6788		    "in card memory (0).");
6789		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "combo_wrs",
6790		    CTLFLAG_RW, &sc->tlst.combo_wrs, 0, "Attempt to combine "
6791		    "TCB field updates with TLS record work requests.");
6792	}
6793#endif
6794
6795#ifdef TCP_OFFLOAD
6796	if (is_offload(sc)) {
6797		int i;
6798		char s[4];
6799
6800		/*
6801		 * dev.t4nex.X.toe.
6802		 */
6803		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe",
6804		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE parameters");
6805		children = SYSCTL_CHILDREN(oid);
6806
6807		sc->tt.cong_algorithm = -1;
6808		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
6809		    CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
6810		    "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
6811		    "3 = highspeed)");
6812
6813		sc->tt.sndbuf = -1;
6814		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
6815		    &sc->tt.sndbuf, 0, "hardware send buffer");
6816
6817		sc->tt.ddp = 0;
6818		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp",
6819		    CTLFLAG_RW | CTLFLAG_SKIP, &sc->tt.ddp, 0, "");
6820		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_zcopy", CTLFLAG_RW,
6821		    &sc->tt.ddp, 0, "Enable zero-copy aio_read(2)");
6822
6823		sc->tt.rx_coalesce = -1;
6824		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
6825		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
6826
6827		sc->tt.tls = 0;
6828		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls", CTLTYPE_INT |
6829		    CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, sysctl_tls, "I",
6830		    "Inline TLS allowed");
6831
6832		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
6833		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6834		    sysctl_tls_rx_ports, "I",
6835		    "TCP ports that use inline TLS+TOE RX");
6836
6837		sc->tt.tls_rx_timeout = t4_toe_tls_rx_timeout;
6838		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_timeout",
6839		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6840		    sysctl_tls_rx_timeout, "I",
6841		    "Timeout in seconds to downgrade TLS sockets to plain TOE");
6842
6843		sc->tt.tx_align = -1;
6844		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
6845		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
6846
6847		sc->tt.tx_zcopy = 0;
6848		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
6849		    CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
6850		    "Enable zero-copy aio_write(2)");
6851
6852		sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
6853		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
6854		    "cop_managed_offloading", CTLFLAG_RW,
6855		    &sc->tt.cop_managed_offloading, 0,
6856		    "COP (Connection Offload Policy) controls all TOE offload");
6857
6858		sc->tt.autorcvbuf_inc = 16 * 1024;
6859		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "autorcvbuf_inc",
6860		    CTLFLAG_RW, &sc->tt.autorcvbuf_inc, 0,
6861		    "autorcvbuf increment");
6862
6863		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
6864		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6865		    sysctl_tp_tick, "A", "TP timer tick (us)");
6866
6867		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
6868		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1,
6869		    sysctl_tp_tick, "A", "TCP timestamp tick (us)");
6870
6871		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
6872		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2,
6873		    sysctl_tp_tick, "A", "DACK tick (us)");
6874
6875		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
6876		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6877		    sysctl_tp_dack_timer, "IU", "DACK timer (us)");
6878
6879		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
6880		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6881		    A_TP_RXT_MIN, sysctl_tp_timer, "LU",
6882		    "Minimum retransmit interval (us)");
6883
6884		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
6885		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6886		    A_TP_RXT_MAX, sysctl_tp_timer, "LU",
6887		    "Maximum retransmit interval (us)");
6888
6889		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
6890		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6891		    A_TP_PERS_MIN, sysctl_tp_timer, "LU",
6892		    "Persist timer min (us)");
6893
6894		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
6895		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6896		    A_TP_PERS_MAX, sysctl_tp_timer, "LU",
6897		    "Persist timer max (us)");
6898
6899		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
6900		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6901		    A_TP_KEEP_IDLE, sysctl_tp_timer, "LU",
6902		    "Keepalive idle timer (us)");
6903
6904		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
6905		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6906		    A_TP_KEEP_INTVL, sysctl_tp_timer, "LU",
6907		    "Keepalive interval timer (us)");
6908
6909		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
6910		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6911		    A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)");
6912
6913		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
6914		    CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6915		    A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU",
6916		    "FINWAIT2 timer (us)");
6917
6918		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
6919		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6920		    S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU",
6921		    "Number of SYN retransmissions before abort");
6922
6923		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
6924		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6925		    S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU",
6926		    "Number of retransmissions before abort");
6927
6928		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
6929		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6930		    S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU",
6931		    "Number of keepalive probes before abort");
6932
6933		oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
6934		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
6935		    "TOE retransmit backoffs");
6936		children = SYSCTL_CHILDREN(oid);
6937		for (i = 0; i < 16; i++) {
6938			snprintf(s, sizeof(s), "%u", i);
6939			SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
6940			    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6941			    i, sysctl_tp_backoff, "IU",
6942			    "TOE retransmit backoff");
6943		}
6944	}
6945#endif
6946}
6947
6948void
6949vi_sysctls(struct vi_info *vi)
6950{
6951	struct sysctl_ctx_list *ctx;
6952	struct sysctl_oid *oid;
6953	struct sysctl_oid_list *children;
6954
6955	ctx = device_get_sysctl_ctx(vi->dev);
6956
6957	/*
6958	 * dev.v?(cxgbe|cxl).X.
6959	 */
6960	oid = device_get_sysctl_tree(vi->dev);
6961	children = SYSCTL_CHILDREN(oid);
6962
6963	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6964	    vi->viid, "VI identifier");
6965	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6966	    &vi->nrxq, 0, "# of rx queues");
6967	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6968	    &vi->ntxq, 0, "# of tx queues");
6969	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6970	    &vi->first_rxq, 0, "index of first rx queue");
6971	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6972	    &vi->first_txq, 0, "index of first tx queue");
6973	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6974	    vi->rss_base, "start of RSS indirection table");
6975	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6976	    vi->rss_size, "size of RSS indirection table");
6977
6978	if (IS_MAIN_VI(vi)) {
6979		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6980		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6981		    sysctl_noflowq, "IU",
6982		    "Reserve queue 0 for non-flowid packets");
6983	}
6984
6985	if (vi->adapter->flags & IS_VF) {
6986		MPASS(vi->flags & TX_USES_VM_WR);
6987		SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_vm_wr", CTLFLAG_RD,
6988		    NULL, 1, "use VM work requests for transmit");
6989	} else {
6990		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_vm_wr",
6991		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6992		    sysctl_tx_vm_wr, "I", "use VM work requests for transmit");
6993	}
6994
6995#ifdef TCP_OFFLOAD
6996	if (vi->nofldrxq != 0) {
6997		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6998		    &vi->nofldrxq, 0,
6999		    "# of rx queues for offloaded TCP connections");
7000		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
7001		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
7002		    "index of first TOE rx queue");
7003		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
7004		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7005		    sysctl_holdoff_tmr_idx_ofld, "I",
7006		    "holdoff timer index for TOE queues");
7007		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
7008		    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7009		    sysctl_holdoff_pktc_idx_ofld, "I",
7010		    "holdoff packet counter index for TOE queues");
7011	}
7012#endif
7013#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
7014	if (vi->nofldtxq != 0) {
7015		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
7016		    &vi->nofldtxq, 0,
7017		    "# of tx queues for TOE/ETHOFLD");
7018		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
7019		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
7020		    "index of first TOE/ETHOFLD tx queue");
7021	}
7022#endif
7023#ifdef DEV_NETMAP
7024	if (vi->nnmrxq != 0) {
7025		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
7026		    &vi->nnmrxq, 0, "# of netmap rx queues");
7027		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
7028		    &vi->nnmtxq, 0, "# of netmap tx queues");
7029		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
7030		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
7031		    "index of first netmap rx queue");
7032		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
7033		    CTLFLAG_RD, &vi->first_nm_txq, 0,
7034		    "index of first netmap tx queue");
7035	}
7036#endif
7037
7038	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
7039	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7040	    sysctl_holdoff_tmr_idx, "I", "holdoff timer index");
7041	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
7042	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7043	    sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index");
7044
7045	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
7046	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7047	    sysctl_qsize_rxq, "I", "rx queue size");
7048	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
7049	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7050	    sysctl_qsize_txq, "I", "tx queue size");
7051}
7052
7053static void
7054cxgbe_sysctls(struct port_info *pi)
7055{
7056	struct sysctl_ctx_list *ctx;
7057	struct sysctl_oid *oid;
7058	struct sysctl_oid_list *children, *children2;
7059	struct adapter *sc = pi->adapter;
7060	int i;
7061	char name[16];
7062	static char *tc_flags = {"\20\1USER\2SYNC\3ASYNC\4ERR"};
7063
7064	ctx = device_get_sysctl_ctx(pi->dev);
7065
7066	/*
7067	 * dev.cxgbe.X.
7068	 */
7069	oid = device_get_sysctl_tree(pi->dev);
7070	children = SYSCTL_CHILDREN(oid);
7071
7072	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc",
7073	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7074	    sysctl_linkdnrc, "A", "reason why link is down");
7075	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
7076		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
7077		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7078		    sysctl_btphy, "I", "PHY temperature (in Celsius)");
7079		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
7080		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 1,
7081		    sysctl_btphy, "I", "PHY firmware version");
7082	}
7083
7084	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
7085	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7086	    sysctl_pause_settings, "A",
7087	    "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
7088	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
7089	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7090	    sysctl_fec, "A",
7091	    "FECs to use (bit 0 = RS, 1 = FC, 2 = none, 5 = auto, 6 = module)");
7092	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "module_fec",
7093	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0, sysctl_module_fec, "A",
7094	    "FEC recommended by the cable/transceiver");
7095	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
7096	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7097	    sysctl_autoneg, "I",
7098	    "autonegotiation (-1 = not supported)");
7099
7100	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcaps", CTLFLAG_RD,
7101	    &pi->link_cfg.pcaps, 0, "port capabilities");
7102	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "acaps", CTLFLAG_RD,
7103	    &pi->link_cfg.acaps, 0, "advertised capabilities");
7104	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lpacaps", CTLFLAG_RD,
7105	    &pi->link_cfg.lpacaps, 0, "link partner advertised capabilities");
7106
7107	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
7108	    port_top_speed(pi), "max speed (in Gbps)");
7109	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
7110	    pi->mps_bg_map, "MPS buffer group map");
7111	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
7112	    NULL, pi->rx_e_chan_map, "TP rx e-channel map");
7113	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_c_chan", CTLFLAG_RD, NULL,
7114	    pi->rx_c_chan, "TP rx c-channel");
7115
7116	if (sc->flags & IS_VF)
7117		return;
7118
7119	/*
7120	 * dev.(cxgbe|cxl).X.tc.
7121	 */
7122	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc",
7123	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
7124	    "Tx scheduler traffic classes (cl_rl)");
7125	children2 = SYSCTL_CHILDREN(oid);
7126	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
7127	    CTLFLAG_RW, &pi->sched_params->pktsize, 0,
7128	    "pktsize for per-flow cl-rl (0 means up to the driver)");
7129	SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
7130	    CTLFLAG_RW, &pi->sched_params->burstsize, 0,
7131	    "burstsize for per-flow cl-rl (0 means up to the driver)");
7132	for (i = 0; i < sc->chip_params->nsched_cls; i++) {
7133		struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
7134
7135		snprintf(name, sizeof(name), "%d", i);
7136		children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
7137		    SYSCTL_CHILDREN(oid), OID_AUTO, name,
7138		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "traffic class"));
7139		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
7140		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, tc_flags,
7141		    (uintptr_t)&tc->flags, sysctl_bitfield_8b, "A", "flags");
7142		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
7143		    CTLFLAG_RD, &tc->refcount, 0, "references to this class");
7144		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
7145		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
7146		    (pi->port_id << 16) | i, sysctl_tc_params, "A",
7147		    "traffic class parameters");
7148	}
7149
7150	/*
7151	 * dev.cxgbe.X.stats.
7152	 */
7153	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats",
7154	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "port statistics");
7155	children = SYSCTL_CHILDREN(oid);
7156	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
7157	    &pi->tx_parse_error, 0,
7158	    "# of tx packets with invalid length or # of segments");
7159
7160#define T4_REGSTAT(name, stat, desc) \
7161    SYSCTL_ADD_OID(ctx, children, OID_AUTO, #name, \
7162        CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, \
7163	(is_t4(sc) ? PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_##stat##_L) : \
7164	T5_PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_##stat##_L)), \
7165        sysctl_handle_t4_reg64, "QU", desc)
7166
7167/* We get these from port_stats and they may be stale by up to 1s */
7168#define T4_PORTSTAT(name, desc) \
7169	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
7170	    &pi->stats.name, desc)
7171
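/*
 * Roughly what one T4_REGSTAT use below expands to, taking tx_octets on a
 * T5 or later adapter as an example:
 *
 *   SYSCTL_ADD_OID(ctx, children, OID_AUTO, "tx_octets",
 *       CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
 *       T5_PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L),
 *       sysctl_handle_t4_reg64, "QU", "# of octets in good frames");
 *
 * That is, a T4_REGSTAT reads the 64-bit MPS counter straight from the
 * hardware on every access, while a T4_PORTSTAT exports the cached copy in
 * pi->stats.
 */
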
7172	T4_REGSTAT(tx_octets, TX_PORT_BYTES, "# of octets in good frames");
7173	T4_REGSTAT(tx_frames, TX_PORT_FRAMES, "total # of good frames");
7174	T4_REGSTAT(tx_bcast_frames, TX_PORT_BCAST, "# of broadcast frames");
7175	T4_REGSTAT(tx_mcast_frames, TX_PORT_MCAST, "# of multicast frames");
7176	T4_REGSTAT(tx_ucast_frames, TX_PORT_UCAST, "# of unicast frames");
7177	T4_REGSTAT(tx_error_frames, TX_PORT_ERROR, "# of error frames");
7178	T4_REGSTAT(tx_frames_64, TX_PORT_64B, "# of tx frames in this range");
7179	T4_REGSTAT(tx_frames_65_127, TX_PORT_65B_127B, "# of tx frames in this range");
7180	T4_REGSTAT(tx_frames_128_255, TX_PORT_128B_255B, "# of tx frames in this range");
7181	T4_REGSTAT(tx_frames_256_511, TX_PORT_256B_511B, "# of tx frames in this range");
7182	T4_REGSTAT(tx_frames_512_1023, TX_PORT_512B_1023B, "# of tx frames in this range");
7183	T4_REGSTAT(tx_frames_1024_1518, TX_PORT_1024B_1518B, "# of tx frames in this range");
7184	T4_REGSTAT(tx_frames_1519_max, TX_PORT_1519B_MAX, "# of tx frames in this range");
7185	T4_REGSTAT(tx_drop, TX_PORT_DROP, "# of dropped tx frames");
7186	T4_REGSTAT(tx_pause, TX_PORT_PAUSE, "# of pause frames transmitted");
7187	T4_REGSTAT(tx_ppp0, TX_PORT_PPP0, "# of PPP prio 0 frames transmitted");
7188	T4_REGSTAT(tx_ppp1, TX_PORT_PPP1, "# of PPP prio 1 frames transmitted");
7189	T4_REGSTAT(tx_ppp2, TX_PORT_PPP2, "# of PPP prio 2 frames transmitted");
7190	T4_REGSTAT(tx_ppp3, TX_PORT_PPP3, "# of PPP prio 3 frames transmitted");
7191	T4_REGSTAT(tx_ppp4, TX_PORT_PPP4, "# of PPP prio 4 frames transmitted");
7192	T4_REGSTAT(tx_ppp5, TX_PORT_PPP5, "# of PPP prio 5 frames transmitted");
7193	T4_REGSTAT(tx_ppp6, TX_PORT_PPP6, "# of PPP prio 6 frames transmitted");
7194	T4_REGSTAT(tx_ppp7, TX_PORT_PPP7, "# of PPP prio 7 frames transmitted");
7195
7196	T4_REGSTAT(rx_octets, RX_PORT_BYTES, "# of octets in good frames");
7197	T4_REGSTAT(rx_frames, RX_PORT_FRAMES, "total # of good frames");
7198	T4_REGSTAT(rx_bcast_frames, RX_PORT_BCAST, "# of broadcast frames");
7199	T4_REGSTAT(rx_mcast_frames, RX_PORT_MCAST, "# of multicast frames");
7200	T4_REGSTAT(rx_ucast_frames, RX_PORT_UCAST, "# of unicast frames");
7201	T4_REGSTAT(rx_too_long, RX_PORT_MTU_ERROR, "# of frames exceeding MTU");
7202	T4_REGSTAT(rx_jabber, RX_PORT_MTU_CRC_ERROR, "# of jabber frames");
7203	if (is_t6(sc)) {
7204		T4_PORTSTAT(rx_fcs_err,
7205		    "# of frames received with bad FCS since last link up");
7206	} else {
7207		T4_REGSTAT(rx_fcs_err, RX_PORT_CRC_ERROR,
7208		    "# of frames received with bad FCS");
7209	}
7210	T4_REGSTAT(rx_len_err, RX_PORT_LEN_ERROR, "# of frames received with length error");
7211	T4_REGSTAT(rx_symbol_err, RX_PORT_SYM_ERROR, "symbol errors");
7212	T4_REGSTAT(rx_runt, RX_PORT_LESS_64B, "# of short frames received");
7213	T4_REGSTAT(rx_frames_64, RX_PORT_64B, "# of rx frames in this range");
7214	T4_REGSTAT(rx_frames_65_127, RX_PORT_65B_127B, "# of rx frames in this range");
7215	T4_REGSTAT(rx_frames_128_255, RX_PORT_128B_255B, "# of rx frames in this range");
7216	T4_REGSTAT(rx_frames_256_511, RX_PORT_256B_511B, "# of rx frames in this range");
7217	T4_REGSTAT(rx_frames_512_1023, RX_PORT_512B_1023B, "# of rx frames in this range");
7218	T4_REGSTAT(rx_frames_1024_1518, RX_PORT_1024B_1518B, "# of rx frames in this range");
7219	T4_REGSTAT(rx_frames_1519_max, RX_PORT_1519B_MAX, "# of rx frames in this range");
7220	T4_REGSTAT(rx_pause, RX_PORT_PAUSE, "# of pause frames received");
7221	T4_REGSTAT(rx_ppp0, RX_PORT_PPP0, "# of PPP prio 0 frames received");
7222	T4_REGSTAT(rx_ppp1, RX_PORT_PPP1, "# of PPP prio 1 frames received");
7223	T4_REGSTAT(rx_ppp2, RX_PORT_PPP2, "# of PPP prio 2 frames received");
7224	T4_REGSTAT(rx_ppp3, RX_PORT_PPP3, "# of PPP prio 3 frames received");
7225	T4_REGSTAT(rx_ppp4, RX_PORT_PPP4, "# of PPP prio 4 frames received");
7226	T4_REGSTAT(rx_ppp5, RX_PORT_PPP5, "# of PPP prio 5 frames received");
7227	T4_REGSTAT(rx_ppp6, RX_PORT_PPP6, "# of PPP prio 6 frames received");
7228	T4_REGSTAT(rx_ppp7, RX_PORT_PPP7, "# of PPP prio 7 frames received");
7229
7230	T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows");
7231	T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows");
7232	T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows");
7233	T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows");
7234	T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets");
7235	T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets");
7236	T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets");
7237	T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets");
7238
7239#undef T4_REGSTAT
7240#undef T4_PORTSTAT
7241}
7242
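/*
 * The function above yields a per-port tree roughly like the following
 * (unit 0 shown as an example):
 *
 *   dev.(cxgbe|cxl).0.{linkdnrc,pause_settings,fec,module_fec,autoneg,...}
 *   dev.(cxgbe|cxl).0.tc.{pktsize,burstsize}
 *   dev.(cxgbe|cxl).0.tc.<class>.{flags,refcount,params}
 *   dev.(cxgbe|cxl).0.stats.{tx_octets,tx_frames,...,rx_trunc3}
 */
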
7243static int
7244sysctl_int_array(SYSCTL_HANDLER_ARGS)
7245{
7246	int rc, *i, space = 0;
7247	struct sbuf sb;
7248
7249	sbuf_new_for_sysctl(&sb, NULL, 64, req);
7250	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
7251		if (space)
7252			sbuf_printf(&sb, " ");
7253		sbuf_printf(&sb, "%d", *i);
7254		space = 1;
7255	}
7256	rc = sbuf_finish(&sb);
7257	sbuf_delete(&sb);
7258	return (rc);
7259}
7260
7261static int
7262sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
7263{
7264	int rc;
7265	struct sbuf *sb;
7266
7267	rc = sysctl_wire_old_buffer(req, 0);
7268	if (rc != 0)
7269		return (rc);
7270
7271	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7272	if (sb == NULL)
7273		return (ENOMEM);
7274
7275	sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
7276	rc = sbuf_finish(sb);
7277	sbuf_delete(sb);
7278
7279	return (rc);
7280}
7281
7282static int
7283sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
7284{
7285	int rc;
7286	struct sbuf *sb;
7287
7288	rc = sysctl_wire_old_buffer(req, 0);
7289	if (rc != 0)
7290		return (rc);
7291
7292	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7293	if (sb == NULL)
7294		return (ENOMEM);
7295
7296	sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
7297	rc = sbuf_finish(sb);
7298	sbuf_delete(sb);
7299
7300	return (rc);
7301}
7302
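/*
 * The two handlers above use printf's "%b" conversion: the control string
 * begins with the output base (\20 is hex) followed by a <bit number, name>
 * pair for each flag.  For example, formatting the value 0x5 with the
 * control string "\20\1RX\2TX\3AUTO" yields "5<RX,AUTO>".
 */
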
7303static int
7304sysctl_btphy(SYSCTL_HANDLER_ARGS)
7305{
7306	struct port_info *pi = arg1;
7307	int op = arg2;
7308	struct adapter *sc = pi->adapter;
7309	u_int v;
7310	int rc;
7311
7312	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
7313	if (rc)
7314		return (rc);
7315	/* XXX: magic numbers (MMD 0x1e is a clause-45 vendor-specific MMD) */
7316	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
7317	    &v);
7318	end_synchronized_op(sc, 0);
7319	if (rc)
7320		return (rc);
7321	if (op == 0)
7322		v /= 256;
7323
7324	rc = sysctl_handle_int(oidp, &v, 0, req);
7325	return (rc);
7326}
7327
7328static int
7329sysctl_noflowq(SYSCTL_HANDLER_ARGS)
7330{
7331	struct vi_info *vi = arg1;
7332	int rc, val;
7333
7334	val = vi->rsrv_noflowq;
7335	rc = sysctl_handle_int(oidp, &val, 0, req);
7336	if (rc != 0 || req->newptr == NULL)
7337		return (rc);
7338
7339	if ((val >= 1) && (vi->ntxq > 1))
7340		vi->rsrv_noflowq = 1;
7341	else
7342		vi->rsrv_noflowq = 0;
7343
7344	return (rc);
7345}
7346
7347static int
7348sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS)
7349{
7350	struct vi_info *vi = arg1;
7351	struct adapter *sc = vi->adapter;
7352	int rc, val, i;
7353
7354	MPASS(!(sc->flags & IS_VF));
7355
7356	val = vi->flags & TX_USES_VM_WR ? 1 : 0;
7357	rc = sysctl_handle_int(oidp, &val, 0, req);
7358	if (rc != 0 || req->newptr == NULL)
7359		return (rc);
7360
7361	if (val != 0 && val != 1)
7362		return (EINVAL);
7363
7364	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7365	    "t4txvm");
7366	if (rc)
7367		return (rc);
7368	if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING) {
7369		/*
7370		 * We don't want parse_pkt to run with one setting (VF or PF)
7371		 * and then eth_tx to see a different setting but still use
7372		 * stale information calculated by parse_pkt.
7373		 */
7374		rc = EBUSY;
7375	} else {
7376		struct port_info *pi = vi->pi;
7377		struct sge_txq *txq;
7378		uint32_t ctrl0;
7379		uint8_t npkt = sc->params.max_pkts_per_eth_tx_pkts_wr;
7380
7381		if (val) {
7382			vi->flags |= TX_USES_VM_WR;
7383			vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
7384			ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
7385			    V_TXPKT_INTF(pi->tx_chan));
7386			if (!(sc->flags & IS_VF))
7387				npkt--;
7388		} else {
7389			vi->flags &= ~TX_USES_VM_WR;
7390			vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
7391			ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
7392			    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
7393			    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
7394		}
7395		for_each_txq(vi, i, txq) {
7396			txq->cpl_ctrl0 = ctrl0;
7397			txq->txp.max_npkt = npkt;
7398		}
7399	}
7400	end_synchronized_op(sc, LOCK_HELD);
7401	return (rc);
7402}
7403
7404static int
7405sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
7406{
7407	struct vi_info *vi = arg1;
7408	struct adapter *sc = vi->adapter;
7409	int idx, rc, i;
7410	struct sge_rxq *rxq;
7411	uint8_t v;
7412
7413	idx = vi->tmr_idx;
7414
7415	rc = sysctl_handle_int(oidp, &idx, 0, req);
7416	if (rc != 0 || req->newptr == NULL)
7417		return (rc);
7418
7419	if (idx < 0 || idx >= SGE_NTIMERS)
7420		return (EINVAL);
7421
7422	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7423	    "t4tmr");
7424	if (rc)
7425		return (rc);
7426
7427	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
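	/*
	 * The receive queues may be live here, so update each one in place
	 * and use a release store where the platform provides one.
	 */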
7428	for_each_rxq(vi, i, rxq) {
7429#ifdef atomic_store_rel_8
7430		atomic_store_rel_8(&rxq->iq.intr_params, v);
7431#else
7432		rxq->iq.intr_params = v;
7433#endif
7434	}
7435	vi->tmr_idx = idx;
7436
7437	end_synchronized_op(sc, LOCK_HELD);
7438	return (0);
7439}
7440
7441static int
7442sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
7443{
7444	struct vi_info *vi = arg1;
7445	struct adapter *sc = vi->adapter;
7446	int idx, rc;
7447
7448	idx = vi->pktc_idx;
7449
7450	rc = sysctl_handle_int(oidp, &idx, 0, req);
7451	if (rc != 0 || req->newptr == NULL)
7452		return (rc);
7453
7454	if (idx < -1 || idx >= SGE_NCOUNTERS)
7455		return (EINVAL);
7456
7457	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7458	    "t4pktc");
7459	if (rc)
7460		return (rc);
7461
7462	if (vi->flags & VI_INIT_DONE)
7463		rc = EBUSY; /* cannot be changed once the queues are created */
7464	else
7465		vi->pktc_idx = idx;
7466
7467	end_synchronized_op(sc, LOCK_HELD);
7468	return (rc);
7469}
7470
7471static int
7472sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
7473{
7474	struct vi_info *vi = arg1;
7475	struct adapter *sc = vi->adapter;
7476	int qsize, rc;
7477
7478	qsize = vi->qsize_rxq;
7479
7480	rc = sysctl_handle_int(oidp, &qsize, 0, req);
7481	if (rc != 0 || req->newptr == NULL)
7482		return (rc);
7483
7484	if (qsize < 128 || (qsize & 7))
7485		return (EINVAL);
7486
7487	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7488	    "t4rxqs");
7489	if (rc)
7490		return (rc);
7491
7492	if (vi->flags & VI_INIT_DONE)
7493		rc = EBUSY; /* cannot be changed once the queues are created */
7494	else
7495		vi->qsize_rxq = qsize;
7496
7497	end_synchronized_op(sc, LOCK_HELD);
7498	return (rc);
7499}
7500
7501static int
7502sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
7503{
7504	struct vi_info *vi = arg1;
7505	struct adapter *sc = vi->adapter;
7506	int qsize, rc;
7507
7508	qsize = vi->qsize_txq;
7509
7510	rc = sysctl_handle_int(oidp, &qsize, 0, req);
7511	if (rc != 0 || req->newptr == NULL)
7512		return (rc);
7513
7514	if (qsize < 128 || qsize > 65536)
7515		return (EINVAL);
7516
7517	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7518	    "t4txqs");
7519	if (rc)
7520		return (rc);
7521
7522	if (vi->flags & VI_INIT_DONE)
7523		rc = EBUSY; /* cannot be changed once the queues are created */
7524	else
7525		vi->qsize_txq = qsize;
7526
7527	end_synchronized_op(sc, LOCK_HELD);
7528	return (rc);
7529}
7530
7531static int
7532sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
7533{
7534	struct port_info *pi = arg1;
7535	struct adapter *sc = pi->adapter;
7536	struct link_config *lc = &pi->link_cfg;
7537	int rc;
7538
7539	if (req->newptr == NULL) {
7540		struct sbuf *sb;
7541		static char *bits = "\20\1RX\2TX\3AUTO";
7542
7543		rc = sysctl_wire_old_buffer(req, 0);
7544		if (rc != 0)
7545			return (rc);
7546
7547		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7548		if (sb == NULL)
7549			return (ENOMEM);
7550
7551		if (lc->link_ok) {
7552			sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
7553			    (lc->requested_fc & PAUSE_AUTONEG), bits);
7554		} else {
7555			sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
7556			    PAUSE_RX | PAUSE_AUTONEG), bits);
7557		}
7558		rc = sbuf_finish(sb);
7559		sbuf_delete(sb);
7560	} else {
7561		char s[2];
7562		int n;
7563
7564		s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
7565		    PAUSE_AUTONEG));
7566		s[1] = 0;
7567
7568		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7569		if (rc != 0)
7570			return (rc);
7571
7572		if (s[1] != 0)
7573			return (EINVAL);
7574		if (s[0] < '0' || s[0] > '9')
7575			return (EINVAL);	/* not a number */
7576		n = s[0] - '0';
7577		if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
7578			return (EINVAL);	/* some other bit is set too */
7579
7580		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7581		    "t4PAUSE");
7582		if (rc)
7583			return (rc);
7584		PORT_LOCK(pi);
7585		lc->requested_fc = n;
7586		fixup_link_config(pi);
7587		if (pi->up_vis > 0)
7588			rc = apply_link_config(pi);
7589		set_current_media(pi);
7590		PORT_UNLOCK(pi);
7591		end_synchronized_op(sc, 0);
7592	}
7593
7594	return (rc);
7595}
7596
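/*
 * Example: writing "7" above requests PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG,
 * i.e. flow control in both directions with the final settings left to
 * autonegotiation, while "0" turns flow control off.  Reads report the
 * negotiated settings while the link is up and the requested settings
 * otherwise.
 */
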
7597static int
7598sysctl_fec(SYSCTL_HANDLER_ARGS)
7599{
7600	struct port_info *pi = arg1;
7601	struct adapter *sc = pi->adapter;
7602	struct link_config *lc = &pi->link_cfg;
7603	int rc;
7604	int8_t old;
7605
7606	if (req->newptr == NULL) {
7607		struct sbuf *sb;
7608		static char *bits = "\20\1RS-FEC\2FC-FEC\3NO-FEC\4RSVD2"
7609		    "\5RSVD3\6auto\7module";
7610
7611		rc = sysctl_wire_old_buffer(req, 0);
7612		if (rc != 0)
7613			return (rc);
7614
7615		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7616		if (sb == NULL)
7617			return (ENOMEM);
7618
7619		/*
7620		 * Display the requested_fec when the link is down -- the actual
7621		 * FEC makes sense only when the link is up.
7622		 */
7623		if (lc->link_ok) {
7624			sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
7625			    (lc->requested_fec & (FEC_AUTO | FEC_MODULE)),
7626			    bits);
7627		} else {
7628			sbuf_printf(sb, "%b", lc->requested_fec, bits);
7629		}
7630		rc = sbuf_finish(sb);
7631		sbuf_delete(sb);
7632	} else {
7633		char s[8];
7634		int n;
7635
7636		snprintf(s, sizeof(s), "%d",
7637		    lc->requested_fec == FEC_AUTO ? -1 :
7638		    lc->requested_fec & (M_FW_PORT_CAP32_FEC | FEC_MODULE));
7639
7640		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7641		if (rc != 0)
7642			return (rc);
7643
7644		n = strtol(&s[0], NULL, 0);
7645		if (n < 0 || n & FEC_AUTO)
7646			n = FEC_AUTO;
7647		else if (n & ~(M_FW_PORT_CAP32_FEC | FEC_MODULE))
7648			return (EINVAL);	/* some other bit is set too */
7649
7650		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7651		    "t4fec");
7652		if (rc)
7653			return (rc);
7654		PORT_LOCK(pi);
7655		old = lc->requested_fec;
7656		if (n == FEC_AUTO)
7657			lc->requested_fec = FEC_AUTO;
7658		else if (n == 0 || n == FEC_NONE)
7659			lc->requested_fec = FEC_NONE;
7660		else {
7661			if ((lc->pcaps |
7662			    V_FW_PORT_CAP32_FEC(n & M_FW_PORT_CAP32_FEC)) !=
7663			    lc->pcaps) {
7664				rc = ENOTSUP;
7665				goto done;
7666			}
7667			lc->requested_fec = n & (M_FW_PORT_CAP32_FEC |
7668			    FEC_MODULE);
7669		}
7670		fixup_link_config(pi);
7671		if (pi->up_vis > 0) {
7672			rc = apply_link_config(pi);
7673			if (rc != 0) {
7674				lc->requested_fec = old;
7675				if (rc == FW_EPROTO)
7676					rc = ENOTSUP;
7677			}
7678		}
7679done:
7680		PORT_UNLOCK(pi);
7681		end_synchronized_op(sc, 0);
7682	}
7683
7684	return (rc);
7685}
7686
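/*
 * Example: writing "1" above forces RS-FEC and "4" forces no FEC, while
 * "-1" (or any value with the FEC_AUTO bit set) restores automatic FEC
 * selection.  A request for a FEC that the port cannot do fails with
 * ENOTSUP.
 */
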
7687static int
7688sysctl_module_fec(SYSCTL_HANDLER_ARGS)
7689{
7690	struct port_info *pi = arg1;
7691	struct adapter *sc = pi->adapter;
7692	struct link_config *lc = &pi->link_cfg;
7693	int rc;
7694	int8_t fec;
7695	struct sbuf *sb;
7696	static char *bits = "\20\1RS-FEC\2FC-FEC\3NO-FEC\4RSVD2\5RSVD3";
7697
7698	rc = sysctl_wire_old_buffer(req, 0);
7699	if (rc != 0)
7700		return (rc);
7701
7702	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7703	if (sb == NULL)
7704		return (ENOMEM);
7705
7706	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mfec") != 0) {
		sbuf_delete(sb);
7707		return (EBUSY);
	}
7708	PORT_LOCK(pi);
7709	if (pi->up_vis == 0) {
7710		/*
7711		 * If all the interfaces are administratively down the firmware
7712		 * does not report transceiver changes.  Refresh port info here.
7713		 * This is the only reason we have a synchronized op in this
7714		 * function.  Just PORT_LOCK would have been enough otherwise.
7715		 */
7716		t4_update_port_info(pi);
7717	}
7718
7719	fec = lc->fec_hint;
7720	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE ||
7721	    !fec_supported(lc->pcaps)) {
7722		sbuf_printf(sb, "n/a");
7723	} else {
7724		if (fec == 0)
7725			fec = FEC_NONE;
7726		sbuf_printf(sb, "%b", fec & M_FW_PORT_CAP32_FEC, bits);
7727	}
7728	rc = sbuf_finish(sb);
7729	sbuf_delete(sb);
7730
7731	PORT_UNLOCK(pi);
7732	end_synchronized_op(sc, 0);
7733
7734	return (rc);
7735}
7736
7737static int
7738sysctl_autoneg(SYSCTL_HANDLER_ARGS)
7739{
7740	struct port_info *pi = arg1;
7741	struct adapter *sc = pi->adapter;
7742	struct link_config *lc = &pi->link_cfg;
7743	int rc, val;
7744
7745	if (lc->pcaps & FW_PORT_CAP32_ANEG)
7746		val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
7747	else
7748		val = -1;
7749	rc = sysctl_handle_int(oidp, &val, 0, req);
7750	if (rc != 0 || req->newptr == NULL)
7751		return (rc);
7752	if (val == 0)
7753		val = AUTONEG_DISABLE;
7754	else if (val == 1)
7755		val = AUTONEG_ENABLE;
7756	else
7757		val = AUTONEG_AUTO;
7758
7759	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7760	    "t4aneg");
7761	if (rc)
7762		return (rc);
7763	PORT_LOCK(pi);
7764	if (val == AUTONEG_ENABLE && !(lc->pcaps & FW_PORT_CAP32_ANEG)) {
7765		rc = ENOTSUP;
7766		goto done;
7767	}
7768	lc->requested_aneg = val;
7769	fixup_link_config(pi);
7770	if (pi->up_vis > 0)
7771		rc = apply_link_config(pi);
7772	set_current_media(pi);
7773done:
7774	PORT_UNLOCK(pi);
7775	end_synchronized_op(sc, 0);
7776	return (rc);
7777}
7778
7779static int
7780sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
7781{
7782	struct adapter *sc = arg1;
7783	int reg = arg2;
7784	uint64_t val;
7785
7786	val = t4_read_reg64(sc, reg);
7787
7788	return (sysctl_handle_64(oidp, &val, 0, req));
7789}
7790
7791static int
7792sysctl_temperature(SYSCTL_HANDLER_ARGS)
7793{
7794	struct adapter *sc = arg1;
7795	int rc, t;
7796	uint32_t param, val;
7797
7798	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
7799	if (rc)
7800		return (rc);
7801	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7802	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7803	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
7804	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7805	end_synchronized_op(sc, 0);
7806	if (rc)
7807		return (rc);
7808
7809	/* unknown is returned as 0 but we display -1 in that case */
7810	t = val == 0 ? -1 : val;
7811
7812	rc = sysctl_handle_int(oidp, &t, 0, req);
7813	return (rc);
7814}
7815
7816static int
7817sysctl_vdd(SYSCTL_HANDLER_ARGS)
7818{
7819	struct adapter *sc = arg1;
7820	int rc;
7821	uint32_t param, val;
7822
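	/*
	 * The core VDD does not change at runtime, so it is queried from the
	 * firmware once and the cached value is served afterwards.
	 */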
7823	if (sc->params.core_vdd == 0) {
7824		rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
7825		    "t4vdd");
7826		if (rc)
7827			return (rc);
7828		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7829		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7830		    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
7831		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7832		end_synchronized_op(sc, 0);
7833		if (rc)
7834			return (rc);
7835		sc->params.core_vdd = val;
7836	}
7837
7838	return (sysctl_handle_int(oidp, &sc->params.core_vdd, 0, req));
7839}
7840
7841static int
7842sysctl_reset_sensor(SYSCTL_HANDLER_ARGS)
7843{
7844	struct adapter *sc = arg1;
7845	int rc, v;
7846	uint32_t param, val;
7847
7848	v = sc->sensor_resets;
7849	rc = sysctl_handle_int(oidp, &v, 0, req);
7850	if (rc != 0 || req->newptr == NULL || v <= 0)
7851		return (rc);
7852
7853	if (sc->params.fw_vers < FW_VERSION32(1, 24, 7, 0) ||
7854	    chip_id(sc) < CHELSIO_T5)
7855		return (ENOTSUP);
7856
7857	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4srst");
7858	if (rc)
7859		return (rc);
7860	param = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7861	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7862	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_RESET_TMP_SENSOR));
7863	val = 1;
7864	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7865	end_synchronized_op(sc, 0);
7866	if (rc == 0)
7867		sc->sensor_resets++;
7868	return (rc);
7869}
7870
7871static int
7872sysctl_loadavg(SYSCTL_HANDLER_ARGS)
7873{
7874	struct adapter *sc = arg1;
7875	struct sbuf *sb;
7876	int rc;
7877	uint32_t param, val;
7878
7879	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
7880	if (rc)
7881		return (rc);
7882	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7883	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
7884	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7885	end_synchronized_op(sc, 0);
7886	if (rc)
7887		return (rc);
7888
7889	rc = sysctl_wire_old_buffer(req, 0);
7890	if (rc != 0)
7891		return (rc);
7892
7893	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7894	if (sb == NULL)
7895		return (ENOMEM);
7896
7897	if (val == 0xffffffff) {
7898		/* Only debug and custom firmwares report load averages. */
7899		sbuf_printf(sb, "not available");
7900	} else {
7901		sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
7902		    (val >> 16) & 0xff);
7903	}
7904	rc = sbuf_finish(sb);
7905	sbuf_delete(sb);
7906
7907	return (rc);
7908}
7909
7910static int
7911sysctl_cctrl(SYSCTL_HANDLER_ARGS)
7912{
7913	struct adapter *sc = arg1;
7914	struct sbuf *sb;
7915	int rc, i;
7916	uint16_t incr[NMTUS][NCCTRL_WIN];
7917	static const char *dec_fac[] = {
7918		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
7919		"0.9375"
7920	};
7921
7922	rc = sysctl_wire_old_buffer(req, 0);
7923	if (rc != 0)
7924		return (rc);
7925
7926	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7927	if (sb == NULL)
7928		return (ENOMEM);
7929
7930	t4_read_cong_tbl(sc, incr);
7931
7932	for (i = 0; i < NCCTRL_WIN; ++i) {
7933		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
7934		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
7935		    incr[5][i], incr[6][i], incr[7][i]);
7936		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
7937		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
7938		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
7939		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
7940	}
7941
7942	rc = sbuf_finish(sb);
7943	sbuf_delete(sb);
7944
7945	return (rc);
7946}
7947
7948static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
7949	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
7950	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
7951	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
7952};
7953
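/*
 * arg2 of the handler below encodes the queue: values 0 to CIM_NUM_IBQ - 1
 * select an inbound queue and larger values select an outbound queue
 * (arg2 - CIM_NUM_IBQ), matching the layout of qname[] above.
 */
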
7954static int
7955sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
7956{
7957	struct adapter *sc = arg1;
7958	struct sbuf *sb;
7959	int rc, i, n, qid = arg2;
7960	uint32_t *buf, *p;
7961	char *qtype;
7962	u_int cim_num_obq = sc->chip_params->cim_num_obq;
7963
7964	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
7965	    ("%s: bad qid %d\n", __func__, qid));
7966
7967	if (qid < CIM_NUM_IBQ) {
7968		/* inbound queue */
7969		qtype = "IBQ";
7970		n = 4 * CIM_IBQ_SIZE;
7971		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7972		rc = t4_read_cim_ibq(sc, qid, buf, n);
7973	} else {
7974		/* outbound queue */
7975		qtype = "OBQ";
7976		qid -= CIM_NUM_IBQ;
7977		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
7978		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7979		rc = t4_read_cim_obq(sc, qid, buf, n);
7980	}
7981
7982	if (rc < 0) {
7983		rc = -rc;
7984		goto done;
7985	}
7986	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */
7987
7988	rc = sysctl_wire_old_buffer(req, 0);
7989	if (rc != 0)
7990		goto done;
7991
7992	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7993	if (sb == NULL) {
7994		rc = ENOMEM;
7995		goto done;
7996	}
7997
7998	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
7999	for (i = 0, p = buf; i < n; i += 16, p += 4)
8000		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
8001		    p[2], p[3]);
8002
8003	rc = sbuf_finish(sb);
8004	sbuf_delete(sb);
8005done:
8006	free(buf, M_CXGBE);
8007	return (rc);
8008}
8009
8010static void
8011sbuf_cim_la4(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
8012{
8013	uint32_t *p;
8014
8015	sbuf_printf(sb, "Status   Data      PC%s",
8016	    cfg & F_UPDBGLACAPTPCONLY ? "" :
8017	    "     LS0Stat  LS0Addr             LS0Data");
8018
8019	for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
8020		if (cfg & F_UPDBGLACAPTPCONLY) {
8021			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
8022			    p[6], p[7]);
8023			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
8024			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
8025			    p[4] & 0xff, p[5] >> 8);
8026			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
8027			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
8028			    p[1] & 0xf, p[2] >> 4);
8029		} else {
8030			sbuf_printf(sb,
8031			    "\n  %02x   %x%07x %x%07x %08x %08x "
8032			    "%08x%08x%08x%08x",
8033			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
8034			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
8035			    p[6], p[7]);
8036		}
8037	}
8038}
8039
8040static void
8041sbuf_cim_la6(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
8042{
8043	uint32_t *p;
8044
8045	sbuf_printf(sb, "Status   Inst    Data      PC%s",
8046	    cfg & F_UPDBGLACAPTPCONLY ? "" :
8047	    "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
8048
8049	for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
8050		if (cfg & F_UPDBGLACAPTPCONLY) {
8051			sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
8052			    p[3] & 0xff, p[2], p[1], p[0]);
8053			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
8054			    (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
8055			    p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
8056			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
8057			    (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
8058			    p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
8059			    p[6] >> 16);
8060		} else {
8061			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
8062			    "%08x %08x %08x %08x %08x %08x",
8063			    (p[9] >> 16) & 0xff,
8064			    p[9] & 0xffff, p[8] >> 16,
8065			    p[8] & 0xffff, p[7] >> 16,
8066			    p[7] & 0xffff, p[6] >> 16,
8067			    p[2], p[1], p[0], p[5], p[4], p[3]);
8068		}
8069	}
8070}
8071
8072static int
8073sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags)
8074{
8075	uint32_t cfg, *buf;
8076	int rc;
8077
8078	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
8079	if (rc != 0)
8080		return (rc);
8081
8082	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
8083	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
8084	    M_ZERO | flags);
8085	if (buf == NULL)
8086		return (ENOMEM);
8087
8088	rc = -t4_cim_read_la(sc, buf, NULL);
8089	if (rc != 0)
8090		goto done;
8091	if (chip_id(sc) < CHELSIO_T6)
8092		sbuf_cim_la4(sc, sb, buf, cfg);
8093	else
8094		sbuf_cim_la6(sc, sb, buf, cfg);
8095
8096done:
8097	free(buf, M_CXGBE);
8098	return (rc);
8099}
8100
8101static int
8102sysctl_cim_la(SYSCTL_HANDLER_ARGS)
8103{
8104	struct adapter *sc = arg1;
8105	struct sbuf *sb;
8106	int rc;
8107
8108	rc = sysctl_wire_old_buffer(req, 0);
8109	if (rc != 0)
8110		return (rc);
8111	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8112	if (sb == NULL)
8113		return (ENOMEM);
8114
8115	rc = sbuf_cim_la(sc, sb, M_WAITOK);
8116	if (rc == 0)
8117		rc = sbuf_finish(sb);
8118	sbuf_delete(sb);
8119	return (rc);
8120}
8121
8122bool
8123t4_os_dump_cimla(struct adapter *sc, int arg, bool verbose)
8124{
8125	struct sbuf sb;
8126	int rc;
8127
8128	if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
8129		return (false);
8130	rc = sbuf_cim_la(sc, &sb, M_NOWAIT);
8131	if (rc == 0) {
8132		rc = sbuf_finish(&sb);
8133		if (rc == 0) {
8134			log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s",
8135			    device_get_nameunit(sc->dev), sbuf_data(&sb));
8136		}
8137	}
8138	sbuf_delete(&sb);
8139	return (false);
8140}
8141
8142static int
8143sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
8144{
8145	struct adapter *sc = arg1;
8146	u_int i;
8147	struct sbuf *sb;
8148	uint32_t *buf, *p;
8149	int rc;
8150
8151	rc = sysctl_wire_old_buffer(req, 0);
8152	if (rc != 0)
8153		return (rc);
8154
8155	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8156	if (sb == NULL)
8157		return (ENOMEM);
8158
8159	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
8160	    M_ZERO | M_WAITOK);
8161
8162	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
8163	p = buf;
8164
8165	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
8166		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
8167		    p[1], p[0]);
8168	}
8169
8170	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
8171	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
8172		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
8173		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
8174		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
8175		    (p[1] >> 2) | ((p[2] & 3) << 30),
8176		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
8177		    p[0] & 1);
8178	}
8179
8180	rc = sbuf_finish(sb);
8181	sbuf_delete(sb);
8182	free(buf, M_CXGBE);
8183	return (rc);
8184}
8185
8186static int
8187sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
8188{
8189	struct adapter *sc = arg1;
8190	u_int i;
8191	struct sbuf *sb;
8192	uint32_t *buf, *p;
8193	int rc;
8194
8195	rc = sysctl_wire_old_buffer(req, 0);
8196	if (rc != 0)
8197		return (rc);
8198
8199	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8200	if (sb == NULL)
8201		return (ENOMEM);
8202
8203	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
8204	    M_ZERO | M_WAITOK);
8205
8206	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
8207	p = buf;
8208
8209	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
8210	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
8211		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
8212		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
8213		    p[4], p[3], p[2], p[1], p[0]);
8214	}
8215
8216	sbuf_printf(sb, "\n\nCntl ID               Data");
8217	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
8218		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
8219		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
8220	}
8221
8222	rc = sbuf_finish(sb);
8223	sbuf_delete(sb);
8224	free(buf, M_CXGBE);
8225	return (rc);
8226}
8227
8228static int
8229sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
8230{
8231	struct adapter *sc = arg1;
8232	struct sbuf *sb;
8233	int rc, i;
8234	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
8235	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
8236	uint16_t thres[CIM_NUM_IBQ];
8237	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
8238	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
8239	u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
8240
8241	cim_num_obq = sc->chip_params->cim_num_obq;
8242	if (is_t4(sc)) {
8243		ibq_rdaddr = A_UP_IBQ_0_RDADDR;
8244		obq_rdaddr = A_UP_OBQ_0_REALADDR;
8245	} else {
8246		ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
8247		obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
8248	}
8249	nq = CIM_NUM_IBQ + cim_num_obq;
8250
8251	rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
8252	if (rc == 0)
8253		rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
8254	if (rc != 0)
8255		return (rc);
8256
8257	t4_read_cimq_cfg(sc, base, size, thres);
8258
8259	rc = sysctl_wire_old_buffer(req, 0);
8260	if (rc != 0)
8261		return (rc);
8262
8263	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
8264	if (sb == NULL)
8265		return (ENOMEM);
8266
8267	sbuf_printf(sb,
8268	    "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
8269
8270	for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
8271		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
8272		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
8273		    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
8274		    G_QUEREMFLITS(p[2]) * 16);
8275	for ( ; i < nq; i++, p += 4, wr += 2)
8276		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
8277		    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
8278		    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
8279		    G_QUEREMFLITS(p[2]) * 16);
8280
8281	rc = sbuf_finish(sb);
8282	sbuf_delete(sb);
8283
8284	return (rc);
8285}
8286
8287static int
8288sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
8289{
8290	struct adapter *sc = arg1;
8291	struct sbuf *sb;
8292	int rc;
8293	struct tp_cpl_stats stats;
8294
8295	rc = sysctl_wire_old_buffer(req, 0);
8296	if (rc != 0)
8297		return (rc);
8298
8299	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8300	if (sb == NULL)
8301		return (ENOMEM);
8302
8303	mtx_lock(&sc->reg_lock);
8304	t4_tp_get_cpl_stats(sc, &stats, 0);
8305	mtx_unlock(&sc->reg_lock);
8306
8307	if (sc->chip_params->nchan > 2) {
8308		sbuf_printf(sb, "                 channel 0  channel 1"
8309		    "  channel 2  channel 3");
8310		sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
8311		    stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
8312		sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
8313		    stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
8314	} else {
8315		sbuf_printf(sb, "                 channel 0  channel 1");
8316		sbuf_printf(sb, "\nCPL requests:   %10u %10u",
8317		    stats.req[0], stats.req[1]);
8318		sbuf_printf(sb, "\nCPL responses:  %10u %10u",
8319		    stats.rsp[0], stats.rsp[1]);
8320	}
8321
8322	rc = sbuf_finish(sb);
8323	sbuf_delete(sb);
8324
8325	return (rc);
8326}
8327
8328static int
8329sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
8330{
8331	struct adapter *sc = arg1;
8332	struct sbuf *sb;
8333	int rc;
8334	struct tp_usm_stats stats;
8335
8336	rc = sysctl_wire_old_buffer(req, 0);
8337	if (rc != 0)
8338		return (rc);
8339
8340	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8341	if (sb == NULL)
8342		return (ENOMEM);
8343
8344	mtx_lock(&sc->reg_lock);
8345	t4_get_usm_stats(sc, &stats, 1);
8346	mtx_unlock(&sc->reg_lock);
8347
8348	sbuf_printf(sb, "Frames: %u\n", stats.frames);
8349	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
8350	sbuf_printf(sb, "Drops:  %u", stats.drops);
8351
8352	rc = sbuf_finish(sb);
8353	sbuf_delete(sb);
8354
8355	return (rc);
8356}
8357
8358static int
8359sysctl_tid_stats(SYSCTL_HANDLER_ARGS)
8360{
8361	struct adapter *sc = arg1;
8362	struct sbuf *sb;
8363	int rc;
8364	struct tp_tid_stats stats;
8365
8366	rc = sysctl_wire_old_buffer(req, 0);
8367	if (rc != 0)
8368		return (rc);
8369
8370	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8371	if (sb == NULL)
8372		return (ENOMEM);
8373
8374	mtx_lock(&sc->reg_lock);
8375	t4_tp_get_tid_stats(sc, &stats, 1);
8376	mtx_unlock(&sc->reg_lock);
8377
8378	sbuf_printf(sb, "Delete:     %u\n", stats.del);
8379	sbuf_printf(sb, "Invalidate: %u\n", stats.inv);
8380	sbuf_printf(sb, "Active:     %u\n", stats.act);
8381	sbuf_printf(sb, "Passive:    %u", stats.pas);
8382
8383	rc = sbuf_finish(sb);
8384	sbuf_delete(sb);
8385
8386	return (rc);
8387}
8388
8389static const char * const devlog_level_strings[] = {
8390	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
8391	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
8392	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
8393	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
8394	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
8395	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
8396};
8397
8398static const char * const devlog_facility_strings[] = {
8399	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
8400	[FW_DEVLOG_FACILITY_CF]		= "CF",
8401	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
8402	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
8403	[FW_DEVLOG_FACILITY_RES]	= "RES",
8404	[FW_DEVLOG_FACILITY_HW]		= "HW",
8405	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
8406	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
8407	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
8408	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
8409	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
8410	[FW_DEVLOG_FACILITY_VI]		= "VI",
8411	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
8412	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
8413	[FW_DEVLOG_FACILITY_TM]		= "TM",
8414	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
8415	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
8416	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
8417	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
8418	[FW_DEVLOG_FACILITY_RI]		= "RI",
8419	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
8420	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
8421	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
8422	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE",
8423	[FW_DEVLOG_FACILITY_CHNET]	= "CHNET",
8424};
8425
8426static int
8427sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags)
8428{
8429	int i, j, rc, nentries, first = 0;
8430	struct devlog_params *dparams = &sc->params.devlog;
8431	struct fw_devlog_e *buf, *e;
8432	uint64_t ftstamp = UINT64_MAX;
8433
8434	if (dparams->addr == 0)
8435		return (ENXIO);
8436
8437	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
8438	buf = malloc(dparams->size, M_CXGBE, M_ZERO | flags);
8439	if (buf == NULL)
8440		return (ENOMEM);
8441
8442	rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
8443	if (rc != 0)
8444		goto done;
8445
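	/*
	 * The firmware log is a circular buffer.  The first pass byte-swaps
	 * the entries and finds the oldest one (smallest timestamp) so that
	 * the dump below can start there.
	 */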
8446	nentries = dparams->size / sizeof(struct fw_devlog_e);
8447	for (i = 0; i < nentries; i++) {
8448		e = &buf[i];
8449
8450		if (e->timestamp == 0)
8451			break;	/* end */
8452
8453		e->timestamp = be64toh(e->timestamp);
8454		e->seqno = be32toh(e->seqno);
8455		for (j = 0; j < 8; j++)
8456			e->params[j] = be32toh(e->params[j]);
8457
8458		if (e->timestamp < ftstamp) {
8459			ftstamp = e->timestamp;
8460			first = i;
8461		}
8462	}
8463
8464	if (buf[first].timestamp == 0)
8465		goto done;	/* nothing in the log */
8466
8467	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
8468	    "Seq#", "Tstamp", "Level", "Facility", "Message");
8469
8470	i = first;
8471	do {
8472		e = &buf[i];
8473		if (e->timestamp == 0)
8474			break;	/* end */
8475
8476		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
8477		    e->seqno, e->timestamp,
8478		    (e->level < nitems(devlog_level_strings) ?
8479			devlog_level_strings[e->level] : "UNKNOWN"),
8480		    (e->facility < nitems(devlog_facility_strings) ?
8481			devlog_facility_strings[e->facility] : "UNKNOWN"));
8482		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
8483		    e->params[2], e->params[3], e->params[4],
8484		    e->params[5], e->params[6], e->params[7]);
8485
8486		if (++i == nentries)
8487			i = 0;
8488	} while (i != first);
8489done:
8490	free(buf, M_CXGBE);
8491	return (rc);
8492}
8493
8494static int
8495sysctl_devlog(SYSCTL_HANDLER_ARGS)
8496{
8497	struct adapter *sc = arg1;
8498	int rc;
8499	struct sbuf *sb;
8500
8501	rc = sysctl_wire_old_buffer(req, 0);
8502	if (rc != 0)
8503		return (rc);
8504	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8505	if (sb == NULL)
8506		return (ENOMEM);
8507
8508	rc = sbuf_devlog(sc, sb, M_WAITOK);
8509	if (rc == 0)
8510		rc = sbuf_finish(sb);
8511	sbuf_delete(sb);
8512	return (rc);
8513}
8514
8515void
8516t4_os_dump_devlog(struct adapter *sc)
8517{
8518	int rc;
8519	struct sbuf sb;
8520
8521	if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
8522		return;
8523	rc = sbuf_devlog(sc, &sb, M_NOWAIT);
8524	if (rc == 0) {
8525		rc = sbuf_finish(&sb);
8526		if (rc == 0) {
8527			log(LOG_DEBUG, "%s: device log follows.\n%s",
8528			    device_get_nameunit(sc->dev), sbuf_data(&sb));
8529		}
8530	}
8531	sbuf_delete(&sb);
8532}
8533
8534static int
8535sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
8536{
8537	struct adapter *sc = arg1;
8538	struct sbuf *sb;
8539	int rc;
8540	struct tp_fcoe_stats stats[MAX_NCHAN];
8541	int i, nchan = sc->chip_params->nchan;
8542
8543	rc = sysctl_wire_old_buffer(req, 0);
8544	if (rc != 0)
8545		return (rc);
8546
8547	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8548	if (sb == NULL)
8549		return (ENOMEM);
8550
8551	mtx_lock(&sc->reg_lock);
8552	for (i = 0; i < nchan; i++)
8553		t4_get_fcoe_stats(sc, i, &stats[i], 1);
8554	mtx_unlock(&sc->reg_lock);
8555
8556	if (nchan > 2) {
8557		sbuf_printf(sb, "                   channel 0        channel 1"
8558		    "        channel 2        channel 3");
8559		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
8560		    stats[0].octets_ddp, stats[1].octets_ddp,
8561		    stats[2].octets_ddp, stats[3].octets_ddp);
8562		sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
8563		    stats[0].frames_ddp, stats[1].frames_ddp,
8564		    stats[2].frames_ddp, stats[3].frames_ddp);
8565		sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
8566		    stats[0].frames_drop, stats[1].frames_drop,
8567		    stats[2].frames_drop, stats[3].frames_drop);
8568	} else {
8569		sbuf_printf(sb, "                   channel 0        channel 1");
8570		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
8571		    stats[0].octets_ddp, stats[1].octets_ddp);
8572		sbuf_printf(sb, "\nframesDDP:  %16u %16u",
8573		    stats[0].frames_ddp, stats[1].frames_ddp);
8574		sbuf_printf(sb, "\nframesDrop: %16u %16u",
8575		    stats[0].frames_drop, stats[1].frames_drop);
8576	}
8577
8578	rc = sbuf_finish(sb);
8579	sbuf_delete(sb);
8580
8581	return (rc);
8582}
8583
8584static int
8585sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
8586{
8587	struct adapter *sc = arg1;
8588	struct sbuf *sb;
8589	int rc, i;
8590	unsigned int map, kbps, ipg, mode;
8591	unsigned int pace_tab[NTX_SCHED];
8592
8593	rc = sysctl_wire_old_buffer(req, 0);
8594	if (rc != 0)
8595		return (rc);
8596
8597	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8598	if (sb == NULL)
8599		return (ENOMEM);
8600
8601	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
8602	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
8603	t4_read_pace_tbl(sc, pace_tab);
8604
8605	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
8606	    "Class IPG (0.1 ns)   Flow IPG (us)");
8607
8608	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
8609		t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
8610		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
8611		    (mode & (1 << i)) ? "flow" : "class", map & 3);
8612		if (kbps)
8613			sbuf_printf(sb, "%9u     ", kbps);
8614		else
8615			sbuf_printf(sb, " disabled     ");
8616
8617		if (ipg)
8618			sbuf_printf(sb, "%13u        ", ipg);
8619		else
8620			sbuf_printf(sb, "     disabled        ");
8621
8622		if (pace_tab[i])
8623			sbuf_printf(sb, "%10u", pace_tab[i]);
8624		else
8625			sbuf_printf(sb, "  disabled");
8626	}
8627
8628	rc = sbuf_finish(sb);
8629	sbuf_delete(sb);
8630
8631	return (rc);
8632}
8633
8634static int
8635sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
8636{
8637	struct adapter *sc = arg1;
8638	struct sbuf *sb;
8639	int rc, i, j;
8640	uint64_t *p0, *p1;
8641	struct lb_port_stats s[2];
8642	static const char *stat_name[] = {
8643		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
8644		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
8645		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
8646		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
8647		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
8648		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
8649		"BG2FramesTrunc:", "BG3FramesTrunc:"
8650	};
8651
8652	rc = sysctl_wire_old_buffer(req, 0);
8653	if (rc != 0)
8654		return (rc);
8655
8656	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8657	if (sb == NULL)
8658		return (ENOMEM);
8659
8660	memset(s, 0, sizeof(s));
8661
8662	for (i = 0; i < sc->chip_params->nchan; i += 2) {
8663		t4_get_lb_stats(sc, i, &s[0]);
8664		t4_get_lb_stats(sc, i + 1, &s[1]);
8665
8666		p0 = &s[0].octets;
8667		p1 = &s[1].octets;
8668		sbuf_printf(sb, "%s                       Loopback %u"
8669		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
8670
8671		for (j = 0; j < nitems(stat_name); j++)
8672			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
8673				   *p0++, *p1++);
8674	}
8675
8676	rc = sbuf_finish(sb);
8677	sbuf_delete(sb);
8678
8679	return (rc);
8680}
8681
8682static int
8683sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
8684{
8685	int rc = 0;
8686	struct port_info *pi = arg1;
8687	struct link_config *lc = &pi->link_cfg;
8688	struct sbuf *sb;
8689
8690	rc = sysctl_wire_old_buffer(req, 0);
8691	if (rc != 0)
8692		return (rc);
8693	sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
8694	if (sb == NULL)
8695		return (ENOMEM);
8696
8697	if (lc->link_ok || lc->link_down_rc == 255)
8698		sbuf_printf(sb, "n/a");
8699	else
8700		sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
8701
8702	rc = sbuf_finish(sb);
8703	sbuf_delete(sb);
8704
8705	return (rc);
8706}
8707
8708struct mem_desc {
8709	unsigned int base;
8710	unsigned int limit;
8711	unsigned int idx;
8712};
8713
8714static int
8715mem_desc_cmp(const void *a, const void *b)
8716{
8717	return ((const struct mem_desc *)a)->base -
8718	       ((const struct mem_desc *)b)->base;
8719}
8720
8721static void
8722mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
8723    unsigned int to)
8724{
8725	unsigned int size;
8726
8727	if (from == to)
8728		return;
8729
8730	size = to - from + 1;
8731	if (size == 0)
8732		return;
8733
8734	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
8735	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
8736}
8737
8738static int
8739sysctl_meminfo(SYSCTL_HANDLER_ARGS)
8740{
8741	struct adapter *sc = arg1;
8742	struct sbuf *sb;
8743	int rc, i, n;
8744	uint32_t lo, hi, used, alloc;
8745	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
8746	static const char *region[] = {
8747		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
8748		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
8749		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
8750		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
8751		"RQUDP region:", "PBL region:", "TXPBL region:",
8752		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
8753		"On-chip queues:", "TLS keys:",
8754	};
8755	struct mem_desc avail[4];
8756	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
8757	struct mem_desc *md = mem;
8758
8759	rc = sysctl_wire_old_buffer(req, 0);
8760	if (rc != 0)
8761		return (rc);
8762
8763	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8764	if (sb == NULL)
8765		return (ENOMEM);
8766
8767	for (i = 0; i < nitems(mem); i++) {
8768		mem[i].limit = 0;
8769		mem[i].idx = i;
8770	}
8771
8772	/* Find and sort the populated memory ranges */
8773	i = 0;
8774	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
8775	if (lo & F_EDRAM0_ENABLE) {
8776		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
8777		avail[i].base = G_EDRAM0_BASE(hi) << 20;
8778		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
8779		avail[i].idx = 0;
8780		i++;
8781	}
8782	if (lo & F_EDRAM1_ENABLE) {
8783		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
8784		avail[i].base = G_EDRAM1_BASE(hi) << 20;
8785		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
8786		avail[i].idx = 1;
8787		i++;
8788	}
8789	if (lo & F_EXT_MEM_ENABLE) {
8790		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
8791		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
8792		avail[i].limit = avail[i].base +
8793		    (G_EXT_MEM_SIZE(hi) << 20);
8794		avail[i].idx = is_t5(sc) ? 3 : 2;	/* Call it MC0 for T5 */
8795		i++;
8796	}
8797	if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
8798		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
8799		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
8800		avail[i].limit = avail[i].base +
8801		    (G_EXT_MEM1_SIZE(hi) << 20);
8802		avail[i].idx = 4;
8803		i++;
8804	}
8805	if (!i) {				/* no memory available */
		sbuf_delete(sb);
8806		return (0);
	}
8807	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
8808
8809	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
8810	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
8811	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
8812	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
8813	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
8814	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
8815	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
8816	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
8817	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
8818
8819	/* the next few have explicit upper bounds */
8820	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
8821	md->limit = md->base - 1 +
8822		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
8823		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
8824	md++;
8825
8826	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
8827	md->limit = md->base - 1 +
8828		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
8829		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
8830	md++;
8831
8832	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8833		if (chip_id(sc) <= CHELSIO_T5)
8834			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
8835		else
8836			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
8837		md->limit = 0;
8838	} else {
8839		md->base = 0;
8840		md->idx = nitems(region);  /* hide it */
8841	}
8842	md++;
8843
8844#define ulp_region(reg) \
8845	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
8846	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
8847
8848	ulp_region(RX_ISCSI);
8849	ulp_region(RX_TDDP);
8850	ulp_region(TX_TPT);
8851	ulp_region(RX_STAG);
8852	ulp_region(RX_RQ);
8853	ulp_region(RX_RQUDP);
8854	ulp_region(RX_PBL);
8855	ulp_region(TX_PBL);
8856#undef ulp_region
8857
8858	md->base = 0;
8859	md->idx = nitems(region);
8860	if (!is_t4(sc)) {
8861		uint32_t size = 0;
8862		uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
8863		uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
8864
8865		if (is_t5(sc)) {
8866			if (sge_ctrl & F_VFIFO_ENABLE)
8867				size = G_DBVFIFO_SIZE(fifo_size);
8868		} else
8869			size = G_T6_DBVFIFO_SIZE(fifo_size);
8870
8871		if (size) {
8872			md->base = G_BASEADDR(t4_read_reg(sc,
8873			    A_SGE_DBVFIFO_BADDR));
8874			md->limit = md->base + (size << 2) - 1;
8875		}
8876	}
8877	md++;
8878
8879	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
8880	md->limit = 0;
8881	md++;
8882	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
8883	md->limit = 0;
8884	md++;
8885
8886	md->base = sc->vres.ocq.start;
8887	if (sc->vres.ocq.size)
8888		md->limit = md->base + sc->vres.ocq.size - 1;
8889	else
8890		md->idx = nitems(region);  /* hide it */
8891	md++;
8892
8893	md->base = sc->vres.key.start;
8894	if (sc->vres.key.size)
8895		md->limit = md->base + sc->vres.key.size - 1;
8896	else
8897		md->idx = nitems(region);  /* hide it */
8898	md++;
8899
8900	/* add any address-space holes; there can be up to 3 */
8901	for (n = 0; n < i - 1; n++)
8902		if (avail[n].limit < avail[n + 1].base)
8903			(md++)->base = avail[n].limit;
8904	if (avail[n].limit)
8905		(md++)->base = avail[n].limit;
8906
8907	n = md - mem;
8908	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
8909
8910	for (lo = 0; lo < i; lo++)
8911		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
8912				avail[lo].limit - 1);
8913
8914	sbuf_printf(sb, "\n");
8915	for (i = 0; i < n; i++) {
8916		if (mem[i].idx >= nitems(region))
8917			continue;                        /* skip holes */
8918		if (!mem[i].limit)
8919			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
8920		mem_region_show(sb, region[mem[i].idx], mem[i].base,
8921				mem[i].limit);
8922	}
8923
8924	sbuf_printf(sb, "\n");
8925	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
8926	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
8927	mem_region_show(sb, "uP RAM:", lo, hi);
8928
8929	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
8930	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
8931	mem_region_show(sb, "uP Extmem2:", lo, hi);
8932
8933	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
8934	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
8935		   G_PMRXMAXPAGE(lo),
8936		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
8937		   (lo & F_PMRXNUMCHN) ? 2 : 1);
8938
8939	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
8940	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
8941	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
8942		   G_PMTXMAXPAGE(lo),
8943		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
8944		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
8945	sbuf_printf(sb, "%u p-structs\n",
8946		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
8947
8948	for (i = 0; i < 4; i++) {
8949		if (chip_id(sc) > CHELSIO_T5)
8950			lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
8951		else
8952			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
8953		if (is_t5(sc)) {
8954			used = G_T5_USED(lo);
8955			alloc = G_T5_ALLOC(lo);
8956		} else {
8957			used = G_USED(lo);
8958			alloc = G_ALLOC(lo);
8959		}
8960		/* For T6 these are MAC buffer groups */
8961		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
8962		    i, used, alloc);
8963	}
8964	for (i = 0; i < sc->chip_params->nchan; i++) {
8965		if (chip_id(sc) > CHELSIO_T5)
8966			lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
8967		else
8968			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
8969		if (is_t5(sc)) {
8970			used = G_T5_USED(lo);
8971			alloc = G_T5_ALLOC(lo);
8972		} else {
8973			used = G_USED(lo);
8974			alloc = G_ALLOC(lo);
8975		}
8976		/* For T6 these are MAC buffer groups */
8977		sbuf_printf(sb,
8978		    "\nLoopback %d using %u pages out of %u allocated",
8979		    i, used, alloc);
8980	}
8981
8982	rc = sbuf_finish(sb);
8983	sbuf_delete(sb);
8984
8985	return (rc);
8986}
8987
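/*
 * Each MPS TCAM bit is stored as an (x, y) pair: (0, 0) is don't-care,
 * x = y = 1 marks an invalid entry (callers skip those), and otherwise y
 * holds the value the bit must match.  Hence the mask is x | y and the
 * Ethernet address can be recovered from y alone.
 */
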
8988static inline void
8989tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
8990{
8991	*mask = x | y;
8992	y = htobe64(y);
8993	memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
8994}
8995
8996static int
8997sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
8998{
8999	struct adapter *sc = arg1;
9000	struct sbuf *sb;
9001	int rc, i;
9002
9003	MPASS(chip_id(sc) <= CHELSIO_T5);
9004
9005	rc = sysctl_wire_old_buffer(req, 0);
9006	if (rc != 0)
9007		return (rc);
9008
9009	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9010	if (sb == NULL)
9011		return (ENOMEM);
9012
9013	sbuf_printf(sb,
9014	    "Idx  Ethernet address     Mask     Vld Ports PF"
9015	    "  VF              Replication             P0 P1 P2 P3  ML");
9016	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
9017		uint64_t tcamx, tcamy, mask;
9018		uint32_t cls_lo, cls_hi;
9019		uint8_t addr[ETHER_ADDR_LEN];
9020
9021		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
9022		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
9023		if (tcamx & tcamy)
9024			continue;
9025		tcamxy2valmask(tcamx, tcamy, addr, &mask);
9026		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
9027		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
9028		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
9029			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
9030			   addr[3], addr[4], addr[5], (uintmax_t)mask,
9031			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
9032			   G_PORTMAP(cls_hi), G_PF(cls_lo),
9033			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
9034
9035		if (cls_lo & F_REPLICATE) {
9036			struct fw_ldst_cmd ldst_cmd;
9037
9038			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
9039			ldst_cmd.op_to_addrspace =
9040			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
9041				F_FW_CMD_REQUEST | F_FW_CMD_READ |
9042				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
9043			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
9044			ldst_cmd.u.mps.rplc.fid_idx =
9045			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
9046				V_FW_LDST_CMD_IDX(i));
9047
9048			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
9049			    "t4mps");
9050			if (rc)
9051				break;
9052			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
9053			    sizeof(ldst_cmd), &ldst_cmd);
9054			end_synchronized_op(sc, 0);
9055
9056			if (rc != 0) {
9057				sbuf_printf(sb, "%36d", rc);
9058				rc = 0;
9059			} else {
9060				sbuf_printf(sb, " %08x %08x %08x %08x",
9061				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
9062				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
9063				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
9064				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
9065			}
9066		} else
9067			sbuf_printf(sb, "%36s", "");
9068
9069		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
9070		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
9071		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
9072	}
9073
9074	if (rc)
9075		(void) sbuf_finish(sb);
9076	else
9077		rc = sbuf_finish(sb);
9078	sbuf_delete(sb);
9079
9080	return (rc);
9081}
9082
9083static int
9084sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
9085{
9086	struct adapter *sc = arg1;
9087	struct sbuf *sb;
9088	int rc, i;
9089
9090	MPASS(chip_id(sc) > CHELSIO_T5);
9091
9092	rc = sysctl_wire_old_buffer(req, 0);
9093	if (rc != 0)
9094		return (rc);
9095
9096	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9097	if (sb == NULL)
9098		return (ENOMEM);
9099
9100	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
9101	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
9102	    "                           Replication"
9103	    "                                    P0 P1 P2 P3  ML\n");
9104
9105	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
9106		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
9107		uint16_t ivlan;
9108		uint64_t tcamx, tcamy, val, mask;
9109		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
9110		uint8_t addr[ETHER_ADDR_LEN];
9111
9112		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
9113		if (i < 256)
9114			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
9115		else
9116			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
9117		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
9118		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
9119		tcamy = G_DMACH(val) << 32;
9120		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
9121		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
9122		lookup_type = G_DATALKPTYPE(data2);
9123		port_num = G_DATAPORTNUM(data2);
9124		if (lookup_type && lookup_type != M_DATALKPTYPE) {
9125			/* Inner header VNI */
9126			vniy = ((data2 & F_DATAVIDH2) << 23) |
9127				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
9128			dip_hit = data2 & F_DATADIPHIT;
			vlan_vld = 0;
			ivlan = 0;	/* unused when vlan_vld == 0 */
9130		} else {
9131			vniy = 0;
9132			dip_hit = 0;
9133			vlan_vld = data2 & F_DATAVIDH2;
9134			ivlan = G_VIDL(val);
9135		}
9136
9137		ctl |= V_CTLXYBITSEL(1);
9138		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
9139		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
9140		tcamx = G_DMACH(val) << 32;
9141		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
9142		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
9143		if (lookup_type && lookup_type != M_DATALKPTYPE) {
9144			/* Inner header VNI mask */
9145			vnix = ((data2 & F_DATAVIDH2) << 23) |
9146			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
9147		} else
9148			vnix = 0;
9149
9150		if (tcamx & tcamy)
9151			continue;
9152		tcamxy2valmask(tcamx, tcamy, addr, &mask);
9153
9154		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
9155		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
9156
9157		if (lookup_type && lookup_type != M_DATALKPTYPE) {
9158			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
9159			    "%012jx %06x %06x    -    -   %3c"
9160			    "        I  %4x   %3c   %#x%4u%4d", i, addr[0],
9161			    addr[1], addr[2], addr[3], addr[4], addr[5],
9162			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
9163			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
9164			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
9165			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
9166		} else {
9167			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
9168			    "%012jx    -       -   ", i, addr[0], addr[1],
9169			    addr[2], addr[3], addr[4], addr[5],
9170			    (uintmax_t)mask);
9171
9172			if (vlan_vld)
9173				sbuf_printf(sb, "%4u   Y     ", ivlan);
9174			else
9175				sbuf_printf(sb, "  -    N     ");
9176
9177			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
9178			    lookup_type ? 'I' : 'O', port_num,
9179			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
9180			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
9181			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
9182		}
9185		if (cls_lo & F_T6_REPLICATE) {
9186			struct fw_ldst_cmd ldst_cmd;
9187
9188			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
9189			ldst_cmd.op_to_addrspace =
9190			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
9191				F_FW_CMD_REQUEST | F_FW_CMD_READ |
9192				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
9193			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
9194			ldst_cmd.u.mps.rplc.fid_idx =
9195			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
9196				V_FW_LDST_CMD_IDX(i));
9197
9198			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
9199			    "t6mps");
9200			if (rc)
9201				break;
9202			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
9203			    sizeof(ldst_cmd), &ldst_cmd);
9204			end_synchronized_op(sc, 0);
9205
9206			if (rc != 0) {
9207				sbuf_printf(sb, "%72d", rc);
9208				rc = 0;
9209			} else {
9210				sbuf_printf(sb, " %08x %08x %08x %08x"
9211				    " %08x %08x %08x %08x",
9212				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
9213				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
9214				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
9215				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
9216				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
9217				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
9218				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
9219				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
9220			}
9221		} else
9222			sbuf_printf(sb, "%72s", "");
9223
9224		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
9225		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
9226		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
9227		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
9228	}
9229
9230	if (rc)
9231		(void) sbuf_finish(sb);
9232	else
9233		rc = sbuf_finish(sb);
9234	sbuf_delete(sb);
9235
9236	return (rc);
9237}
9238
9239static int
9240sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
9241{
9242	struct adapter *sc = arg1;
9243	struct sbuf *sb;
9244	int rc;
9245	uint16_t mtus[NMTUS];
9246
9247	rc = sysctl_wire_old_buffer(req, 0);
9248	if (rc != 0)
9249		return (rc);
9250
9251	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9252	if (sb == NULL)
9253		return (ENOMEM);
9254
9255	t4_read_mtu_tbl(sc, mtus, NULL);
9256
9257	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
9258	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
9259	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
9260	    mtus[14], mtus[15]);
9261
9262	rc = sbuf_finish(sb);
9263	sbuf_delete(sb);
9264
9265	return (rc);
9266}
9267
9268static int
9269sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
9270{
9271	struct adapter *sc = arg1;
9272	struct sbuf *sb;
9273	int rc, i;
9274	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
9275	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
9276	static const char *tx_stats[MAX_PM_NSTATS] = {
9277		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
9278		"Tx FIFO wait", NULL, "Tx latency"
9279	};
9280	static const char *rx_stats[MAX_PM_NSTATS] = {
9281		"Read:", "Write bypass:", "Write mem:", "Flush:",
9282		"Rx FIFO wait", NULL, "Rx latency"
9283	};
9284
9285	rc = sysctl_wire_old_buffer(req, 0);
9286	if (rc != 0)
9287		return (rc);
9288
9289	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9290	if (sb == NULL)
9291		return (ENOMEM);
9292
9293	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
9294	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
9295
9296	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
9297	for (i = 0; i < 4; i++) {
9298		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9299		    tx_cyc[i]);
9300	}
9301
9302	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
9303	for (i = 0; i < 4; i++) {
9304		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9305		    rx_cyc[i]);
9306	}
9307
9308	if (chip_id(sc) > CHELSIO_T5) {
9309		sbuf_printf(sb,
9310		    "\n              Total wait      Total occupancy");
9311		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9312		    tx_cyc[i]);
9313		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9314		    rx_cyc[i]);
9315
9316		i += 2;
9317		MPASS(i < nitems(tx_stats));
9318
9319		sbuf_printf(sb,
9320		    "\n                   Reads           Total wait");
9321		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9322		    tx_cyc[i]);
9323		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9324		    rx_cyc[i]);
9325	}
9326
9327	rc = sbuf_finish(sb);
9328	sbuf_delete(sb);
9329
9330	return (rc);
9331}
9332
9333static int
9334sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
9335{
9336	struct adapter *sc = arg1;
9337	struct sbuf *sb;
9338	int rc;
9339	struct tp_rdma_stats stats;
9340
9341	rc = sysctl_wire_old_buffer(req, 0);
9342	if (rc != 0)
9343		return (rc);
9344
9345	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9346	if (sb == NULL)
9347		return (ENOMEM);
9348
9349	mtx_lock(&sc->reg_lock);
9350	t4_tp_get_rdma_stats(sc, &stats, 0);
9351	mtx_unlock(&sc->reg_lock);
9352
9353	sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
9354	sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
9355
9356	rc = sbuf_finish(sb);
9357	sbuf_delete(sb);
9358
9359	return (rc);
9360}
9361
9362static int
9363sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
9364{
9365	struct adapter *sc = arg1;
9366	struct sbuf *sb;
9367	int rc;
9368	struct tp_tcp_stats v4, v6;
9369
9370	rc = sysctl_wire_old_buffer(req, 0);
9371	if (rc != 0)
9372		return (rc);
9373
9374	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9375	if (sb == NULL)
9376		return (ENOMEM);
9377
9378	mtx_lock(&sc->reg_lock);
9379	t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
9380	mtx_unlock(&sc->reg_lock);
9381
9382	sbuf_printf(sb,
9383	    "                                IP                 IPv6\n");
9384	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
9385	    v4.tcp_out_rsts, v6.tcp_out_rsts);
9386	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
9387	    v4.tcp_in_segs, v6.tcp_in_segs);
9388	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
9389	    v4.tcp_out_segs, v6.tcp_out_segs);
9390	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
9391	    v4.tcp_retrans_segs, v6.tcp_retrans_segs);
9392
9393	rc = sbuf_finish(sb);
9394	sbuf_delete(sb);
9395
9396	return (rc);
9397}
9398
9399static int
9400sysctl_tids(SYSCTL_HANDLER_ARGS)
9401{
9402	struct adapter *sc = arg1;
9403	struct sbuf *sb;
9404	int rc;
9405	struct tid_info *t = &sc->tids;
9406
9407	rc = sysctl_wire_old_buffer(req, 0);
9408	if (rc != 0)
9409		return (rc);
9410
9411	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9412	if (sb == NULL)
9413		return (ENOMEM);
9414
9415	if (t->natids) {
9416		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
9417		    t->atids_in_use);
9418	}
9419
9420	if (t->nhpftids) {
9421		sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
9422		    t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
9423	}
9424
9425	if (t->ntids) {
9426		sbuf_printf(sb, "TID range: ");
9427		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
9428			uint32_t b, hb;
9429
9430			if (chip_id(sc) <= CHELSIO_T5) {
9431				b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
9432				hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
9433			} else {
9434				b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
9435				hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
9436			}
9437
9438			if (b)
9439				sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
9440			sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
9441		} else {
9442			sbuf_printf(sb, "%u-%u", t->tid_base, t->tid_base +
9443			    t->ntids - 1);
9444		}
9445		sbuf_printf(sb, ", in use: %u\n",
9446		    atomic_load_acq_int(&t->tids_in_use));
9447	}
9448
9449	if (t->nstids) {
9450		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
9451		    t->stid_base + t->nstids - 1, t->stids_in_use);
9452	}
9453
9454	if (t->nftids) {
9455		sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
9456		    t->ftid_end, t->ftids_in_use);
9457	}
9458
9459	if (t->netids) {
9460		sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base,
9461		    t->etid_base + t->netids - 1, t->etids_in_use);
9462	}
9463
9464	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
9465	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
9466	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
9467
9468	rc = sbuf_finish(sb);
9469	sbuf_delete(sb);
9470
9471	return (rc);
9472}
9473
9474static int
9475sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
9476{
9477	struct adapter *sc = arg1;
9478	struct sbuf *sb;
9479	int rc;
9480	struct tp_err_stats stats;
9481
9482	rc = sysctl_wire_old_buffer(req, 0);
9483	if (rc != 0)
9484		return (rc);
9485
9486	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9487	if (sb == NULL)
9488		return (ENOMEM);
9489
9490	mtx_lock(&sc->reg_lock);
9491	t4_tp_get_err_stats(sc, &stats, 0);
9492	mtx_unlock(&sc->reg_lock);
9493
9494	if (sc->chip_params->nchan > 2) {
9495		sbuf_printf(sb, "                 channel 0  channel 1"
9496		    "  channel 2  channel 3\n");
9497		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
9498		    stats.mac_in_errs[0], stats.mac_in_errs[1],
9499		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
9500		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
9501		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
9502		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
9503		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
9504		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
9505		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
9506		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
9507		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
9508		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
9509		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
9510		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
9511		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
9512		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
9513		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
9514		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
9515		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
9516		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
9517		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
9518		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
9519		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
9520		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
9521	} else {
9522		sbuf_printf(sb, "                 channel 0  channel 1\n");
9523		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
9524		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
9525		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
9526		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
9527		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
9528		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
9529		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
9530		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
9531		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
9532		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
9533		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
9534		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
9535		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
9536		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
9537		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
9538		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
9539	}
9540
9541	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
9542	    stats.ofld_no_neigh, stats.ofld_cong_defer);
9543
9544	rc = sbuf_finish(sb);
9545	sbuf_delete(sb);
9546
9547	return (rc);
9548}
9549
9550static int
9551sysctl_tnl_stats(SYSCTL_HANDLER_ARGS)
9552{
9553	struct adapter *sc = arg1;
9554	struct sbuf *sb;
9555	int rc;
9556	struct tp_tnl_stats stats;
9557
9558	rc = sysctl_wire_old_buffer(req, 0);
9559	if (rc != 0)
		return (rc);
9561
9562	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9563	if (sb == NULL)
9564		return (ENOMEM);
9565
9566	mtx_lock(&sc->reg_lock);
9567	t4_tp_get_tnl_stats(sc, &stats, 1);
9568	mtx_unlock(&sc->reg_lock);
9569
9570	if (sc->chip_params->nchan > 2) {
9571		sbuf_printf(sb, "           channel 0  channel 1"
9572		    "  channel 2  channel 3\n");
9573		sbuf_printf(sb, "OutPkts:  %10u %10u %10u %10u\n",
9574		    stats.out_pkt[0], stats.out_pkt[1],
9575		    stats.out_pkt[2], stats.out_pkt[3]);
9576		sbuf_printf(sb, "InPkts:   %10u %10u %10u %10u",
9577		    stats.in_pkt[0], stats.in_pkt[1],
9578		    stats.in_pkt[2], stats.in_pkt[3]);
9579	} else {
9580		sbuf_printf(sb, "           channel 0  channel 1\n");
9581		sbuf_printf(sb, "OutPkts:  %10u %10u\n",
9582		    stats.out_pkt[0], stats.out_pkt[1]);
9583		sbuf_printf(sb, "InPkts:   %10u %10u",
9584		    stats.in_pkt[0], stats.in_pkt[1]);
9585	}
9586
9587	rc = sbuf_finish(sb);
9588	sbuf_delete(sb);
9589
9590	return (rc);
9591}
9592
9593static int
9594sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
9595{
9596	struct adapter *sc = arg1;
9597	struct tp_params *tpp = &sc->params.tp;
9598	u_int mask;
9599	int rc;
9600
9601	mask = tpp->la_mask >> 16;
9602	rc = sysctl_handle_int(oidp, &mask, 0, req);
9603	if (rc != 0 || req->newptr == NULL)
9604		return (rc);
9605	if (mask > 0xffff)
9606		return (EINVAL);
9607	tpp->la_mask = mask << 16;
9608	t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
9609
9610	return (0);
9611}
9612
9613struct field_desc {
9614	const char *name;
9615	u_int start;
9616	u_int width;
9617};
9618
9619static void
9620field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
9621{
9622	char buf[32];
9623	int line_size = 0;
9624
9625	while (f->name) {
9626		uint64_t mask = (1ULL << f->width) - 1;
9627		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
9628		    ((uintmax_t)v >> f->start) & mask);
9629
9630		if (line_size + len >= 79) {
9631			line_size = 8;
9632			sbuf_printf(sb, "\n        ");
9633		}
9634		sbuf_printf(sb, "%s ", buf);
9635		line_size += len + 1;
9636		f++;
9637	}
9638	sbuf_printf(sb, "\n");
9639}
9640
9641static const struct field_desc tp_la0[] = {
9642	{ "RcfOpCodeOut", 60, 4 },
9643	{ "State", 56, 4 },
9644	{ "WcfState", 52, 4 },
9645	{ "RcfOpcSrcOut", 50, 2 },
9646	{ "CRxError", 49, 1 },
9647	{ "ERxError", 48, 1 },
9648	{ "SanityFailed", 47, 1 },
9649	{ "SpuriousMsg", 46, 1 },
9650	{ "FlushInputMsg", 45, 1 },
9651	{ "FlushInputCpl", 44, 1 },
9652	{ "RssUpBit", 43, 1 },
9653	{ "RssFilterHit", 42, 1 },
9654	{ "Tid", 32, 10 },
9655	{ "InitTcb", 31, 1 },
9656	{ "LineNumber", 24, 7 },
9657	{ "Emsg", 23, 1 },
9658	{ "EdataOut", 22, 1 },
9659	{ "Cmsg", 21, 1 },
9660	{ "CdataOut", 20, 1 },
9661	{ "EreadPdu", 19, 1 },
9662	{ "CreadPdu", 18, 1 },
9663	{ "TunnelPkt", 17, 1 },
9664	{ "RcfPeerFin", 16, 1 },
9665	{ "RcfReasonOut", 12, 4 },
9666	{ "TxCchannel", 10, 2 },
9667	{ "RcfTxChannel", 8, 2 },
9668	{ "RxEchannel", 6, 2 },
9669	{ "RcfRxChannel", 5, 1 },
9670	{ "RcfDataOutSrdy", 4, 1 },
9671	{ "RxDvld", 3, 1 },
9672	{ "RxOoDvld", 2, 1 },
9673	{ "RxCongestion", 1, 1 },
9674	{ "TxCongestion", 0, 1 },
9675	{ NULL }
9676};
9677
9678static const struct field_desc tp_la1[] = {
9679	{ "CplCmdIn", 56, 8 },
9680	{ "CplCmdOut", 48, 8 },
9681	{ "ESynOut", 47, 1 },
9682	{ "EAckOut", 46, 1 },
9683	{ "EFinOut", 45, 1 },
9684	{ "ERstOut", 44, 1 },
9685	{ "SynIn", 43, 1 },
9686	{ "AckIn", 42, 1 },
9687	{ "FinIn", 41, 1 },
9688	{ "RstIn", 40, 1 },
9689	{ "DataIn", 39, 1 },
9690	{ "DataInVld", 38, 1 },
9691	{ "PadIn", 37, 1 },
9692	{ "RxBufEmpty", 36, 1 },
9693	{ "RxDdp", 35, 1 },
9694	{ "RxFbCongestion", 34, 1 },
9695	{ "TxFbCongestion", 33, 1 },
9696	{ "TxPktSumSrdy", 32, 1 },
9697	{ "RcfUlpType", 28, 4 },
9698	{ "Eread", 27, 1 },
9699	{ "Ebypass", 26, 1 },
9700	{ "Esave", 25, 1 },
9701	{ "Static0", 24, 1 },
9702	{ "Cread", 23, 1 },
9703	{ "Cbypass", 22, 1 },
9704	{ "Csave", 21, 1 },
9705	{ "CPktOut", 20, 1 },
9706	{ "RxPagePoolFull", 18, 2 },
9707	{ "RxLpbkPkt", 17, 1 },
9708	{ "TxLpbkPkt", 16, 1 },
9709	{ "RxVfValid", 15, 1 },
9710	{ "SynLearned", 14, 1 },
9711	{ "SetDelEntry", 13, 1 },
9712	{ "SetInvEntry", 12, 1 },
9713	{ "CpcmdDvld", 11, 1 },
9714	{ "CpcmdSave", 10, 1 },
9715	{ "RxPstructsFull", 8, 2 },
9716	{ "EpcmdDvld", 7, 1 },
9717	{ "EpcmdFlush", 6, 1 },
9718	{ "EpcmdTrimPrefix", 5, 1 },
9719	{ "EpcmdTrimPostfix", 4, 1 },
9720	{ "ERssIp4Pkt", 3, 1 },
9721	{ "ERssIp6Pkt", 2, 1 },
9722	{ "ERssTcpUdpPkt", 1, 1 },
9723	{ "ERssFceFipPkt", 0, 1 },
9724	{ NULL }
9725};
9726
9727static const struct field_desc tp_la2[] = {
9728	{ "CplCmdIn", 56, 8 },
9729	{ "MpsVfVld", 55, 1 },
9730	{ "MpsPf", 52, 3 },
9731	{ "MpsVf", 44, 8 },
9732	{ "SynIn", 43, 1 },
9733	{ "AckIn", 42, 1 },
9734	{ "FinIn", 41, 1 },
9735	{ "RstIn", 40, 1 },
9736	{ "DataIn", 39, 1 },
9737	{ "DataInVld", 38, 1 },
9738	{ "PadIn", 37, 1 },
9739	{ "RxBufEmpty", 36, 1 },
9740	{ "RxDdp", 35, 1 },
9741	{ "RxFbCongestion", 34, 1 },
9742	{ "TxFbCongestion", 33, 1 },
9743	{ "TxPktSumSrdy", 32, 1 },
9744	{ "RcfUlpType", 28, 4 },
9745	{ "Eread", 27, 1 },
9746	{ "Ebypass", 26, 1 },
9747	{ "Esave", 25, 1 },
9748	{ "Static0", 24, 1 },
9749	{ "Cread", 23, 1 },
9750	{ "Cbypass", 22, 1 },
9751	{ "Csave", 21, 1 },
9752	{ "CPktOut", 20, 1 },
9753	{ "RxPagePoolFull", 18, 2 },
9754	{ "RxLpbkPkt", 17, 1 },
9755	{ "TxLpbkPkt", 16, 1 },
9756	{ "RxVfValid", 15, 1 },
9757	{ "SynLearned", 14, 1 },
9758	{ "SetDelEntry", 13, 1 },
9759	{ "SetInvEntry", 12, 1 },
9760	{ "CpcmdDvld", 11, 1 },
9761	{ "CpcmdSave", 10, 1 },
9762	{ "RxPstructsFull", 8, 2 },
9763	{ "EpcmdDvld", 7, 1 },
9764	{ "EpcmdFlush", 6, 1 },
9765	{ "EpcmdTrimPrefix", 5, 1 },
9766	{ "EpcmdTrimPostfix", 4, 1 },
9767	{ "ERssIp4Pkt", 3, 1 },
9768	{ "ERssIp6Pkt", 2, 1 },
9769	{ "ERssTcpUdpPkt", 1, 1 },
9770	{ "ERssFceFipPkt", 0, 1 },
9771	{ NULL }
9772};
9773
9774static void
9775tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
9776{
9777
9778	field_desc_show(sb, *p, tp_la0);
9779}
9780
9781static void
9782tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
9783{
9784
9785	if (idx)
9786		sbuf_printf(sb, "\n");
9787	field_desc_show(sb, p[0], tp_la0);
9788	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9789		field_desc_show(sb, p[1], tp_la0);
9790}
9791
9792static void
9793tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
9794{
9795
9796	if (idx)
9797		sbuf_printf(sb, "\n");
9798	field_desc_show(sb, p[0], tp_la0);
9799	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9800		field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
9801}
9802
9803static int
9804sysctl_tp_la(SYSCTL_HANDLER_ARGS)
9805{
9806	struct adapter *sc = arg1;
9807	struct sbuf *sb;
9808	uint64_t *buf, *p;
9809	int rc;
9810	u_int i, inc;
9811	void (*show_func)(struct sbuf *, uint64_t *, int);
9812
9813	rc = sysctl_wire_old_buffer(req, 0);
9814	if (rc != 0)
9815		return (rc);
9816
9817	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9818	if (sb == NULL)
9819		return (ENOMEM);
9820
9821	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
9822
9823	t4_tp_read_la(sc, buf, NULL);
9824	p = buf;
9825
9826	switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
9827	case 2:
9828		inc = 2;
9829		show_func = tp_la_show2;
9830		break;
9831	case 3:
9832		inc = 2;
9833		show_func = tp_la_show3;
9834		break;
9835	default:
9836		inc = 1;
9837		show_func = tp_la_show;
9838	}
9839
9840	for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
9841		(*show_func)(sb, p, i);
9842
9843	rc = sbuf_finish(sb);
9844	sbuf_delete(sb);
9845	free(buf, M_CXGBE);
9846	return (rc);
9847}
9848
9849static int
9850sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
9851{
9852	struct adapter *sc = arg1;
9853	struct sbuf *sb;
9854	int rc;
9855	u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
9856
9857	rc = sysctl_wire_old_buffer(req, 0);
9858	if (rc != 0)
9859		return (rc);
9860
9861	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9862	if (sb == NULL)
9863		return (ENOMEM);
9864
9865	t4_get_chan_txrate(sc, nrate, orate);
9866
9867	if (sc->chip_params->nchan > 2) {
9868		sbuf_printf(sb, "              channel 0   channel 1"
9869		    "   channel 2   channel 3\n");
9870		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
9871		    nrate[0], nrate[1], nrate[2], nrate[3]);
9872		sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
9873		    orate[0], orate[1], orate[2], orate[3]);
9874	} else {
9875		sbuf_printf(sb, "              channel 0   channel 1\n");
9876		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
9877		    nrate[0], nrate[1]);
9878		sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
9879		    orate[0], orate[1]);
9880	}
9881
9882	rc = sbuf_finish(sb);
9883	sbuf_delete(sb);
9884
9885	return (rc);
9886}
9887
9888static int
9889sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
9890{
9891	struct adapter *sc = arg1;
9892	struct sbuf *sb;
9893	uint32_t *buf, *p;
9894	int rc, i;
9895
9896	rc = sysctl_wire_old_buffer(req, 0);
9897	if (rc != 0)
9898		return (rc);
9899
9900	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9901	if (sb == NULL)
9902		return (ENOMEM);
9903
9904	buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
9905	    M_ZERO | M_WAITOK);
9906
9907	t4_ulprx_read_la(sc, buf);
9908	p = buf;
9909
9910	sbuf_printf(sb, "      Pcmd        Type   Message"
9911	    "                Data");
9912	for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
9913		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
9914		    p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
9915	}
9916
9917	rc = sbuf_finish(sb);
9918	sbuf_delete(sb);
9919	free(buf, M_CXGBE);
9920	return (rc);
9921}
9922
9923static int
9924sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
9925{
9926	struct adapter *sc = arg1;
9927	struct sbuf *sb;
9928	int rc, v;
9929
9930	MPASS(chip_id(sc) >= CHELSIO_T5);
9931
9932	rc = sysctl_wire_old_buffer(req, 0);
9933	if (rc != 0)
9934		return (rc);
9935
9936	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9937	if (sb == NULL)
9938		return (ENOMEM);
9939
9940	v = t4_read_reg(sc, A_SGE_STAT_CFG);
9941	if (G_STATSOURCE_T5(v) == 7) {
9942		int mode;
9943
9944		mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
9945		if (mode == 0) {
9946			sbuf_printf(sb, "total %d, incomplete %d",
9947			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
9948			    t4_read_reg(sc, A_SGE_STAT_MATCH));
9949		} else if (mode == 1) {
9950			sbuf_printf(sb, "total %d, data overflow %d",
9951			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
9952			    t4_read_reg(sc, A_SGE_STAT_MATCH));
9953		} else {
9954			sbuf_printf(sb, "unknown mode %d", mode);
9955		}
9956	}
9957	rc = sbuf_finish(sb);
9958	sbuf_delete(sb);
9959
9960	return (rc);
9961}
9962
9963static int
9964sysctl_cpus(SYSCTL_HANDLER_ARGS)
9965{
9966	struct adapter *sc = arg1;
9967	enum cpu_sets op = arg2;
9968	cpuset_t cpuset;
9969	struct sbuf *sb;
9970	int i, rc;
9971
9972	MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
9973
9974	CPU_ZERO(&cpuset);
9975	rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
9976	if (rc != 0)
9977		return (rc);
9978
9979	rc = sysctl_wire_old_buffer(req, 0);
9980	if (rc != 0)
9981		return (rc);
9982
9983	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9984	if (sb == NULL)
9985		return (ENOMEM);
9986
	CPU_FOREACH(i) {
		if (CPU_ISSET(i, &cpuset))
			sbuf_printf(sb, "%d ", i);
	}
9989	rc = sbuf_finish(sb);
9990	sbuf_delete(sb);
9991
9992	return (rc);
9993}
9994
9995#ifdef TCP_OFFLOAD
9996static int
9997sysctl_tls(SYSCTL_HANDLER_ARGS)
9998{
9999	struct adapter *sc = arg1;
10000	int i, j, v, rc;
10001	struct vi_info *vi;
10002
10003	v = sc->tt.tls;
10004	rc = sysctl_handle_int(oidp, &v, 0, req);
10005	if (rc != 0 || req->newptr == NULL)
10006		return (rc);
10007
10008	if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
10009		return (ENOTSUP);
10010
10011	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4stls");
10012	if (rc)
10013		return (rc);
10014	sc->tt.tls = !!v;
10015	for_each_port(sc, i) {
10016		for_each_vi(sc->port[i], j, vi) {
10017			if (vi->flags & VI_INIT_DONE)
10018				t4_update_fl_bufsize(vi->ifp);
10019		}
10020	}
10021	end_synchronized_op(sc, 0);
10022
	return (0);
}
10026
10027static int
10028sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
10029{
10030	struct adapter *sc = arg1;
10031	int *old_ports, *new_ports;
10032	int i, new_count, rc;
10033
10034	if (req->newptr == NULL && req->oldptr == NULL)
10035		return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
10036		    sizeof(sc->tt.tls_rx_ports[0])));
10037
10038	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
10039	if (rc)
10040		return (rc);
10041
10042	if (sc->tt.num_tls_rx_ports == 0) {
10043		i = -1;
10044		rc = SYSCTL_OUT(req, &i, sizeof(i));
10045	} else
10046		rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
10047		    sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
10048	if (rc == 0 && req->newptr != NULL) {
10049		new_count = req->newlen / sizeof(new_ports[0]);
10050		new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
10051		    M_WAITOK);
10052		rc = SYSCTL_IN(req, new_ports, new_count *
10053		    sizeof(new_ports[0]));
10054		if (rc)
10055			goto err;
10056
10057		/* Allow setting to a single '-1' to clear the list. */
10058		if (new_count == 1 && new_ports[0] == -1) {
10059			ADAPTER_LOCK(sc);
10060			old_ports = sc->tt.tls_rx_ports;
10061			sc->tt.tls_rx_ports = NULL;
10062			sc->tt.num_tls_rx_ports = 0;
10063			ADAPTER_UNLOCK(sc);
10064			free(old_ports, M_CXGBE);
10065		} else {
10066			for (i = 0; i < new_count; i++) {
10067				if (new_ports[i] < 1 ||
10068				    new_ports[i] > IPPORT_MAX) {
10069					rc = EINVAL;
10070					goto err;
10071				}
10072			}
10073
10074			ADAPTER_LOCK(sc);
10075			old_ports = sc->tt.tls_rx_ports;
10076			sc->tt.tls_rx_ports = new_ports;
10077			sc->tt.num_tls_rx_ports = new_count;
10078			ADAPTER_UNLOCK(sc);
10079			free(old_ports, M_CXGBE);
10080			new_ports = NULL;
10081		}
10082	err:
10083		free(new_ports, M_CXGBE);
10084	}
10085	end_synchronized_op(sc, 0);
10086	return (rc);
10087}
10088
10089static int
10090sysctl_tls_rx_timeout(SYSCTL_HANDLER_ARGS)
10091{
10092	struct adapter *sc = arg1;
10093	int v, rc;
10094
10095	v = sc->tt.tls_rx_timeout;
10096	rc = sysctl_handle_int(oidp, &v, 0, req);
10097	if (rc != 0 || req->newptr == NULL)
10098		return (rc);
10099
10100	if (v < 0)
10101		return (EINVAL);
10102
10103	if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
10104		return (ENOTSUP);
10105
10106	sc->tt.tls_rx_timeout = v;
10107
	return (0);
}
10111
10112static void
10113unit_conv(char *buf, size_t len, u_int val, u_int factor)
10114{
10115	u_int rem = val % factor;
10116
10117	if (rem == 0)
10118		snprintf(buf, len, "%u", val / factor);
10119	else {
10120		while (rem % 10 == 0)
10121			rem /= 10;
10122		snprintf(buf, len, "%u.%u", val / factor, rem);
10123	}
10124}
10125
10126static int
10127sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
10128{
10129	struct adapter *sc = arg1;
10130	char buf[16];
10131	u_int res, re;
10132	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10133
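	/*
	 * Illustrative: vpd.cclk appears to be in kHz, so cclk_ps above
	 * is the core-clock period in picoseconds.  A tick is that
	 * period left-shifted by the hardware resolution field, and
	 * unit_conv(..., 1000000) below renders it in microseconds.
	 */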
10134	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
10135	switch (arg2) {
10136	case 0:
10137		/* timer_tick */
10138		re = G_TIMERRESOLUTION(res);
10139		break;
10140	case 1:
10141		/* TCP timestamp tick */
10142		re = G_TIMESTAMPRESOLUTION(res);
10143		break;
10144	case 2:
10145		/* DACK tick */
10146		re = G_DELAYEDACKRESOLUTION(res);
10147		break;
10148	default:
10149		return (EDOOFUS);
10150	}
10151
10152	unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
10153
10154	return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
10155}
10156
10157static int
10158sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
10159{
10160	struct adapter *sc = arg1;
10161	u_int res, dack_re, v;
10162	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10163
10164	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
10165	dack_re = G_DELAYEDACKRESOLUTION(res);
10166	v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
10167
10168	return (sysctl_handle_int(oidp, &v, 0, req));
10169}
10170
10171static int
10172sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
10173{
10174	struct adapter *sc = arg1;
10175	int reg = arg2;
10176	u_int tre;
10177	u_long tp_tick_us, v;
10178	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10179
10180	MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
10181	    reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
10182	    reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
10183	    reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
10184
10185	tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
10186	tp_tick_us = (cclk_ps << tre) / 1000000;
10187
10188	if (reg == A_TP_INIT_SRTT)
10189		v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
10190	else
10191		v = tp_tick_us * t4_read_reg(sc, reg);
10192
10193	return (sysctl_handle_long(oidp, &v, 0, req));
10194}
10195
10196/*
 * All fields in TP_SHIFT_CNT are 4 bits wide and the starting bit position of
 * the field is passed to this function (as arg2).
10199 */
10200static int
10201sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
10202{
10203	struct adapter *sc = arg1;
10204	int idx = arg2;
10205	u_int v;
10206
10207	MPASS(idx >= 0 && idx <= 24);
10208
10209	v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
10210
10211	return (sysctl_handle_int(oidp, &v, 0, req));
10212}
10213
10214static int
10215sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
10216{
10217	struct adapter *sc = arg1;
10218	int idx = arg2;
10219	u_int shift, v, r;
10220
10221	MPASS(idx >= 0 && idx < 16);
10222
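	/*
	 * Each A_TP_TCP_BACKOFF_REG packs four backoff indices at 8-bit
	 * offsets, so (illustrative) idx = 6 selects byte 2 of
	 * A_TP_TCP_BACKOFF_REG1, masked with M_TIMERBACKOFFINDEX0.
	 */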
10223	r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
10224	shift = (idx & 3) << 3;
10225	v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
10226
10227	return (sysctl_handle_int(oidp, &v, 0, req));
10228}
10229
10230static int
10231sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
10232{
10233	struct vi_info *vi = arg1;
10234	struct adapter *sc = vi->adapter;
10235	int idx, rc, i;
10236	struct sge_ofld_rxq *ofld_rxq;
10237	uint8_t v;
10238
10239	idx = vi->ofld_tmr_idx;
10240
10241	rc = sysctl_handle_int(oidp, &idx, 0, req);
10242	if (rc != 0 || req->newptr == NULL)
10243		return (rc);
10244
10245	if (idx < 0 || idx >= SGE_NTIMERS)
10246		return (EINVAL);
10247
10248	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
10249	    "t4otmr");
10250	if (rc)
10251		return (rc);
10252
10253	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
10254	for_each_ofld_rxq(vi, i, ofld_rxq) {
10255#ifdef atomic_store_rel_8
10256		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
10257#else
10258		ofld_rxq->iq.intr_params = v;
10259#endif
10260	}
10261	vi->ofld_tmr_idx = idx;
10262
10263	end_synchronized_op(sc, LOCK_HELD);
10264	return (0);
10265}
10266
10267static int
10268sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
10269{
10270	struct vi_info *vi = arg1;
10271	struct adapter *sc = vi->adapter;
10272	int idx, rc;
10273
10274	idx = vi->ofld_pktc_idx;
10275
10276	rc = sysctl_handle_int(oidp, &idx, 0, req);
10277	if (rc != 0 || req->newptr == NULL)
10278		return (rc);
10279
10280	if (idx < -1 || idx >= SGE_NCOUNTERS)
10281		return (EINVAL);
10282
10283	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
10284	    "t4opktc");
10285	if (rc)
10286		return (rc);
10287
10288	if (vi->flags & VI_INIT_DONE)
10289		rc = EBUSY; /* cannot be changed once the queues are created */
10290	else
10291		vi->ofld_pktc_idx = idx;
10292
10293	end_synchronized_op(sc, LOCK_HELD);
10294	return (rc);
10295}
10296#endif
10297
10298static int
10299get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
10300{
10301	int rc;
10302
10303	if (cntxt->cid > M_CTXTQID)
10304		return (EINVAL);
10305
10306	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
10307	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
10308		return (EINVAL);
10309
10310	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
10311	if (rc)
10312		return (rc);
10313
10314	if (sc->flags & FW_OK) {
10315		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
10316		    &cntxt->data[0]);
10317		if (rc == 0)
10318			goto done;
10319	}
10320
10321	/*
10322	 * Read via firmware failed or wasn't even attempted.  Read directly via
10323	 * the backdoor.
10324	 */
10325	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
10326done:
10327	end_synchronized_op(sc, 0);
10328	return (rc);
10329}
10330
10331static int
10332load_fw(struct adapter *sc, struct t4_data *fw)
10333{
10334	int rc;
10335	uint8_t *fw_data;
10336
10337	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
10338	if (rc)
10339		return (rc);
10340
10341	/*
10342	 * The firmware, with the sole exception of the memory parity error
10343	 * handler, runs from memory and not flash.  It is almost always safe to
	 * install a new firmware image on a running system.  Just set bit 1 in
10345	 * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
10346	 */
10347	if (sc->flags & FULL_INIT_DONE &&
10348	    (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
10349		rc = EBUSY;
10350		goto done;
10351	}
10352
10353	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
10354
10355	rc = copyin(fw->data, fw_data, fw->len);
10356	if (rc == 0)
10357		rc = -t4_load_fw(sc, fw_data, fw->len);
10358
10359	free(fw_data, M_CXGBE);
10360done:
10361	end_synchronized_op(sc, 0);
10362	return (rc);
10363}
10364
10365static int
10366load_cfg(struct adapter *sc, struct t4_data *cfg)
10367{
10368	int rc;
10369	uint8_t *cfg_data = NULL;
10370
10371	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
10372	if (rc)
10373		return (rc);
10374
10375	if (cfg->len == 0) {
10376		/* clear */
10377		rc = -t4_load_cfg(sc, NULL, 0);
10378		goto done;
10379	}
10380
10381	cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
10382
10383	rc = copyin(cfg->data, cfg_data, cfg->len);
10384	if (rc == 0)
10385		rc = -t4_load_cfg(sc, cfg_data, cfg->len);
10386
10387	free(cfg_data, M_CXGBE);
10388done:
10389	end_synchronized_op(sc, 0);
10390	return (rc);
10391}
10392
10393static int
10394load_boot(struct adapter *sc, struct t4_bootrom *br)
10395{
10396	int rc;
10397	uint8_t *br_data = NULL;
10398	u_int offset;
10399
10400	if (br->len > 1024 * 1024)
10401		return (EFBIG);
10402
10403	if (br->pf_offset == 0) {
10404		/* pfidx */
10405		if (br->pfidx_addr > 7)
10406			return (EINVAL);
10407		offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
10408		    A_PCIE_PF_EXPROM_OFST)));
10409	} else if (br->pf_offset == 1) {
10410		/* offset */
10411		offset = G_OFFSET(br->pfidx_addr);
10412	} else {
10413		return (EINVAL);
10414	}
10415
10416	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
10417	if (rc)
10418		return (rc);
10419
10420	if (br->len == 0) {
10421		/* clear */
10422		rc = -t4_load_boot(sc, NULL, offset, 0);
10423		goto done;
10424	}
10425
10426	br_data = malloc(br->len, M_CXGBE, M_WAITOK);
10427
10428	rc = copyin(br->data, br_data, br->len);
10429	if (rc == 0)
10430		rc = -t4_load_boot(sc, br_data, offset, br->len);
10431
10432	free(br_data, M_CXGBE);
10433done:
10434	end_synchronized_op(sc, 0);
10435	return (rc);
10436}
10437
10438static int
10439load_bootcfg(struct adapter *sc, struct t4_data *bc)
10440{
10441	int rc;
10442	uint8_t *bc_data = NULL;
10443
10444	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
10445	if (rc)
10446		return (rc);
10447
10448	if (bc->len == 0) {
10449		/* clear */
10450		rc = -t4_load_bootcfg(sc, NULL, 0);
10451		goto done;
10452	}
10453
10454	bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
10455
10456	rc = copyin(bc->data, bc_data, bc->len);
10457	if (rc == 0)
10458		rc = -t4_load_bootcfg(sc, bc_data, bc->len);
10459
10460	free(bc_data, M_CXGBE);
10461done:
10462	end_synchronized_op(sc, 0);
10463	return (rc);
10464}
10465
10466static int
10467cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
10468{
10469	int rc;
10470	struct cudbg_init *cudbg;
10471	void *handle, *buf;
10472
10473	/* buf is large, don't block if no memory is available */
10474	buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
10475	if (buf == NULL)
10476		return (ENOMEM);
10477
10478	handle = cudbg_alloc_handle();
10479	if (handle == NULL) {
10480		rc = ENOMEM;
10481		goto done;
10482	}
10483
10484	cudbg = cudbg_get_init(handle);
10485	cudbg->adap = sc;
10486	cudbg->print = (cudbg_print_cb)printf;
10487
10488#ifndef notyet
10489	device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
10490	    __func__, dump->wr_flash, dump->len, dump->data);
10491#endif
10492
10493	if (dump->wr_flash)
10494		cudbg->use_flash = 1;
10495	MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
10496	memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
10497
10498	rc = cudbg_collect(handle, buf, &dump->len);
10499	if (rc != 0)
10500		goto done;
10501
10502	rc = copyout(buf, dump->data, dump->len);
10503done:
10504	cudbg_free_handle(handle);
10505	free(buf, M_CXGBE);
10506	return (rc);
10507}
10508
10509static void
10510free_offload_policy(struct t4_offload_policy *op)
10511{
10512	struct offload_rule *r;
10513	int i;
10514
10515	if (op == NULL)
10516		return;
10517
10518	r = &op->rule[0];
10519	for (i = 0; i < op->nrules; i++, r++) {
10520		free(r->bpf_prog.bf_insns, M_CXGBE);
10521	}
10522	free(op->rule, M_CXGBE);
10523	free(op, M_CXGBE);
10524}
10525
10526static int
10527set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
10528{
10529	int i, rc, len;
10530	struct t4_offload_policy *op, *old;
10531	struct bpf_program *bf;
10532	const struct offload_settings *s;
10533	struct offload_rule *r;
10534	void *u;
10535
10536	if (!is_offload(sc))
10537		return (ENODEV);
10538
10539	if (uop->nrules == 0) {
10540		/* Delete installed policies. */
10541		op = NULL;
10542		goto set_policy;
10543	} else if (uop->nrules > 256) { /* arbitrary */
10544		return (E2BIG);
10545	}
10546
10547	/* Copy userspace offload policy to kernel */
10548	op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
10549	op->nrules = uop->nrules;
10550	len = op->nrules * sizeof(struct offload_rule);
10551	op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
10552	rc = copyin(uop->rule, op->rule, len);
10553	if (rc) {
10554		free(op->rule, M_CXGBE);
10555		free(op, M_CXGBE);
10556		return (rc);
10557	}
10558
10559	r = &op->rule[0];
	for (i = 0; i < op->nrules; i++, r++) {
10562		/* Validate open_type */
10563		if (r->open_type != OPEN_TYPE_LISTEN &&
10564		    r->open_type != OPEN_TYPE_ACTIVE &&
10565		    r->open_type != OPEN_TYPE_PASSIVE &&
10566		    r->open_type != OPEN_TYPE_DONTCARE) {
10567error:
10568			/*
10569			 * Rules 0 to i have malloc'd filters that need to be
10570			 * freed.  Rules i+1 to nrules have userspace pointers
10571			 * and should be left alone.
10572			 */
10573			op->nrules = i;
10574			free_offload_policy(op);
10575			return (rc);
10576		}
10577
10578		/* Validate settings */
10579		s = &r->settings;
10580		if ((s->offload != 0 && s->offload != 1) ||
10581		    s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
10582		    s->sched_class < -1 ||
10583		    s->sched_class >= sc->chip_params->nsched_cls) {
10584			rc = EINVAL;
10585			goto error;
10586		}
10587
10588		bf = &r->bpf_prog;
10589		u = bf->bf_insns;	/* userspace ptr */
10590		bf->bf_insns = NULL;
10591		if (bf->bf_len == 0) {
10592			/* legal, matches everything */
10593			continue;
10594		}
10595		len = bf->bf_len * sizeof(*bf->bf_insns);
10596		bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
10597		rc = copyin(u, bf->bf_insns, len);
10598		if (rc != 0)
10599			goto error;
10600
10601		if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
10602			rc = EINVAL;
10603			goto error;
10604		}
10605	}
10606set_policy:
10607	rw_wlock(&sc->policy_lock);
10608	old = sc->policy;
10609	sc->policy = op;
10610	rw_wunlock(&sc->policy_lock);
10611	free_offload_policy(old);
10612
10613	return (0);
10614}
10615
10616#define MAX_READ_BUF_SIZE (128 * 1024)
10617static int
10618read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
10619{
10620	uint32_t addr, remaining, n;
10621	uint32_t *buf;
10622	int rc;
10623	uint8_t *dst;
10624
10625	rc = validate_mem_range(sc, mr->addr, mr->len);
10626	if (rc != 0)
10627		return (rc);
10628
10629	buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
10630	addr = mr->addr;
10631	remaining = mr->len;
10632	dst = (void *)mr->data;
10633
10634	while (remaining) {
10635		n = min(remaining, MAX_READ_BUF_SIZE);
10636		read_via_memwin(sc, 2, addr, buf, n);
10637
10638		rc = copyout(buf, dst, n);
10639		if (rc != 0)
10640			break;
10641
10642		dst += n;
10643		remaining -= n;
10644		addr += n;
10645	}
10646
10647	free(buf, M_CXGBE);
10648	return (rc);
10649}
10650#undef MAX_READ_BUF_SIZE
10651
10652static int
10653read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
10654{
10655	int rc;
10656
10657	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
10658		return (EINVAL);
10659
10660	if (i2cd->len > sizeof(i2cd->data))
10661		return (EFBIG);
10662
10663	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
10664	if (rc)
10665		return (rc);
10666	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
10667	    i2cd->offset, i2cd->len, &i2cd->data[0]);
10668	end_synchronized_op(sc, 0);
10669
10670	return (rc);
10671}
10672
10673static int
10674clear_stats(struct adapter *sc, u_int port_id)
10675{
10676	int i, v, chan_map;
10677	struct port_info *pi;
10678	struct vi_info *vi;
10679	struct sge_rxq *rxq;
10680	struct sge_txq *txq;
10681	struct sge_wrq *wrq;
10682#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10683	struct sge_ofld_txq *ofld_txq;
10684#endif
10685#ifdef TCP_OFFLOAD
10686	struct sge_ofld_rxq *ofld_rxq;
10687#endif
10688
10689	if (port_id >= sc->params.nports)
10690		return (EINVAL);
10691	pi = sc->port[port_id];
10692	if (pi == NULL)
10693		return (EIO);
10694
10695	/* MAC stats */
10696	t4_clr_port_stats(sc, pi->tx_chan);
10697	if (is_t6(sc)) {
10698		if (pi->fcs_reg != -1)
10699			pi->fcs_base = t4_read_reg64(sc, pi->fcs_reg);
10700		else
10701			pi->stats.rx_fcs_err = 0;
10702	}
10703	pi->tx_parse_error = 0;
10704	pi->tnl_cong_drops = 0;
10705	mtx_lock(&sc->reg_lock);
10706	for_each_vi(pi, v, vi) {
10707		if (vi->flags & VI_INIT_DONE)
10708			t4_clr_vi_stats(sc, vi->vin);
10709	}
10710	chan_map = pi->rx_e_chan_map;
10711	v = 0;	/* reuse */
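	/* Zero each channel's tunnel congestion drop counter in the TP MIB. */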
10712	while (chan_map) {
10713		i = ffs(chan_map) - 1;
10714		t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
10715		    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
10716		chan_map &= ~(1 << i);
10717	}
10718	mtx_unlock(&sc->reg_lock);
10719
10720	/*
10721	 * Since this command accepts a port, clear stats for
10722	 * all VIs on this port.
10723	 */
10724	for_each_vi(pi, v, vi) {
		if (vi->flags & VI_INIT_DONE) {
10727			for_each_rxq(vi, i, rxq) {
10728#if defined(INET) || defined(INET6)
10729				rxq->lro.lro_queued = 0;
10730				rxq->lro.lro_flushed = 0;
10731#endif
10732				rxq->rxcsum = 0;
10733				rxq->vlan_extraction = 0;
10734				rxq->vxlan_rxcsum = 0;
10735
10736				rxq->fl.cl_allocated = 0;
10737				rxq->fl.cl_recycled = 0;
10738				rxq->fl.cl_fast_recycled = 0;
10739			}
10740
10741			for_each_txq(vi, i, txq) {
10742				txq->txcsum = 0;
10743				txq->tso_wrs = 0;
10744				txq->vlan_insertion = 0;
10745				txq->imm_wrs = 0;
10746				txq->sgl_wrs = 0;
10747				txq->txpkt_wrs = 0;
10748				txq->txpkts0_wrs = 0;
10749				txq->txpkts1_wrs = 0;
10750				txq->txpkts0_pkts = 0;
10751				txq->txpkts1_pkts = 0;
10752				txq->txpkts_flush = 0;
10753				txq->raw_wrs = 0;
10754				txq->vxlan_tso_wrs = 0;
10755				txq->vxlan_txcsum = 0;
10756				txq->kern_tls_records = 0;
10757				txq->kern_tls_short = 0;
10758				txq->kern_tls_partial = 0;
10759				txq->kern_tls_full = 0;
10760				txq->kern_tls_octets = 0;
10761				txq->kern_tls_waste = 0;
10762				txq->kern_tls_options = 0;
10763				txq->kern_tls_header = 0;
10764				txq->kern_tls_fin = 0;
10765				txq->kern_tls_fin_short = 0;
10766				txq->kern_tls_cbc = 0;
10767				txq->kern_tls_gcm = 0;
10768				mp_ring_reset_stats(txq->r);
10769			}
10770
10771#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10772			for_each_ofld_txq(vi, i, ofld_txq) {
10773				ofld_txq->wrq.tx_wrs_direct = 0;
10774				ofld_txq->wrq.tx_wrs_copied = 0;
10775				counter_u64_zero(ofld_txq->tx_iscsi_pdus);
10776				counter_u64_zero(ofld_txq->tx_iscsi_octets);
10777				counter_u64_zero(ofld_txq->tx_toe_tls_records);
10778				counter_u64_zero(ofld_txq->tx_toe_tls_octets);
10779			}
10780#endif
10781#ifdef TCP_OFFLOAD
10782			for_each_ofld_rxq(vi, i, ofld_rxq) {
10783				ofld_rxq->fl.cl_allocated = 0;
10784				ofld_rxq->fl.cl_recycled = 0;
10785				ofld_rxq->fl.cl_fast_recycled = 0;
10786				ofld_rxq->rx_toe_tls_records = 0;
10787				ofld_rxq->rx_toe_tls_octets = 0;
10788			}
10789#endif
10790
10791			if (IS_MAIN_VI(vi)) {
10792				wrq = &sc->sge.ctrlq[pi->port_id];
10793				wrq->tx_wrs_direct = 0;
10794				wrq->tx_wrs_copied = 0;
10795			}
10796		}
10797	}
10798
10799	return (0);
10800}
10801
10802int
10803t4_os_find_pci_capability(struct adapter *sc, int cap)
10804{
10805	int i;
10806
10807	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
10808}
10809
10810int
10811t4_os_pci_save_state(struct adapter *sc)
10812{
10813	device_t dev;
10814	struct pci_devinfo *dinfo;
10815
10816	dev = sc->dev;
10817	dinfo = device_get_ivars(dev);
10818
10819	pci_cfg_save(dev, dinfo, 0);
10820	return (0);
10821}
10822
10823int
10824t4_os_pci_restore_state(struct adapter *sc)
10825{
10826	device_t dev;
10827	struct pci_devinfo *dinfo;
10828
10829	dev = sc->dev;
10830	dinfo = device_get_ivars(dev);
10831
10832	pci_cfg_restore(dev, dinfo);
10833	return (0);
10834}
10835
10836void
10837t4_os_portmod_changed(struct port_info *pi)
10838{
10839	struct adapter *sc = pi->adapter;
10840	struct vi_info *vi;
10841	struct ifnet *ifp;
10842	static const char *mod_str[] = {
10843		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
10844	};
10845
10846	KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
10847	    ("%s: port_type %u", __func__, pi->port_type));
10848
10849	vi = &pi->vi[0];
10850	if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
10851		PORT_LOCK(pi);
10852		build_medialist(pi);
10853		if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
10854			fixup_link_config(pi);
10855			apply_link_config(pi);
10856		}
10857		PORT_UNLOCK(pi);
10858		end_synchronized_op(sc, LOCK_HELD);
10859	}
10860
10861	ifp = vi->ifp;
10862	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
10863		if_printf(ifp, "transceiver unplugged.\n");
10864	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
10865		if_printf(ifp, "unknown transceiver inserted.\n");
10866	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
10867		if_printf(ifp, "unsupported transceiver inserted.\n");
10868	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
10869		if_printf(ifp, "%dGbps %s transceiver inserted.\n",
10870		    port_top_speed(pi), mod_str[pi->mod_type]);
10871	} else {
10872		if_printf(ifp, "transceiver (type %d) inserted.\n",
10873		    pi->mod_type);
10874	}
10875}
10876
10877void
10878t4_os_link_changed(struct port_info *pi)
10879{
10880	struct vi_info *vi;
10881	struct ifnet *ifp;
10882	struct link_config *lc = &pi->link_cfg;
10883	struct adapter *sc = pi->adapter;
10884	int v;
10885
10886	PORT_LOCK_ASSERT_OWNED(pi);
10887
10888	if (is_t6(sc)) {
10889		if (lc->link_ok) {
10890			if (lc->speed > 25000 ||
10891			    (lc->speed == 25000 && lc->fec == FEC_RS)) {
10892				pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
10893				    A_MAC_PORT_AFRAMECHECKSEQUENCEERRORS);
10894			} else {
10895				pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
10896				    A_MAC_PORT_MTIP_1G10G_RX_CRCERRORS);
10897			}
10898			pi->fcs_base = t4_read_reg64(sc, pi->fcs_reg);
10899			pi->stats.rx_fcs_err = 0;
10900		} else {
10901			pi->fcs_reg = -1;
10902		}
10903	} else {
10904		MPASS(pi->fcs_reg != -1);
10905		MPASS(pi->fcs_base == 0);
10906	}
10907
10908	for_each_vi(pi, v, vi) {
10909		ifp = vi->ifp;
10910		if (ifp == NULL)
10911			continue;
10912
10913		if (lc->link_ok) {
10914			ifp->if_baudrate = IF_Mbps(lc->speed);
10915			if_link_state_change(ifp, LINK_STATE_UP);
10916		} else {
10917			if_link_state_change(ifp, LINK_STATE_DOWN);
10918		}
10919	}
10920}
10921
10922void
10923t4_iterate(void (*func)(struct adapter *, void *), void *arg)
10924{
10925	struct adapter *sc;
10926
10927	sx_slock(&t4_list_lock);
10928	SLIST_FOREACH(sc, &t4_list, link) {
10929		/*
10930		 * func should not make any assumptions about what state sc is
10931		 * in - the only guarantee is that sc->sc_lock is a valid lock.
10932		 */
10933		func(sc, arg);
10934	}
10935	sx_sunlock(&t4_list_lock);
10936}
10937
10938static int
10939t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
10940    struct thread *td)
10941{
10942	int rc;
10943	struct adapter *sc = dev->si_drv1;
10944
10945	rc = priv_check(td, PRIV_DRIVER);
10946	if (rc != 0)
10947		return (rc);
10948
10949	switch (cmd) {
10950	case CHELSIO_T4_GETREG: {
10951		struct t4_reg *edata = (struct t4_reg *)data;
10952
10953		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10954			return (EFAULT);
10955
10956		if (edata->size == 4)
10957			edata->val = t4_read_reg(sc, edata->addr);
10958		else if (edata->size == 8)
10959			edata->val = t4_read_reg64(sc, edata->addr);
10960		else
10961			return (EINVAL);
10962
10963		break;
10964	}
10965	case CHELSIO_T4_SETREG: {
10966		struct t4_reg *edata = (struct t4_reg *)data;
10967
10968		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10969			return (EFAULT);
10970
10971		if (edata->size == 4) {
10972			if (edata->val & 0xffffffff00000000)
10973				return (EINVAL);
10974			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
10975		} else if (edata->size == 8)
10976			t4_write_reg64(sc, edata->addr, edata->val);
10977		else
10978			return (EINVAL);
10979		break;
10980	}
10981	case CHELSIO_T4_REGDUMP: {
10982		struct t4_regdump *regs = (struct t4_regdump *)data;
10983		int reglen = t4_get_regs_len(sc);
10984		uint8_t *buf;
10985
10986		if (regs->len < reglen) {
10987			regs->len = reglen; /* hint to the caller */
10988			return (ENOBUFS);
10989		}
10990
10991		regs->len = reglen;
10992		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
10993		get_regs(sc, regs, buf);
10994		rc = copyout(buf, regs->data, reglen);
10995		free(buf, M_CXGBE);
10996		break;
10997	}
10998	case CHELSIO_T4_GET_FILTER_MODE:
10999		rc = get_filter_mode(sc, (uint32_t *)data);
11000		break;
11001	case CHELSIO_T4_SET_FILTER_MODE:
11002		rc = set_filter_mode(sc, *(uint32_t *)data);
11003		break;
11004	case CHELSIO_T4_SET_FILTER_MASK:
11005		rc = set_filter_mask(sc, *(uint32_t *)data);
11006		break;
11007	case CHELSIO_T4_GET_FILTER:
11008		rc = get_filter(sc, (struct t4_filter *)data);
11009		break;
11010	case CHELSIO_T4_SET_FILTER:
11011		rc = set_filter(sc, (struct t4_filter *)data);
11012		break;
11013	case CHELSIO_T4_DEL_FILTER:
11014		rc = del_filter(sc, (struct t4_filter *)data);
11015		break;
11016	case CHELSIO_T4_GET_SGE_CONTEXT:
11017		rc = get_sge_context(sc, (struct t4_sge_context *)data);
11018		break;
11019	case CHELSIO_T4_LOAD_FW:
11020		rc = load_fw(sc, (struct t4_data *)data);
11021		break;
11022	case CHELSIO_T4_GET_MEM:
11023		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
11024		break;
11025	case CHELSIO_T4_GET_I2C:
11026		rc = read_i2c(sc, (struct t4_i2c_data *)data);
11027		break;
11028	case CHELSIO_T4_CLEAR_STATS:
11029		rc = clear_stats(sc, *(uint32_t *)data);
11030		break;
11031	case CHELSIO_T4_SCHED_CLASS:
11032		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
11033		break;
11034	case CHELSIO_T4_SCHED_QUEUE:
11035		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
11036		break;
11037	case CHELSIO_T4_GET_TRACER:
11038		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
11039		break;
11040	case CHELSIO_T4_SET_TRACER:
11041		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
11042		break;
11043	case CHELSIO_T4_LOAD_CFG:
11044		rc = load_cfg(sc, (struct t4_data *)data);
11045		break;
11046	case CHELSIO_T4_LOAD_BOOT:
11047		rc = load_boot(sc, (struct t4_bootrom *)data);
11048		break;
11049	case CHELSIO_T4_LOAD_BOOTCFG:
11050		rc = load_bootcfg(sc, (struct t4_data *)data);
11051		break;
11052	case CHELSIO_T4_CUDBG_DUMP:
11053		rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
11054		break;
11055	case CHELSIO_T4_SET_OFLD_POLICY:
11056		rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
11057		break;
11058	default:
11059		rc = ENOTTY;
11060	}
11061
11062	return (rc);
11063}
11064
11065#ifdef TCP_OFFLOAD
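/*
 * Enable or disable TOE on a VI.  pi->uld_vis counts the VIs on the port
 * that have TOE enabled, and the port's bit in sc->offload_map is set
 * while that count is non-zero.  The first enable on the adapter activates
 * the TOM ULD (and opportunistically iWARP and iSCSI).  NIC TLS and TOE
 * are mutually exclusive, so any NIC TLS configuration is undone first.
 */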
11066static int
11067toe_capability(struct vi_info *vi, bool enable)
11068{
11069	int rc;
11070	struct port_info *pi = vi->pi;
11071	struct adapter *sc = pi->adapter;
11072
11073	ASSERT_SYNCHRONIZED_OP(sc);
11074
11075	if (!is_offload(sc))
11076		return (ENODEV);
11077
11078	if (enable) {
11079#ifdef KERN_TLS
11080		if (sc->flags & KERN_TLS_ON) {
11081			int i, j, n;
11082			struct port_info *p;
11083			struct vi_info *v;
11084
11085			/*
11086			 * Reconfigure hardware for TOE if TXTLS is not enabled
11087			 * on any ifnet.
11088			 */
11089			n = 0;
11090			for_each_port(sc, i) {
11091				p = sc->port[i];
11092				for_each_vi(p, j, v) {
11093					if (v->ifp->if_capenable & IFCAP_TXTLS) {
11094						CH_WARN(sc,
11095						    "%s has NIC TLS enabled.\n",
11096						    device_get_nameunit(v->dev));
11097						n++;
11098					}
11099				}
11100			}
11101			if (n > 0) {
11102				CH_WARN(sc, "Disable NIC TLS on all interfaces "
11103				    "associated with this adapter before "
11104				    "trying to enable TOE.\n");
11105				return (EAGAIN);
11106			}
11107			rc = t4_config_kern_tls(sc, false);
11108			if (rc)
11109				return (rc);
11110		}
11111#endif
11112		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
11113			/* TOE is already enabled. */
11114			return (0);
11115		}
11116
11117		/*
11118		 * We need the port's queues around so that we're able to send
11119		 * and receive CPLs to/from the TOE even if the ifnet for this
11120		 * port has never been UP'd administratively.
11121		 */
11122		if (!(vi->flags & VI_INIT_DONE)) {
11123			rc = vi_full_init(vi);
11124			if (rc)
11125				return (rc);
11126		}
11127		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
11128			rc = vi_full_init(&pi->vi[0]);
11129			if (rc)
11130				return (rc);
11131		}
11132
11133		if (isset(&sc->offload_map, pi->port_id)) {
11134			/* TOE is enabled on another VI of this port. */
11135			pi->uld_vis++;
11136			return (0);
11137		}
11138
11139		if (!uld_active(sc, ULD_TOM)) {
11140			rc = t4_activate_uld(sc, ULD_TOM);
11141			if (rc == EAGAIN) {
11142				log(LOG_WARNING,
11143				    "You must kldload t4_tom.ko before trying "
11144				    "to enable TOE on a cxgbe interface.\n");
11145			}
11146			if (rc != 0)
11147				return (rc);
11148			KASSERT(sc->tom_softc != NULL,
11149			    ("%s: TOM activated but softc NULL", __func__));
11150			KASSERT(uld_active(sc, ULD_TOM),
11151			    ("%s: TOM activated but flag not set", __func__));
11152		}
11153
11154		/* Activate iWARP and iSCSI too, if the modules are loaded. */
11155		if (!uld_active(sc, ULD_IWARP))
11156			(void) t4_activate_uld(sc, ULD_IWARP);
11157		if (!uld_active(sc, ULD_ISCSI))
11158			(void) t4_activate_uld(sc, ULD_ISCSI);
11159
11160		pi->uld_vis++;
11161		setbit(&sc->offload_map, pi->port_id);
11162	} else {
11163		pi->uld_vis--;
11164
11165		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
11166			return (0);
11167
11168		KASSERT(uld_active(sc, ULD_TOM),
11169		    ("%s: TOM never initialized?", __func__));
11170		clrbit(&sc->offload_map, pi->port_id);
11171	}
11172
11173	return (0);
11174}
11175
11176/*
11177 * Add an upper layer driver to the global list.
11178 */
11179int
11180t4_register_uld(struct uld_info *ui)
11181{
11182	int rc = 0;
11183	struct uld_info *u;
11184
11185	sx_xlock(&t4_uld_list_lock);
11186	SLIST_FOREACH(u, &t4_uld_list, link) {
11187		if (u->uld_id == ui->uld_id) {
11188			rc = EEXIST;
11189			goto done;
11190		}
11191	}
11192
11193	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
11194	ui->refcount = 0;
11195done:
11196	sx_xunlock(&t4_uld_list_lock);
11197	return (rc);
11198}
11199
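/*
 * Remove an upper layer driver from the global list.  Fails with EBUSY if
 * the ULD is still in use and with EINVAL if it was never registered.
 */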
11200int
11201t4_unregister_uld(struct uld_info *ui)
11202{
11203	int rc = EINVAL;
11204	struct uld_info *u;
11205
11206	sx_xlock(&t4_uld_list_lock);
11207
11208	SLIST_FOREACH(u, &t4_uld_list, link) {
11209		if (u == ui) {
11210			if (ui->refcount > 0) {
11211				rc = EBUSY;
11212				goto done;
11213			}
11214
11215			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
11216			rc = 0;
11217			goto done;
11218		}
11219	}
11220done:
11221	sx_xunlock(&t4_uld_list_lock);
11222	return (rc);
11223}
11224
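/*
 * Activate a registered ULD on this adapter.  Brings the adapter to
 * FULL_INIT_DONE first if necessary.  EAGAIN means no ULD with this id has
 * registered yet, i.e. the module that provides it must be kldloaded.
 */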
11225int
11226t4_activate_uld(struct adapter *sc, int id)
11227{
11228	int rc;
11229	struct uld_info *ui;
11230
11231	ASSERT_SYNCHRONIZED_OP(sc);
11232
11233	if (id < 0 || id > ULD_MAX)
11234		return (EINVAL);
11235	rc = EAGAIN;	/* kldload the module with this ULD and try again. */
11236
11237	sx_slock(&t4_uld_list_lock);
11238
11239	SLIST_FOREACH(ui, &t4_uld_list, link) {
11240		if (ui->uld_id == id) {
11241			if (!(sc->flags & FULL_INIT_DONE)) {
11242				rc = adapter_full_init(sc);
11243				if (rc != 0)
11244					break;
11245			}
11246
11247			rc = ui->activate(sc);
11248			if (rc == 0) {
11249				setbit(&sc->active_ulds, id);
11250				ui->refcount++;
11251			}
11252			break;
11253		}
11254	}
11255
11256	sx_sunlock(&t4_uld_list_lock);
11257
11258	return (rc);
11259}
11260
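/*
 * Deactivate a ULD on this adapter.  ENXIO means no ULD with this id is
 * registered.
 */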
11261int
11262t4_deactivate_uld(struct adapter *sc, int id)
11263{
11264	int rc;
11265	struct uld_info *ui;
11266
11267	ASSERT_SYNCHRONIZED_OP(sc);
11268
11269	if (id < 0 || id > ULD_MAX)
11270		return (EINVAL);
11271	rc = ENXIO;
11272
11273	sx_slock(&t4_uld_list_lock);
11274
11275	SLIST_FOREACH(ui, &t4_uld_list, link) {
11276		if (ui->uld_id == id) {
11277			rc = ui->deactivate(sc);
11278			if (rc == 0) {
11279				clrbit(&sc->active_ulds, id);
11280				ui->refcount--;
11281			}
11282			break;
11283		}
11284	}
11285
11286	sx_sunlock(&t4_uld_list_lock);
11287
11288	return (rc);
11289}
11290
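/*
 * Task that delivers an adapter-wide async event notification; only the
 * iWARP ULD is notified.
 */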
11291static void
11292t4_async_event(void *arg, int n)
11293{
11294	struct uld_info *ui;
11295	struct adapter *sc = (struct adapter *)arg;
11296
11297	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4async") != 0)
11298		return;
11299	sx_slock(&t4_uld_list_lock);
11300	SLIST_FOREACH(ui, &t4_uld_list, link) {
11301		if (ui->uld_id == ULD_IWARP) {
11302			ui->async_event(sc);
11303			break;
11304		}
11305	}
11306	sx_sunlock(&t4_uld_list_lock);
11307	end_synchronized_op(sc, 0);
11308}
11309
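/* Returns non-zero if the given ULD is currently active on the adapter. */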
11310int
11311uld_active(struct adapter *sc, int uld_id)
11312{
11313
11314	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
11315
11316	return (isset(&sc->active_ulds, uld_id));
11317}
11318#endif
11319
11320#ifdef KERN_TLS
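/*
 * Enable or disable NIC TLS on the adapter.  TOE and NIC TLS are mutually
 * exclusive: enabling fails with EAGAIN while any port has TOE enabled.
 */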
11321static int
11322ktls_capability(struct adapter *sc, bool enable)
11323{
11324	ASSERT_SYNCHRONIZED_OP(sc);
11325
11326	if (!is_ktls(sc))
11327		return (ENODEV);
11328
11329	if (enable) {
11330		if (sc->flags & KERN_TLS_ON)
11331			return (0);	/* already on */
11332		if (sc->offload_map != 0) {
11333			CH_WARN(sc,
11334			    "Disable TOE on all interfaces associated with "
11335			    "this adapter before trying to enable NIC TLS.\n");
11336			return (EAGAIN);
11337		}
11338		return (t4_config_kern_tls(sc, true));
11339	} else {
11340		/*
11341		 * Nothing to do for disable.  If TOE is enabled sometime later
11342		 * then toe_capability will reconfigure the hardware.
11343		 */
11344		return (0);
11345	}
11346}
11347#endif
11348
11349/*
11350 * t  = ptr to tunable.
11351 * nc = number of CPUs.
11352 * c  = compiled in default for that tunable.
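 *
 * *t > 0 is a user setting and is left alone.  *t < 0 requests
 * min(nc, -*t) queues, and *t == 0 yields min(nc, c).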
11353 */
11354static void
11355calculate_nqueues(int *t, int nc, const int c)
11356{
11357	int nq;
11358
11359	if (*t > 0)
11360		return;
11361	nq = *t < 0 ? -*t : c;
11362	*t = min(nc, nq);
11363}
11364
11365/*
11366 * Come up with reasonable defaults for some of the tunables, provided they're
11367 * not set by the user (in which case we'll use the values as is).
11368 */
11369static void
11370tweak_tunables(void)
11371{
11372	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
11373
11374	if (t4_ntxq < 1) {
11375#ifdef RSS
11376		t4_ntxq = rss_getnumbuckets();
11377#else
11378		calculate_nqueues(&t4_ntxq, nc, NTXQ);
11379#endif
11380	}
11381
11382	calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
11383
11384	if (t4_nrxq < 1) {
11385#ifdef RSS
11386		t4_nrxq = rss_getnumbuckets();
11387#else
11388		calculate_nqueues(&t4_nrxq, nc, NRXQ);
11389#endif
11390	}
11391
11392	calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
11393
11394#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
11395	calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
11396	calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
11397#endif
11398#ifdef TCP_OFFLOAD
11399	calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
11400	calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
11401#endif
11402
11403#if defined(TCP_OFFLOAD) || defined(KERN_TLS)
11404	if (t4_toecaps_allowed == -1)
11405		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
11406#else
11407	if (t4_toecaps_allowed == -1)
11408		t4_toecaps_allowed = 0;
11409#endif
11410
11411#ifdef TCP_OFFLOAD
11412	if (t4_rdmacaps_allowed == -1) {
11413		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
11414		    FW_CAPS_CONFIG_RDMA_RDMAC;
11415	}
11416
11417	if (t4_iscsicaps_allowed == -1) {
11418		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
11419		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
11420		    FW_CAPS_CONFIG_ISCSI_T10DIF;
11421	}
11422
11423	if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
11424		t4_tmr_idx_ofld = TMR_IDX_OFLD;
11425
11426	if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
11427		t4_pktc_idx_ofld = PKTC_IDX_OFLD;
11428
11429	if (t4_toe_tls_rx_timeout < 0)
11430		t4_toe_tls_rx_timeout = 0;
11431#else
11432	if (t4_rdmacaps_allowed == -1)
11433		t4_rdmacaps_allowed = 0;
11434
11435	if (t4_iscsicaps_allowed == -1)
11436		t4_iscsicaps_allowed = 0;
11437#endif
11438
11439#ifdef DEV_NETMAP
11440	calculate_nqueues(&t4_nnmtxq, nc, NNMTXQ);
11441	calculate_nqueues(&t4_nnmrxq, nc, NNMRXQ);
11442	calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
11443	calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
11444#endif
11445
11446	if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
11447		t4_tmr_idx = TMR_IDX;
11448
11449	if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
11450		t4_pktc_idx = PKTC_IDX;
11451
11452	if (t4_qsize_txq < 128)
11453		t4_qsize_txq = 128;
11454
11455	if (t4_qsize_rxq < 128)
11456		t4_qsize_rxq = 128;
11457	while (t4_qsize_rxq & 7)
11458		t4_qsize_rxq++;
11459
11460	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
11461
11462	/*
11463	 * Number of VIs to create per-port.  The first VI is the "main" regular
11464	 * VI for the port.  The rest are additional virtual interfaces on the
11465	 * same physical port.  Note that the main VI does not have native
11466	 * netmap support but the extra VIs do.
11467	 *
11468	 * Limit the number of VIs per port to the number of available
11469	 * MAC addresses per port.
11470	 */
11471	if (t4_num_vis < 1)
11472		t4_num_vis = 1;
11473	if (t4_num_vis > nitems(vi_mac_funcs)) {
11474		t4_num_vis = nitems(vi_mac_funcs);
11475		printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
11476	}
11477
11478	if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
11479		pcie_relaxed_ordering = 1;
11480#if defined(__i386__) || defined(__amd64__)
11481		if (cpu_vendor_id == CPU_VENDOR_INTEL)
11482			pcie_relaxed_ordering = 0;
11483#endif
11484	}
11485}
11486
11487#ifdef DDB
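/*
 * Dump the hardware TCB of a tid from within the debugger.  The 128B TCB
 * is read through PCIe memory window 2; the window's position register is
 * saved on entry and restored before returning.
 */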
11488static void
11489t4_dump_tcb(struct adapter *sc, int tid)
11490{
11491	uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
11492
11493	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
11494	save = t4_read_reg(sc, reg);
11495	base = sc->memwin[2].mw_base;
11496
11497	/* Dump TCB for the tid */
11498	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
11499	tcb_addr += tid * TCB_SIZE;
11500
11501	if (is_t4(sc)) {
11502		pf = 0;
11503		win_pos = tcb_addr & ~0xf;	/* start must be 16B aligned */
11504	} else {
11505		pf = V_PFNUM(sc->pf);
11506		win_pos = tcb_addr & ~0x7f;	/* start must be 128B aligned */
11507	}
11508	t4_write_reg(sc, reg, win_pos | pf);
11509	t4_read_reg(sc, reg);
11510
11511	off = tcb_addr - win_pos;
11512	for (i = 0; i < 4; i++) {
11513		uint32_t buf[8];
11514		for (j = 0; j < 8; j++, off += 4)
11515			buf[j] = htonl(t4_read_reg(sc, base + off));
11516
11517		db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
11518		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
11519		    buf[7]);
11520	}
11521
11522	t4_write_reg(sc, reg, save);
11523	t4_read_reg(sc, reg);
11524}
11525
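/*
 * Dump the firmware device log from within the debugger.  The log is a
 * circular buffer in card memory, so the entry with the lowest timestamp
 * is located first and entries are printed in order from there.
 */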
11526static void
11527t4_dump_devlog(struct adapter *sc)
11528{
11529	struct devlog_params *dparams = &sc->params.devlog;
11530	struct fw_devlog_e e;
11531	int i, first, j, m, nentries, rc;
11532	uint64_t ftstamp = UINT64_MAX;
11533
11534	if (dparams->start == 0) {
11535		db_printf("devlog params not valid\n");
11536		return;
11537	}
11538
11539	nentries = dparams->size / sizeof(struct fw_devlog_e);
11540	m = fwmtype_to_hwmtype(dparams->memtype);
11541
11542	/* Find the first entry. */
11543	first = -1;
11544	for (i = 0; i < nentries && !db_pager_quit; i++) {
11545		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
11546		    sizeof(e), (void *)&e);
11547		if (rc != 0)
11548			break;
11549
11550		if (e.timestamp == 0)
11551			break;
11552
11553		e.timestamp = be64toh(e.timestamp);
11554		if (e.timestamp < ftstamp) {
11555			ftstamp = e.timestamp;
11556			first = i;
11557		}
11558	}
11559
11560	if (first == -1)
11561		return;
11562
11563	i = first;
11564	do {
11565		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
11566		    sizeof(e), (void *)&e);
11567		if (rc != 0)
11568			return;
11569
11570		if (e.timestamp == 0)
11571			return;
11572
11573		e.timestamp = be64toh(e.timestamp);
11574		e.seqno = be32toh(e.seqno);
11575		for (j = 0; j < 8; j++)
11576			e.params[j] = be32toh(e.params[j]);
11577
11578		db_printf("%10d  %15ju  %8s  %8s  ",
11579		    e.seqno, e.timestamp,
11580		    (e.level < nitems(devlog_level_strings) ?
11581			devlog_level_strings[e.level] : "UNKNOWN"),
11582		    (e.facility < nitems(devlog_facility_strings) ?
11583			devlog_facility_strings[e.facility] : "UNKNOWN"));
11584		db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
11585		    e.params[3], e.params[4], e.params[5], e.params[6],
11586		    e.params[7]);
11587
11588		if (++i == nentries)
11589			i = 0;
11590	} while (i != first && !db_pager_quit);
11591}
11592
11593static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
11594_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
11595
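/* "show t4 devlog <nexus>", e.g. "show t4 devlog t4nex0". */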
11596DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
11597{
11598	device_t dev;
11599	int t;
11600	bool valid;
11601
11602	valid = false;
11603	t = db_read_token();
11604	if (t == tIDENT) {
11605		dev = device_lookup_by_name(db_tok_string);
11606		valid = true;
11607	}
11608	db_skip_to_eol();
11609	if (!valid) {
11610		db_printf("usage: show t4 devlog <nexus>\n");
11611		return;
11612	}
11613
11614	if (dev == NULL) {
11615		db_printf("device not found\n");
11616		return;
11617	}
11618
11619	t4_dump_devlog(device_get_softc(dev));
11620}
11621
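/* "show t4 tcb <nexus> <tid>", e.g. "show t4 tcb t4nex0 1234". */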
11622DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
11623{
11624	device_t dev;
11625	int radix, tid, t;
11626	bool valid;
11627
11628	valid = false;
11629	radix = db_radix;
11630	db_radix = 10;
11631	t = db_read_token();
11632	if (t == tIDENT) {
11633		dev = device_lookup_by_name(db_tok_string);
11634		t = db_read_token();
11635		if (t == tNUMBER) {
11636			tid = db_tok_number;
11637			valid = true;
11638		}
11639	}
11640	db_radix = radix;
11641	db_skip_to_eol();
11642	if (!valid) {
11643		db_printf("usage: show t4 tcb <nexus> <tid>\n");
11644		return;
11645	}
11646
11647	if (dev == NULL) {
11648		db_printf("device not found\n");
11649		return;
11650	}
11651	if (tid < 0) {
11652		db_printf("invalid tid\n");
11653		return;
11654	}
11655
11656	t4_dump_tcb(device_get_softc(dev), tid);
11657}
11658#endif
11659
11660static eventhandler_tag vxlan_start_evtag;
11661static eventhandler_tag vxlan_stop_evtag;
11662
11663struct vxlan_evargs {
11664	struct ifnet *ifp;
11665	uint16_t port;
11666};
11667
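/*
 * Program the VXLAN UDP port into the MPS and install a match-all raw MAC
 * filter (one per port, on the main VI) so that encapsulated traffic is
 * received.
 */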
11668static void
11669enable_vxlan_rx(struct adapter *sc)
11670{
11671	int i, rc;
11672	struct port_info *pi;
11673	uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
11674
11675	ASSERT_SYNCHRONIZED_OP(sc);
11676
11677	t4_write_reg(sc, A_MPS_RX_VXLAN_TYPE, V_VXLAN(sc->vxlan_port) |
11678	    F_VXLAN_EN);
11679	for_each_port(sc, i) {
11680		pi = sc->port[i];
11681		if (pi->vxlan_tcam_entry)
11682			continue;
11683		rc = t4_alloc_raw_mac_filt(sc, pi->vi[0].viid, match_all_mac,
11684		    match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
11685		    true);
11686		if (rc < 0) {
11687			rc = -rc;
11688			CH_ERR(&pi->vi[0],
11689			    "failed to add VXLAN TCAM entry: %d.\n", rc);
11690		} else {
11691			MPASS(rc == sc->rawf_base + pi->port_id);
11692			pi->vxlan_tcam_entry = true;
11693		}
11694	}
11695}
11696
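/*
 * vxlan_start handler for one adapter.  The hardware supports a single
 * VXLAN port: another interface using the same port just bumps the
 * refcount, while a different port is rejected with an error.
 */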
11697static void
11698t4_vxlan_start(struct adapter *sc, void *arg)
11699{
11700	struct vxlan_evargs *v = arg;
11701
11702	if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
11703		return;
11704	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxst") != 0)
11705		return;
11706
11707	if (sc->vxlan_refcount == 0) {
11708		sc->vxlan_port = v->port;
11709		sc->vxlan_refcount = 1;
11710		enable_vxlan_rx(sc);
11711	} else if (sc->vxlan_port == v->port) {
11712		sc->vxlan_refcount++;
11713	} else {
11714		CH_ERR(sc, "VXLAN already configured on port %d; "
11715		    "ignoring attempt to configure it on port %d\n",
11716		    sc->vxlan_port, v->port);
11717	}
11718	end_synchronized_op(sc, 0);
11719}
11720
11721static void
11722t4_vxlan_stop(struct adapter *sc, void *arg)
11723{
11724	struct vxlan_evargs *v = arg;
11725
11726	if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
11727		return;
11728	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxsp") != 0)
11729		return;
11730
11731	/*
11732	 * VXLANs may have been configured before the driver was loaded so we
11733	 * may see more stops than starts.  This is not handled cleanly but at
11734	 * least we keep the refcount sane.
11735	 */
11736	if (sc->vxlan_port != v->port)
11737		goto done;
11738	if (sc->vxlan_refcount == 0) {
11739		log(LOG_ERR,
11740		    "%s: VXLAN operation on port %d was stopped earlier; "
11741		    "ignoring attempt to stop it again.\n",
11742		    device_get_nameunit(sc->dev), sc->vxlan_port);
11743	} else if (--sc->vxlan_refcount == 0) {
11744		t4_set_reg_field(sc, A_MPS_RX_VXLAN_TYPE, F_VXLAN_EN, 0);
11745	}
11746done:
11747	end_synchronized_op(sc, 0);
11748}
11749
11750static void
11751t4_vxlan_start_handler(void *arg __unused, struct ifnet *ifp,
11752    sa_family_t family, u_int port)
11753{
11754	struct vxlan_evargs v;
11755
11756	MPASS(family == AF_INET || family == AF_INET6);
11757	v.ifp = ifp;
11758	v.port = port;
11759
11760	t4_iterate(t4_vxlan_start, &v);
11761}
11762
11763static void
11764t4_vxlan_stop_handler(void *arg __unused, struct ifnet *ifp, sa_family_t family,
11765    u_int port)
11766{
11767	struct vxlan_evargs v;
11768
11769	MPASS(family == AF_INET || family == AF_INET6);
11770	v.ifp = ifp;
11771	v.port = port;
11772
11773	t4_iterate(t4_vxlan_stop, &v);
11774}
11775
11777static struct sx mlu;	/* mod load unload */
11778SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
11779
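/*
 * Module event handler shared by the t4nex, t5nex, and t6nex drivers.
 * "loaded" counts how many of them are currently loaded so that global
 * state (CPL handlers, adapter/ULD lists, tunables, event handlers) is set
 * up exactly once and torn down only when the last driver is unloaded.
 */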
11780static int
11781mod_event(module_t mod, int cmd, void *arg)
11782{
11783	int rc = 0;
11784	static int loaded = 0;
11785
11786	switch (cmd) {
11787	case MOD_LOAD:
11788		sx_xlock(&mlu);
11789		if (loaded++ == 0) {
11790			t4_sge_modload();
11791			t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
11792			    t4_filter_rpl, CPL_COOKIE_FILTER);
11793			t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
11794			    do_l2t_write_rpl, CPL_COOKIE_FILTER);
11795			t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
11796			    t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
11797			t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
11798			    t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
11799			t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
11800			    t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
11801			t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
11802			t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
11803			t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
11804			    do_smt_write_rpl);
11805			sx_init(&t4_list_lock, "T4/T5 adapters");
11806			SLIST_INIT(&t4_list);
11807			callout_init(&fatal_callout, 1);
11808#ifdef TCP_OFFLOAD
11809			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
11810			SLIST_INIT(&t4_uld_list);
11811#endif
11812#ifdef INET6
11813			t4_clip_modload();
11814#endif
11815#ifdef KERN_TLS
11816			t6_ktls_modload();
11817#endif
11818			t4_tracer_modload();
11819			tweak_tunables();
11820			vxlan_start_evtag =
11821			    EVENTHANDLER_REGISTER(vxlan_start,
11822				t4_vxlan_start_handler, NULL,
11823				EVENTHANDLER_PRI_ANY);
11824			vxlan_stop_evtag =
11825			    EVENTHANDLER_REGISTER(vxlan_stop,
11826				t4_vxlan_stop_handler, NULL,
11827				EVENTHANDLER_PRI_ANY);
11828		}
11829		sx_xunlock(&mlu);
11830		break;
11831
11832	case MOD_UNLOAD:
11833		sx_xlock(&mlu);
11834		if (--loaded == 0) {
11835			int tries;
11836
11837			sx_slock(&t4_list_lock);
11838			if (!SLIST_EMPTY(&t4_list)) {
11839				rc = EBUSY;
11840				sx_sunlock(&t4_list_lock);
11841				goto done_unload;
11842			}
11843#ifdef TCP_OFFLOAD
11844			sx_slock(&t4_uld_list_lock);
11845			if (!SLIST_EMPTY(&t4_uld_list)) {
11846				rc = EBUSY;
11847				sx_sunlock(&t4_uld_list_lock);
11848				sx_sunlock(&t4_list_lock);
11849				goto done_unload;
11850			}
11851#endif
11852			tries = 0;
11853			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
11854				uprintf("%ju clusters with custom free routine "
11855				    "still in use.\n", t4_sge_extfree_refs());
11856				pause("t4unload", 2 * hz);
11857			}
11858#ifdef TCP_OFFLOAD
11859			sx_sunlock(&t4_uld_list_lock);
11860#endif
11861			sx_sunlock(&t4_list_lock);
11862
11863			if (t4_sge_extfree_refs() == 0) {
11864				EVENTHANDLER_DEREGISTER(vxlan_start,
11865				    vxlan_start_evtag);
11866				EVENTHANDLER_DEREGISTER(vxlan_stop,
11867				    vxlan_stop_evtag);
11868				t4_tracer_modunload();
11869#ifdef KERN_TLS
11870				t6_ktls_modunload();
11871#endif
11872#ifdef INET6
11873				t4_clip_modunload();
11874#endif
11875#ifdef TCP_OFFLOAD
11876				sx_destroy(&t4_uld_list_lock);
11877#endif
11878				sx_destroy(&t4_list_lock);
11879				t4_sge_modunload();
11880				loaded = 0;
11881			} else {
11882				rc = EBUSY;
11883				loaded++;	/* undo earlier decrement */
11884			}
11885		}
11886done_unload:
11887		sx_xunlock(&mlu);
11888		break;
11889	}
11890
11891	return (rc);
11892}
11893
11894static devclass_t t4_devclass, t5_devclass, t6_devclass;
11895static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
11896static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
11897
11898DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
11899MODULE_VERSION(t4nex, 1);
11900MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
11901#ifdef DEV_NETMAP
11902MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
11903#endif /* DEV_NETMAP */
11904
11905DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
11906MODULE_VERSION(t5nex, 1);
11907MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
11908#ifdef DEV_NETMAP
11909MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
11910#endif /* DEV_NETMAP */
11911
11912DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
11913MODULE_VERSION(t6nex, 1);
11914MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
11915#ifdef DEV_NETMAP
11916MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
11917#endif /* DEV_NETMAP */
11918
11919DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
11920MODULE_VERSION(cxgbe, 1);
11921
11922DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
11923MODULE_VERSION(cxl, 1);
11924
11925DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
11926MODULE_VERSION(cc, 1);
11927
11928DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
11929MODULE_VERSION(vcxgbe, 1);
11930
11931DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
11932MODULE_VERSION(vcxl, 1);
11933
11934DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
11935MODULE_VERSION(vcc, 1);
11936