/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/t4_main.c 306664 2016-10-03 23:49:05Z jhb $");

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#if defined(__i386__) || defined(__amd64__)
#include <vm/vm.h>
#include <vm/pmap.h>
#endif
#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"
#include "t4_if.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static int t4_ready(device_t);
static int t4_read_port_device(device_t, int, device_t *);
static device_method_t t4_methods[] = {
	DEVMETHOD(device_probe,		t4_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t4_driver = {
	"t4nex",
	t4_methods,
	sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
device_method_t cxgbe_methods[] = {
	DEVMETHOD(device_probe,		cxgbe_probe),
	DEVMETHOD(device_attach,	cxgbe_attach),
	DEVMETHOD(device_detach,	cxgbe_detach),
	{ 0, 0 }
};
static driver_t cxgbe_driver = {
	"cxgbe",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
	DEVMETHOD(device_probe,		vcxgbe_probe),
	DEVMETHOD(device_attach,	vcxgbe_attach),
	DEVMETHOD(device_detach,	vcxgbe_detach),
	{ 0, 0 }
};
static driver_t vcxgbe_driver = {
	"vcxgbe",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;

static struct cdevsw t4_cdevsw = {
       .d_version = D_VERSION,
       .d_ioctl = t4_ioctl,
       .d_name = "t4nex",
};

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
	DEVMETHOD(device_probe,		t5_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD(t4_is_main_ready,	t4_ready),
	DEVMETHOD(t4_read_port_device,	t4_read_port_device),

	DEVMETHOD_END
};
static driver_t t5_driver = {
	"t5nex",
	t5_methods,
	sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
	"cxl",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
	"vcxl",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

/* ifnet + media interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);
static int cxgbe_media_change(struct ifnet *);
static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -1 as an indication to tweak_tunables() that it should
 * provide a reasonable default when the driver is loaded.
 *
 * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
 * T5 are under hw.cxl.
 */
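
/*
 * For example (the values here are illustrative only), tunables registered
 * with TUNABLE_INT/TUNABLE_STR are read from the kernel environment when the
 * driver loads, so they can be set at boot time in /boot/loader.conf:
 *
 *	hw.cxgbe.ntxq10g="8"
 *	hw.cxgbe.config_file="uwire"
 *	hw.cxl.write_combine="1"
 */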

/*
 * Number of queues for tx and rx, 10G and 1G, NIC and offload.
 */
#define NTXQ_10G 16
int t4_ntxq10g = -1;
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);

#define NRXQ_10G 8
int t4_nrxq10g = -1;
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);

#define NTXQ_1G 4
int t4_ntxq1g = -1;
TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);

#define NRXQ_1G 2
int t4_nrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);

#define NTXQ_VI 1
static int t4_ntxq_vi = -1;
TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi);

#define NRXQ_VI 1
static int t4_nrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi);

static int t4_rsrv_noflowq = 0;
TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);
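
/*
 * If rsrv_noflowq is set, NIC txq 0 of each main interface is reserved for
 * traffic that carries no flowid, and flowid-hashed traffic is steered away
 * from it; see the txq selection in cxgbe_transmit().
 */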

#ifdef TCP_OFFLOAD
#define NOFLDTXQ_10G 8
static int t4_nofldtxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);

#define NOFLDRXQ_10G 2
static int t4_nofldrxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);

#define NOFLDTXQ_1G 2
static int t4_nofldtxq1g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);

#define NOFLDRXQ_1G 1
static int t4_nofldrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi);

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi);
#endif

#ifdef DEV_NETMAP
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi);

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi);
#endif

/*
 * Holdoff parameters for 10G and 1G ports.
 */
#define TMR_IDX_10G 1
int t4_tmr_idx_10g = TMR_IDX_10G;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);

#define PKTC_IDX_10G (-1)
int t4_pktc_idx_10g = PKTC_IDX_10G;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);

#define TMR_IDX_1G 1
int t4_tmr_idx_1g = TMR_IDX_1G;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);

#define PKTC_IDX_1G (-1)
int t4_pktc_idx_1g = PKTC_IDX_1G;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);

/*
 * Size (# of entries) of each tx and rx queue.
 */
unsigned int t4_qsize_txq = TX_EQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);

unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
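
/*
 * For example, hw.cxgbe.interrupt_types=2 (bit 1 only) restricts the driver
 * to MSI, while the default of 7 allows all three types (MSI-X is preferred
 * when it is allowed and available).
 */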

/*
 * Configuration file.
 */
#define DEFAULT_CF	"default"
#define FLASH_CF	"flash"
#define UWIRE_CF	"uwire"
#define FPGA_CF		"fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));

/*
 * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 */
static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
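
/*
 * For example, hw.cxgbe.pause_settings=0 disables flow control in both
 * directions, 1 enables rx_pause only, and the default of 3
 * (PAUSE_RX | PAUSE_TX) enables both.
 */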

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).
 */
static unsigned int t4_fw_install = 1;
TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);

static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);

static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
    FW_CAPS_CONFIG_SWITCH_EGRESS;
TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);

static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);

static int t4_toecaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);

static int t4_rdmacaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);

static int t4_tlscaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed);

static int t4_iscsicaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);

static int t4_fcoecaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);

static int t5_write_combine = 0;
TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);

static int t4_num_vis = 1;
TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);
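
/*
 * Setting hw.cxgbe.num_vis to N > 1 makes t4_attach() create N - 1 extra
 * vcxgbe/vcxl virtual interfaces on each port in addition to the main
 * interface.  The count is capped at nitems(vi_mac_funcs) because each extra
 * VI obtains its unique MAC address from one of those functions.
 */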

/* Functions used by extra VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
	FW_VI_FUNC_OFLD,
	FW_VI_FUNC_IWARP,
	FW_VI_FUNC_OPENISCSI,
	FW_VI_FUNC_OPENFCOE,
	FW_VI_FUNC_FOISCSI,
	FW_VI_FUNC_FOFCOE,
};

struct intrs_and_queues {
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */
	uint16_t intr_flags_1g;	/* Interrupt flags for each 1G port */
	uint16_t ntxq10g;	/* # of NIC txq's for each 10G port */
	uint16_t nrxq10g;	/* # of NIC rxq's for each 10G port */
	uint16_t ntxq1g;	/* # of NIC txq's for each 1G port */
	uint16_t nrxq1g;	/* # of NIC rxq's for each 1G port */
	uint16_t rsrv_noflowq;	/* Flag whether to reserve queue 0 */
	uint16_t nofldtxq10g;	/* # of TOE txq's for each 10G port */
	uint16_t nofldrxq10g;	/* # of TOE rxq's for each 10G port */
	uint16_t nofldtxq1g;	/* # of TOE txq's for each 1G port */
	uint16_t nofldrxq1g;	/* # of TOE rxq's for each 1G port */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};

struct filter_entry {
        uint32_t valid:1;	/* filter allocated and valid */
        uint32_t locked:1;	/* filter is administratively locked */
        uint32_t pending:1;	/* filter action is pending firmware reply */
	uint32_t smtidx:8;	/* Source MAC Table index for smac */
	struct l2t_entry *l2t;	/* Layer Two Table entry for dmac */

        struct t4_filter_specification fs;
};

static void setup_memwin(struct adapter *);
static void position_memwin(struct adapter *, int, uint32_t);
static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int);
static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *,
    int);
static inline int write_via_memwin(struct adapter *, int, uint32_t,
    const uint32_t *, int);
static int validate_mem_range(struct adapter *, uint32_t, int);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
    uint32_t *);
static int fixup_devlog_params(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, int, int, int,
    struct intrs_and_queues *);
static int prep_firmware(struct adapter *);
static int partition_resources(struct adapter *, const struct firmware *,
    const char *);
static int get_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static void build_medialist(struct port_info *, struct ifmedia *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct adapter *, struct vi_info *);
static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
static void cxgbe_tick(void *);
static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
#ifdef SBUF_DRAIN
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tc_params(SYSCTL_HANDLER_ARGS);
#endif
#ifdef TCP_OFFLOAD
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
#endif
static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t);
static uint32_t mode_to_fconf(uint32_t);
static uint32_t mode_to_iconf(uint32_t);
static int check_fspec_against_fconf_iconf(struct adapter *,
    struct t4_filter_specification *);
static int get_filter_mode(struct adapter *, uint32_t *);
static int set_filter_mode(struct adapter *, uint32_t);
static inline uint64_t get_filter_hits(struct adapter *, uint32_t);
static int get_filter(struct adapter *, struct t4_filter *);
static int set_filter(struct adapter *, struct t4_filter *);
static int del_filter(struct adapter *, struct t4_filter *);
static void clear_filter(struct filter_entry *);
static int set_filter_wr(struct adapter *, int);
static int del_filter_wr(struct adapter *, int);
static int set_tcb_rpl(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, int);
#endif
static int mod_event(module_t, int, void *);
static int notify_siblings(device_t, int);

struct {
	uint16_t device;
	char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 X 1G */
	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
#ifdef notyet
	{0x5404,  "Chelsio T520-BCH"},
	{0x5405,  "Chelsio T540-BCH"},
	{0x5406,  "Chelsio T540-CH"},
	{0x5408,  "Chelsio T520-CX"},
	{0x540b,  "Chelsio B520-SR"},
	{0x540c,  "Chelsio B504-BT"},
	{0x540f,  "Chelsio Amsterdam"},
	{0x5413,  "Chelsio T580-CHR"},
#endif
};

#ifdef TCP_OFFLOAD
/*
 * service_iq() has an iq and needs the fl.  Offset of fl from the iq should be
 * exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xa000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t4_pciids); i++) {
		if (d == t4_pciids[i].device) {
			device_set_desc(dev, t4_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t5_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xb000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t5_pciids); i++) {
		if (d == t5_pciids[i].device) {
			device_set_desc(dev, t5_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
	device_t root_port;
	uint32_t v;

	/*
	 * The T5 chips do not properly echo the No Snoop and Relaxed
	 * Ordering attributes when replying to a TLP from a Root
	 * Port.  As a workaround, find the parent Root Port and
	 * disable No Snoop and Relaxed Ordering.  Note that this
	 * affects all devices under this root port.
	 */
	root_port = pci_find_pcie_root_port(dev);
	if (root_port == NULL) {
		device_printf(dev, "Unable to find parent root port\n");
		return;
	}

	v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
	if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
	    0)
		device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
		    device_get_nameunit(root_port));
}

static int
t4_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
	struct make_dev_args mda;
	struct intrs_and_queues iaq;
	struct sge *s;
	uint8_t *buf;
#ifdef TCP_OFFLOAD
	int ofld_rqidx, ofld_tqidx;
#endif
#ifdef DEV_NETMAP
	int nm_rqidx, nm_tqidx;
#endif
	int num_vis;

	sc = device_get_softc(dev);
	sc->dev = dev;
	TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags);

	if ((pci_get_device(dev) & 0xff00) == 0x5400)
		t5_attribute_workaround(dev);
	pci_enable_busmaster(dev);
	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
		uint32_t v;

		pci_set_max_read_req(dev, 4096);
		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
		v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
		pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);

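		/*
		 * PCIEM_CTL_MAX_PAYLOAD is bits 7:5 of the PCIe device control
		 * register; an encoded value of n means a maximum payload size
		 * of 128 << n bytes (e.g. n = 2 is 512 bytes).
		 */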
		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
	}

	sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
	sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
	sc->traceq = -1;
	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
	    device_get_nameunit(dev));

	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	t4_add_adapter(sc);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	rc = t4_map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * This is the real PF# to which we're attaching.  Works from within PCI
	 * passthrough environments too, where pci_get_function() could return a
	 * different PF# depending on the passthrough configuration.  We need to
	 * use the real PF# in all our communication with the firmware.
	 */
	sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI));
	sc->mbox = sc->pf;

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

	/* Prepare the adapter for operation. */
	buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
	rc = -t4_prep_adapter(sc, buf);
	free(buf, M_CXGBE);
	if (rc != 0) {
		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
		goto done;
	}

	/*
	 * Do this really early, with the memory windows set up even before the
	 * character device.  The userland tool's register i/o and mem read
	 * will work even in "recovery mode".
	 */
	setup_memwin(sc);
	if (t4_init_devlog_params(sc, 0) == 0)
		fixup_devlog_params(sc);
	make_dev_args_init(&mda);
	mda.mda_devsw = &t4_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (rc != 0)
		device_printf(dev, "failed to create nexus char device: %d.\n",
		    rc);

	/* Go no further if recovery mode has been requested. */
	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
		device_printf(dev, "recovery mode.\n");
		goto done;
	}

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/* Prepare the firmware for operation */
	rc = prep_firmware(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * Number of VIs to create per-port.  The first VI is the "main" regular
	 * VI for the port.  The rest are additional virtual interfaces on the
	 * same physical port.  Note that the main VI does not have native
	 * netmap support but the extra VIs do.
	 *
	 * Limit the number of VIs per port to the number of available
	 * MAC addresses per port.
	 */
	if (t4_num_vis >= 1)
		num_vis = t4_num_vis;
	else
		num_vis = 1;
	if (num_vis > nitems(vi_mac_funcs)) {
		num_vis = nitems(vi_mac_funcs);
		device_printf(dev, "Number of VIs limited to %d\n", num_vis);
	}

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.  We also figure
	 * out whether a port is 10G or 1G and use that information when
	 * calculating how many interrupts to attempt to allocate.
	 */
	n10g = n1g = 0;
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		/*
		 * XXX: vi[0] is special so we can't delay this allocation until
		 * pi->nvi's final value is known.
		 */
		pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX);
		pi->link_cfg.requested_fc |= t4_pause_settings;
		pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX);
		pi->link_cfg.fc |= t4_pause_settings;

		rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
		if (rc != 0) {
			device_printf(dev, "port %d l1cfg failed: %d\n", i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		pi->tc = malloc(sizeof(struct tx_sched_class) *
		    sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK);

		if (is_10G_port(pi) || is_40G_port(pi)) {
			n10g++;
		} else {
			n1g++;
		}

		pi->linkdnrc = -1;

		pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1);
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */
	if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0)
		num_vis = 1;

	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
	if (num_vis > 1) {
		s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi;
		s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi;
	}
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
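
	/*
	 * Worked example, assuming the compiled-in defaults are what actually
	 * get used: a 2-port 10G adapter with num_vis = 1, nrxq10g = 8 and
	 * ntxq10g = 16 has nrxq = 16 and ntxq = 32 at this point, so
	 * neq = 16 + 32 + 2 + 1 = 51 and niq = 16 + 1 = 17 before the offload
	 * and netmap queues below are added.
	 */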
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
		s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
		if (num_vis > 1) {
			s->nofldrxq += (n10g + n1g) * (num_vis - 1) *
			    iaq.nofldrxq_vi;
			s->nofldtxq += (n10g + n1g) * (num_vis - 1) *
			    iaq.nofldtxq_vi;
		}
		s->neq += s->nofldtxq + s->nofldrxq;
		s->niq += s->nofldrxq;

		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
		    M_CXGBE, M_ZERO | M_WAITOK);
		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef DEV_NETMAP
	if (num_vis > 1) {
		s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi;
		s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi;
	}
	s->neq += s->nnmtxq + s->nnmrxq;
	s->niq += s->nnmrxq;

	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
	    M_CXGBE, M_ZERO | M_WAITOK);
	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
	    M_CXGBE, M_ZERO | M_WAITOK);
#endif

	s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	t4_init_l2t(sc, M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
#ifdef TCP_OFFLOAD
	ofld_rqidx = ofld_tqidx = 0;
#endif
#ifdef DEV_NETMAP
	nm_rqidx = nm_tqidx = 0;
#endif
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		pi->nvi = num_vis;
		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			if (is_10G_port(pi) || is_40G_port(pi)) {
				vi->tmr_idx = t4_tmr_idx_10g;
				vi->pktc_idx = t4_pktc_idx_10g;
				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi;
				vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi;
			} else {
				vi->tmr_idx = t4_tmr_idx_1g;
				vi->pktc_idx = t4_pktc_idx_1g;
				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi;
				vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi;
			}
			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			if (j == 0 && vi->ntxq > 1)
				vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
			else
				vi->rsrv_noflowq = 0;

#ifdef TCP_OFFLOAD
			vi->first_ofld_rxq = ofld_rqidx;
			vi->first_ofld_txq = ofld_tqidx;
			if (is_10G_port(pi) || is_40G_port(pi)) {
				vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ;
				vi->nofldrxq = j == 0 ? iaq.nofldrxq10g :
				    iaq.nofldrxq_vi;
				vi->nofldtxq = j == 0 ? iaq.nofldtxq10g :
				    iaq.nofldtxq_vi;
			} else {
				vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ;
				vi->nofldrxq = j == 0 ? iaq.nofldrxq1g :
				    iaq.nofldrxq_vi;
				vi->nofldtxq = j == 0 ? iaq.nofldtxq1g :
				    iaq.nofldtxq_vi;
			}
			ofld_rqidx += vi->nofldrxq;
			ofld_tqidx += vi->nofldtxq;
#endif
#ifdef DEV_NETMAP
			if (j > 0) {
				vi->first_nm_rxq = nm_rqidx;
				vi->first_nm_txq = nm_tqidx;
				vi->nnmrxq = iaq.nnmrxq_vi;
				vi->nnmtxq = iaq.nnmtxq_vi;
				nm_rqidx += vi->nnmrxq;
				nm_tqidx += vi->nnmtxq;
			}
#endif
		}
	}

	rc = t4_setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	rc = bus_generic_attach(dev);
	if (rc != 0) {
		device_printf(dev,
		    "failed to attach all child ports: %d\n", rc);
		goto done;
	}

	device_printf(dev,
	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);

	t4_set_desc(sc);

	notify_siblings(dev, 0);

done:
	if (rc != 0 && sc->cdev) {
		/* cdev was created and so cxgbetool works; recover that way. */
		device_printf(dev,
		    "error during attach, adapter is now in recovery mode.\n");
		rc = 0;
	}

	if (rc != 0)
		t4_detach_common(dev);
	else
		t4_sysctls(sc);

	return (rc);
}

static int
t4_ready(device_t dev)
{
	struct adapter *sc;

	sc = device_get_softc(dev);
	if (sc->flags & FW_OK)
		return (0);
	return (ENXIO);
}

static int
t4_read_port_device(device_t dev, int port, device_t *child)
{
	struct adapter *sc;
	struct port_info *pi;

	sc = device_get_softc(dev);
	if (port < 0 || port >= MAX_NPORTS)
		return (EINVAL);
	pi = sc->port[port];
	if (pi == NULL || pi->dev == NULL)
		return (ENXIO);
	*child = pi->dev;
	return (0);
}

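/*
 * Notify the driver instances on the other PCI functions of the same card
 * that this function is attaching or detaching.  An error from a sibling
 * aborts a detach; attach notifications are fire-and-forget.
 */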
static int
notify_siblings(device_t dev, int detaching)
{
	device_t sibling;
	int error, i;

	error = 0;
	for (i = 0; i < PCI_FUNCMAX; i++) {
		if (i == pci_get_function(dev))
			continue;
		sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
		    pci_get_slot(dev), i);
		if (sibling == NULL || !device_is_attached(sibling))
			continue;
		if (detaching)
			error = T4_DETACH_CHILD(sibling);
		else
			(void)T4_ATTACH_CHILD(sibling);
		if (error)
			break;
	}
	return (error);
}

/*
 * Idempotent
 */
static int
t4_detach(device_t dev)
{
	struct adapter *sc;
	int rc;

	sc = device_get_softc(dev);

	rc = notify_siblings(dev, 1);
	if (rc) {
		device_printf(dev,
		    "failed to detach sibling devices: %d\n", rc);
		return (rc);
	}

	return (t4_detach_common(dev));
}

int
t4_detach_common(device_t dev)
{
	struct adapter *sc;
	struct port_info *pi;
	int i, rc;

	sc = device_get_softc(dev);

	if (sc->flags & FULL_INIT_DONE) {
		if (!(sc->flags & IS_VF))
			t4_intr_disable(sc);
	}

	if (sc->cdev) {
		destroy_dev(sc->cdev);
		sc->cdev = NULL;
	}

	if (device_is_attached(dev)) {
		rc = bus_generic_detach(dev);
		if (rc) {
			device_printf(dev,
			    "failed to detach child devices: %d\n", rc);
			return (rc);
		}
	}

	for (i = 0; i < sc->intr_count; i++)
		t4_free_irq(sc, &sc->irq[i]);

	for (i = 0; i < MAX_NPORTS; i++) {
		pi = sc->port[i];
		if (pi) {
			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
			if (pi->dev)
				device_delete_child(dev, pi->dev);

			mtx_destroy(&pi->pi_lock);
			free(pi->vi, M_CXGBE);
			free(pi->tc, M_CXGBE);
			free(pi, M_CXGBE);
		}
	}

	if (sc->flags & FULL_INIT_DONE)
		adapter_full_uninit(sc);

	if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
		t4_fw_bye(sc, sc->mbox);

	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
		pci_release_msi(dev);

	if (sc->regs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	if (sc->udbs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->msix_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
		    sc->msix_res);

	if (sc->l2t)
		t4_free_l2t(sc->l2t);

#ifdef TCP_OFFLOAD
	free(sc->sge.ofld_rxq, M_CXGBE);
	free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
	free(sc->sge.nm_rxq, M_CXGBE);
	free(sc->sge.nm_txq, M_CXGBE);
#endif
	free(sc->irq, M_CXGBE);
	free(sc->sge.rxq, M_CXGBE);
	free(sc->sge.txq, M_CXGBE);
	free(sc->sge.ctrlq, M_CXGBE);
	free(sc->sge.iqmap, M_CXGBE);
	free(sc->sge.eqmap, M_CXGBE);
	free(sc->tids.ftid_tab, M_CXGBE);
	t4_destroy_dma_tag(sc);
	if (mtx_initialized(&sc->sc_lock)) {
		sx_xlock(&t4_list_lock);
		SLIST_REMOVE(&t4_list, sc, adapter, link);
		sx_xunlock(&t4_list_lock);
		mtx_destroy(&sc->sc_lock);
	}

	callout_drain(&sc->sfl_callout);
	if (mtx_initialized(&sc->tids.ftid_lock))
		mtx_destroy(&sc->tids.ftid_lock);
	if (mtx_initialized(&sc->sfl_lock))
		mtx_destroy(&sc->sfl_lock);
	if (mtx_initialized(&sc->ifp_lock))
		mtx_destroy(&sc->ifp_lock);
	if (mtx_initialized(&sc->reg_lock))
		mtx_destroy(&sc->reg_lock);

	for (i = 0; i < NUM_MEMWIN; i++) {
		struct memwin *mw = &sc->memwin[i];

		if (rw_initialized(&mw->mw_lock))
			rw_destroy(&mw->mw_lock);
	}

	bzero(sc, sizeof(*sc));

	return (0);
}

static int
cxgbe_probe(device_t dev)
{
	char buf[128];
	struct port_info *pi = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
	device_set_desc_copy(dev, buf);

	return (BUS_PROBE_DEFAULT);
}

#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS)
#define T4_CAP_ENABLE (T4_CAP)

static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
	struct ifnet *ifp;
	struct sbuf *sb;

	vi->xact_addr_filt = -1;
	callout_init(&vi->tick, 1);

	/* Allocate an ifnet and set it up */
	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}
	vi->ifp = ifp;
	ifp->if_softc = vi;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

	ifp->if_init = cxgbe_init;
	ifp->if_ioctl = cxgbe_ioctl;
	ifp->if_transmit = cxgbe_transmit;
	ifp->if_qflush = cxgbe_qflush;
	ifp->if_get_counter = cxgbe_get_counter;

	ifp->if_capabilities = T4_CAP;
#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0)
		ifp->if_capabilities |= IFCAP_TOE;
#endif
	ifp->if_capenable = T4_CAP_ENABLE;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
	ifp->if_hw_tsomaxsegsize = 65536;

	/* Initialize ifmedia for this VI */
	ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
	    cxgbe_media_status);
	build_medialist(vi->pi, &vi->media);

	vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
	    EVENTHANDLER_PRI_ANY);

	ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
	if (vi->nnmrxq != 0)
		cxgbe_nm_attach(vi);
#endif
	sb = sbuf_new_auto();
	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#ifdef TCP_OFFLOAD
	if (ifp->if_capabilities & IFCAP_TOE)
		sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
		    vi->nofldtxq, vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
		    vi->nnmtxq, vi->nnmrxq);
#endif
	sbuf_finish(sb);
	device_printf(dev, "%s\n", sbuf_data(sb));
	sbuf_delete(sb);

	vi_sysctls(vi);

	return (0);
}

static int
cxgbe_attach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct vi_info *vi;
	int i, rc;

	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);

	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
	if (rc)
		return (rc);

	for_each_vi(pi, i, vi) {
		if (i == 0)
			continue;
		vi->dev = device_add_child(dev, is_t4(pi->adapter) ?
		    "vcxgbe" : "vcxl", -1);
		if (vi->dev == NULL) {
			device_printf(dev, "failed to add VI %d\n", i);
			continue;
		}
		device_set_softc(vi->dev, vi);
	}

	cxgbe_sysctls(pi);

	bus_generic_attach(dev);

	return (0);
}

static void
cxgbe_vi_detach(struct vi_info *vi)
{
	struct ifnet *ifp = vi->ifp;

	ether_ifdetach(ifp);

	if (vi->vlan_c)
		EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);

	/* Let detach proceed even if these fail. */
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		cxgbe_nm_detach(vi);
#endif
	cxgbe_uninit_synchronized(vi);
	callout_drain(&vi->tick);
	vi_full_uninit(vi);

	ifmedia_removeall(&vi->media);
	if_free(vi->ifp);
	vi->ifp = NULL;
}

static int
cxgbe_detach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct adapter *sc = pi->adapter;
	int rc;

	/* Detach the extra VIs first. */
	rc = bus_generic_detach(dev);
	if (rc)
		return (rc);
	device_delete_children(dev);

	doom_vi(sc, &pi->vi[0]);

	if (pi->flags & HAS_TRACEQ) {
		sc->traceq = -1;	/* cloner should not create ifnet */
		t4_tracer_port_detach(sc);
	}

	cxgbe_vi_detach(&pi->vi[0]);
	callout_drain(&pi->tick);

	end_synchronized_op(sc, 0);

	return (0);
}

static void
cxgbe_init(void *arg)
{
	struct vi_info *vi = arg;
	struct adapter *sc = vi->pi->adapter;

	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
		return;
	cxgbe_init_synchronized(vi);
	end_synchronized_op(sc, 0);
}

static int
cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
{
	int rc = 0, mtu, flags, can_sleep;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->pi->adapter;
	struct ifreq *ifr = (struct ifreq *)data;
	uint32_t mask;

	switch (cmd) {
	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
			return (EINVAL);

		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
		if (rc)
			return (rc);
		ifp->if_mtu = mtu;
		if (vi->flags & VI_INIT_DONE) {
			t4_update_fl_bufsize(ifp);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_MTU);
		}
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFFLAGS:
		can_sleep = 0;
redo_sifflags:
		rc = begin_synchronized_op(sc, vi,
		    can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
		if (rc)
			return (rc);

		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = vi->if_flags;
				if ((ifp->if_flags ^ flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					if (can_sleep == 1) {
						end_synchronized_op(sc, 0);
						can_sleep = 0;
						goto redo_sifflags;
					}
					rc = update_mac_settings(ifp,
					    XGMAC_PROMISC | XGMAC_ALLMULTI);
				}
			} else {
				if (can_sleep == 0) {
					end_synchronized_op(sc, LOCK_HELD);
					can_sleep = 1;
					goto redo_sifflags;
				}
				rc = cxgbe_init_synchronized(vi);
			}
			vi->if_flags = ifp->if_flags;
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			if (can_sleep == 0) {
				end_synchronized_op(sc, LOCK_HELD);
				can_sleep = 1;
				goto redo_sifflags;
			}
			rc = cxgbe_uninit_synchronized(vi);
		}
		end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
		rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
		if (rc)
			return (rc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
		end_synchronized_op(sc, LOCK_HELD);
		break;

	case SIOCSIFCAP:
		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
		if (rc)
			return (rc);

		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;

		/*
		 * Note that we leave CSUM_TSO alone (it is always set).  The
		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
		 * sending a TSO request our way, so it's sufficient to toggle
		 * IFCAP_TSOx only.
		 */
		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
		}
		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
		}
		if (mask & IFCAP_LRO) {
#if defined(INET) || defined(INET6)
			int i;
			struct sge_rxq *rxq;

			ifp->if_capenable ^= IFCAP_LRO;
			for_each_rxq(vi, i, rxq) {
				if (ifp->if_capenable & IFCAP_LRO)
					rxq->iq.flags |= IQ_LRO_ENABLED;
				else
					rxq->iq.flags &= ~IQ_LRO_ENABLED;
			}
#endif
		}
#ifdef TCP_OFFLOAD
		if (mask & IFCAP_TOE) {
			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;

			rc = toe_capability(vi, enable);
			if (rc != 0)
				goto fail;

			ifp->if_capenable ^= mask;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_VLANEX);
		}
		if (mask & IFCAP_VLAN_MTU) {
			ifp->if_capenable ^= IFCAP_VLAN_MTU;

			/* Need to find out how to disable auto-mtu-inflation */
		}
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWCSUM)
			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;

#ifdef VLAN_CAPABILITIES
		VLAN_CAPABILITIES(ifp);
#endif
fail:
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		ifmedia_ioctl(ifp, ifr, &vi->media, cmd);
		break;

	case SIOCGI2C: {
		struct ifi2creq i2c;

		rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (rc != 0)
			break;
		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
			rc = EPERM;
			break;
		}
		if (i2c.len > sizeof(i2c.data)) {
			rc = EINVAL;
			break;
		}
		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
		if (rc)
			return (rc);
		rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr,
		    i2c.offset, i2c.len, &i2c.data[0]);
		end_synchronized_op(sc, 0);
		if (rc == 0)
			rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}

	default:
		rc = ether_ioctl(ifp, cmd, data);
	}

	return (rc);
}

static int
cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct sge_txq *txq;
	void *items[1];
	int rc;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */

	if (__predict_false(pi->link_cfg.link_ok == 0)) {
		m_freem(m);
		return (ENETDOWN);
	}

	rc = parse_pkt(sc, &m);
	if (__predict_false(rc != 0)) {
		MPASS(m == NULL);			/* was freed already */
		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
		return (rc);
	}

	/* Select a txq. */
	txq = &sc->sge.txq[vi->first_txq];
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
		    vi->rsrv_noflowq);
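	/*
	 * Packets without a flowid stay on txq[first_txq]; hashed packets are
	 * spread over the VI's queues rsrv_noflowq through ntxq - 1, which is
	 * how a nonzero rsrv_noflowq keeps queue 0 free of flowid-hashed
	 * traffic.
	 */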

	items[0] = m;
	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
	if (__predict_false(rc != 0))
		m_freem(m);

	return (rc);
}

static void
cxgbe_qflush(struct ifnet *ifp)
{
	struct vi_info *vi = ifp->if_softc;
	struct sge_txq *txq;
	int i;

	/* queues do not exist if !VI_INIT_DONE. */
	if (vi->flags & VI_INIT_DONE) {
		for_each_txq(vi, i, txq) {
			TXQ_LOCK(txq);
			txq->eq.flags &= ~EQ_ENABLED;
			TXQ_UNLOCK(txq);
			while (!mp_ring_is_idle(txq->r)) {
				mp_ring_check_drainage(txq->r, 0);
				pause("qflush", 1);
			}
		}
	}
	if_qflush(ifp);
}

static uint64_t
vi_get_counter(struct ifnet *ifp, ift_counter c)
{
	struct vi_info *vi = ifp->if_softc;
	struct fw_vi_stats_vf *s = &vi->stats;

	vi_refresh_stats(vi->pi->adapter, vi);

	switch (c) {
	case IFCOUNTER_IPACKETS:
		return (s->rx_bcast_frames + s->rx_mcast_frames +
		    s->rx_ucast_frames);
	case IFCOUNTER_IERRORS:
		return (s->rx_err_frames);
	case IFCOUNTER_OPACKETS:
		return (s->tx_bcast_frames + s->tx_mcast_frames +
		    s->tx_ucast_frames + s->tx_offload_frames);
	case IFCOUNTER_OERRORS:
		return (s->tx_drop_frames);
	case IFCOUNTER_IBYTES:
		return (s->rx_bcast_bytes + s->rx_mcast_bytes +
		    s->rx_ucast_bytes);
	case IFCOUNTER_OBYTES:
		return (s->tx_bcast_bytes + s->tx_mcast_bytes +
		    s->tx_ucast_bytes + s->tx_offload_bytes);
	case IFCOUNTER_IMCASTS:
		return (s->rx_mcast_frames);
	case IFCOUNTER_OMCASTS:
		return (s->tx_mcast_frames);
	case IFCOUNTER_OQDROPS: {
		uint64_t drops;

		drops = 0;
		if (vi->flags & VI_INIT_DONE) {
			int i;
			struct sge_txq *txq;

			for_each_txq(vi, i, txq)
				drops += counter_u64_fetch(txq->r->drops);
		}

		return (drops);

	}

	default:
		return (if_get_counter_default(ifp, c));
	}
}

uint64_t
cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct port_stats *s = &pi->stats;

	if (pi->nvi > 1 || sc->flags & IS_VF)
		return (vi_get_counter(ifp, c));

	cxgbe_refresh_stats(sc, pi);

	switch (c) {
	case IFCOUNTER_IPACKETS:
		return (s->rx_frames);

	case IFCOUNTER_IERRORS:
		return (s->rx_jabber + s->rx_runt + s->rx_too_long +
		    s->rx_fcs_err + s->rx_len_err);

	case IFCOUNTER_OPACKETS:
		return (s->tx_frames);

	case IFCOUNTER_OERRORS:
		return (s->tx_error_frames);

	case IFCOUNTER_IBYTES:
		return (s->rx_octets);

	case IFCOUNTER_OBYTES:
		return (s->tx_octets);

	case IFCOUNTER_IMCASTS:
		return (s->rx_mcast_frames);

	case IFCOUNTER_OMCASTS:
		return (s->tx_mcast_frames);

	case IFCOUNTER_IQDROPS:
		return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
		    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
		    s->rx_trunc3 + pi->tnl_cong_drops);

	case IFCOUNTER_OQDROPS: {
		uint64_t drops;

		drops = s->tx_drop;
		if (vi->flags & VI_INIT_DONE) {
			int i;
			struct sge_txq *txq;

			for_each_txq(vi, i, txq)
				drops += counter_u64_fetch(txq->r->drops);
		}

		return (drops);

	}

	default:
		return (if_get_counter_default(ifp, c));
	}
}

static int
cxgbe_media_change(struct ifnet *ifp)
{
	struct vi_info *vi = ifp->if_softc;

	device_printf(vi->dev, "%s unimplemented.\n", __func__);

	return (EOPNOTSUPP);
}

static void
cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct ifmedia_entry *cur;
	int speed = pi->link_cfg.speed;

	cur = vi->media.ifm_cur;

	ifmr->ifm_status = IFM_AVALID;
	if (!pi->link_cfg.link_ok)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;

	/* active and current will differ iff current media is autoselect. */
	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
		return;

	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	if (speed == 10000)
		ifmr->ifm_active |= IFM_10G_T;
	else if (speed == 1000)
		ifmr->ifm_active |= IFM_1000_T;
	else if (speed == 100)
		ifmr->ifm_active |= IFM_100_TX;
	else if (speed == 10)
		ifmr->ifm_active |= IFM_10_T;
	else
		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
			    speed));
}

static int
vcxgbe_probe(device_t dev)
{
	char buf[128];
	struct vi_info *vi = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
	    vi - vi->pi->vi);
	device_set_desc_copy(dev, buf);

	return (BUS_PROBE_DEFAULT);
}

static int
vcxgbe_attach(device_t dev)
{
	struct vi_info *vi;
	struct port_info *pi;
	struct adapter *sc;
	int func, index, rc;
	u32 param, val;

	vi = device_get_softc(dev);
	pi = vi->pi;
	sc = pi->adapter;

	index = vi - pi->vi;
	KASSERT(index < nitems(vi_mac_funcs),
	    ("%s: VI %s doesn't have a MAC func", __func__,
	    device_get_nameunit(dev)));
	func = vi_mac_funcs[index];
	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
	    vi->hw_addr, &vi->rss_size, func, 0);
	if (rc < 0) {
		device_printf(dev, "Failed to allocate virtual interface "
		    "for port %d: %d\n", pi->port_id, -rc);
		return (-rc);
	}
	vi->viid = rc;

	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
	    V_FW_PARAMS_PARAM_YZ(vi->viid);
	rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
	if (rc)
		vi->rss_base = 0xffff;
	else {
		/* MPASS((val >> 16) == rss_size); */
		vi->rss_base = val & 0xffff;
	}

	rc = cxgbe_vi_attach(dev, vi);
	if (rc) {
		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
		return (rc);
	}
	return (0);
}

static int
vcxgbe_detach(device_t dev)
{
	struct vi_info *vi;
	struct adapter *sc;

	vi = device_get_softc(dev);
	sc = vi->pi->adapter;

	doom_vi(sc, vi);

	cxgbe_vi_detach(vi);
	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);

	end_synchronized_op(sc, 0);

	return (0);
}

void
t4_fatal_err(struct adapter *sc)
{
	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
	t4_intr_disable(sc);
	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
	    device_get_nameunit(sc->dev));
}

void
t4_add_adapter(struct adapter *sc)
{
	sx_xlock(&t4_list_lock);
	SLIST_INSERT_HEAD(&t4_list, sc, link);
	sx_xunlock(&t4_list_lock);
}

int
t4_map_bars_0_and_4(struct adapter *sc)
{
	sc->regs_rid = PCIR_BAR(0);
	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE);
	if (sc->regs_res == NULL) {
		device_printf(sc->dev, "cannot map registers.\n");
		return (ENXIO);
	}
1994	sc->bt = rman_get_bustag(sc->regs_res);
1995	sc->bh = rman_get_bushandle(sc->regs_res);
1996	sc->mmio_len = rman_get_size(sc->regs_res);
1997	setbit(&sc->doorbells, DOORBELL_KDB);
1998
1999	sc->msix_rid = PCIR_BAR(4);
2000	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2001	    &sc->msix_rid, RF_ACTIVE);
2002	if (sc->msix_res == NULL) {
2003		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2004		return (ENXIO);
2005	}
2006
2007	return (0);
2008}
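
/*
 * BAR usage as mapped by this driver (editorial summary of the code above
 * and below): BAR0 holds the adapter register file (sc->regs_res; mapping it
 * also enables the kernel doorbell, DOORBELL_KDB), BAR4 holds the MSI-X
 * tables, and BAR2 (mapped in t4_map_bar_2() below) holds the userspace
 * doorbells.
 */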
2009
2010int
2011t4_map_bar_2(struct adapter *sc)
2012{
2013
2014	/*
2015	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
2016	 * to map it if RDMA is disabled.
2017	 */
2018	if (is_t4(sc) && sc->rdmacaps == 0)
2019		return (0);
2020
2021	sc->udbs_rid = PCIR_BAR(2);
2022	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2023	    &sc->udbs_rid, RF_ACTIVE);
2024	if (sc->udbs_res == NULL) {
2025		device_printf(sc->dev, "cannot map doorbell BAR.\n");
2026		return (ENXIO);
2027	}
2028	sc->udbs_base = rman_get_virtual(sc->udbs_res);
2029
2030	if (is_t5(sc)) {
2031		setbit(&sc->doorbells, DOORBELL_UDB);
2032#if defined(__i386__) || defined(__amd64__)
2033		if (t5_write_combine) {
2034			int rc;
2035
2036			/*
2037			 * Enable write combining on BAR2.  This is the
2038			 * userspace doorbell BAR and is split into 128B
2039			 * (UDBS_SEG_SIZE) doorbell regions, each associated
2040			 * with an egress queue.  The first 64B has the doorbell
2041			 * and the second 64B can be used to submit a tx work
2042			 * request with an implicit doorbell.
2043			 */
2044
2045			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2046			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2047			if (rc == 0) {
2048				clrbit(&sc->doorbells, DOORBELL_UDB);
2049				setbit(&sc->doorbells, DOORBELL_WCWR);
2050				setbit(&sc->doorbells, DOORBELL_UDBWC);
2051			} else {
2052				device_printf(sc->dev,
2053				    "couldn't enable write combining: %d\n",
2054				    rc);
2055			}
2056
2057			t4_write_reg(sc, A_SGE_STAT_CFG,
2058			    V_STATSOURCE_T5(7) | V_STATMODE(0));
2059		}
2060#endif
2061	}
2062
2063	return (0);
2064}
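
/*
 * Editorial note: with DOORBELL_WCWR available (write combining enabled
 * above), a 64B tx work request can be written into the second half of a
 * queue's 128B BAR2 segment and the doorbell rung with a single coalesced
 * write, instead of a separate doorbell register write.  This summarizes the
 * comment in t4_map_bar_2(); which segment a given egress queue uses is
 * determined when the queue is created (see the SGE code).
 */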
2065
2066struct memwin_init {
2067	uint32_t base;
2068	uint32_t aperture;
2069};
2070
2071static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2072	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
2073	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
2074	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2075};
2076
2077static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2078	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
2079	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
2080	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2081};
2082
2083static void
2084setup_memwin(struct adapter *sc)
2085{
2086	const struct memwin_init *mw_init;
2087	struct memwin *mw;
2088	int i;
2089	uint32_t bar0;
2090
2091	if (is_t4(sc)) {
2092		/*
2093		 * Read low 32b of bar0 indirectly via the hardware backdoor
2094		 * mechanism.  Works from within PCI passthrough environments
2095		 * too, where rman_get_start() can return a different value.  We
2096		 * need to program the T4 memory window decoders with the actual
2097		 * addresses that will be coming across the PCIe link.
2098		 */
2099		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
2100		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
2101
2102		mw_init = &t4_memwin[0];
2103	} else {
2104		/* T5+ use the relative offset inside the PCIe BAR */
2105		bar0 = 0;
2106
2107		mw_init = &t5_memwin[0];
2108	}
2109
2110	for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
2111		rw_init(&mw->mw_lock, "memory window access");
2112		mw->mw_base = mw_init->base;
2113		mw->mw_aperture = mw_init->aperture;
2114		mw->mw_curpos = 0;
2115		t4_write_reg(sc,
2116		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
2117		    (mw->mw_base + bar0) | V_BIR(0) |
2118		    V_WINDOW(ilog2(mw->mw_aperture) - 10));
2119		rw_wlock(&mw->mw_lock);
2120		position_memwin(sc, i, 0);
2121		rw_wunlock(&mw->mw_lock);
2122	}
2123
2124	/* flush */
2125	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
2126}
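
/*
 * Worked example of the window size encoding used above (editorial note):
 * the V_WINDOW field is ilog2(aperture) - 10, so an aperture must be a power
 * of two of at least 1KB.  A 64KB aperture, for instance, is encoded as
 * ilog2(65536) - 10 = 6.
 */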
2127
2128/*
2129 * Positions the memory window at the given address in the card's address
2130 * space.  There are some alignment requirements and the actual position may
2131 * be at an address prior to the requested address.  mw->mw_curpos always has
2132 * the actual position of the window.
2133 */
2134static void
2135position_memwin(struct adapter *sc, int idx, uint32_t addr)
2136{
2137	struct memwin *mw;
2138	uint32_t pf;
2139	uint32_t reg;
2140
2141	MPASS(idx >= 0 && idx < NUM_MEMWIN);
2142	mw = &sc->memwin[idx];
2143	rw_assert(&mw->mw_lock, RA_WLOCKED);
2144
2145	if (is_t4(sc)) {
2146		pf = 0;
2147		mw->mw_curpos = addr & ~0xf;	/* start must be 16B aligned */
2148	} else {
2149		pf = V_PFNUM(sc->pf);
2150		mw->mw_curpos = addr & ~0x7f;	/* start must be 128B aligned */
2151	}
2152	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
2153	t4_write_reg(sc, reg, mw->mw_curpos | pf);
2154	t4_read_reg(sc, reg);	/* flush */
2155}
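
/*
 * Alignment example (editorial note): on a T4 the window start must be 16B
 * aligned, so a request to position a window at 0x10037 actually places it
 * at mw_curpos = 0x10030; on a T5 the start is rounded down to a 128B
 * boundary, giving 0x10000.  Callers must therefore address through
 * mw_curpos rather than assuming the window begins at the requested address.
 */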
2156
2157static int
2158rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2159    int len, int rw)
2160{
2161	struct memwin *mw;
2162	uint32_t mw_end, v;
2163
2164	MPASS(idx >= 0 && idx < NUM_MEMWIN);
2165
2166	/* Memory can only be accessed in naturally aligned 4 byte units */
2167	if (addr & 3 || len & 3 || len <= 0)
2168		return (EINVAL);
2169
2170	mw = &sc->memwin[idx];
2171	while (len > 0) {
2172		rw_rlock(&mw->mw_lock);
2173		mw_end = mw->mw_curpos + mw->mw_aperture;
2174		if (addr >= mw_end || addr < mw->mw_curpos) {
2175			/* Will need to reposition the window */
2176			if (!rw_try_upgrade(&mw->mw_lock)) {
2177				rw_runlock(&mw->mw_lock);
2178				rw_wlock(&mw->mw_lock);
2179			}
2180			rw_assert(&mw->mw_lock, RA_WLOCKED);
2181			position_memwin(sc, idx, addr);
2182			rw_downgrade(&mw->mw_lock);
2183			mw_end = mw->mw_curpos + mw->mw_aperture;
2184		}
2185		rw_assert(&mw->mw_lock, RA_RLOCKED);
2186		while (addr < mw_end && len > 0) {
2187			if (rw == 0) {
2188				v = t4_read_reg(sc, mw->mw_base + addr -
2189				    mw->mw_curpos);
2190				*val++ = le32toh(v);
2191			} else {
2192				v = *val++;
2193				t4_write_reg(sc, mw->mw_base + addr -
2194				    mw->mw_curpos, htole32(v));
2195			}
2196			addr += 4;
2197			len -= 4;
2198		}
2199		rw_runlock(&mw->mw_lock);
2200	}
2201
2202	return (0);
2203}
2204
2205static inline int
2206read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2207    int len)
2208{
2209
2210	return (rw_via_memwin(sc, idx, addr, val, len, 0));
2211}
2212
2213static inline int
2214write_via_memwin(struct adapter *sc, int idx, uint32_t addr,
2215    const uint32_t *val, int len)
2216{
2217
2218	return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1));
2219}
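
/*
 * Illustrative usage (not from the original source): reading 64 bytes of
 * adapter memory at a previously validated global address through memory
 * window 0.
 *
 *	uint32_t buf[16];
 *	int rc;
 *
 *	rc = read_via_memwin(sc, 0, addr, buf, sizeof(buf));
 *
 * rw_via_memwin() repositions the window on demand and converts each word
 * from the card's little-endian layout.
 */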
2220
2221static int
2222t4_range_cmp(const void *a, const void *b)
2223{
2224	return ((const struct t4_range *)a)->start -
2225	       ((const struct t4_range *)b)->start;
2226}
2227
2228/*
2229 * Verify that the memory range specified by the addr/len pair is valid within
2230 * the card's address space.
2231 */
2232static int
2233validate_mem_range(struct adapter *sc, uint32_t addr, int len)
2234{
2235	struct t4_range mem_ranges[4], *r, *next;
2236	uint32_t em, addr_len;
2237	int i, n, remaining;
2238
2239	/* Memory can only be accessed in naturally aligned 4 byte units */
2240	if (addr & 3 || len & 3 || len <= 0)
2241		return (EINVAL);
2242
2243	/* Enabled memories */
2244	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2245
2246	r = &mem_ranges[0];
2247	n = 0;
2248	bzero(r, sizeof(mem_ranges));
2249	if (em & F_EDRAM0_ENABLE) {
2250		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2251		r->size = G_EDRAM0_SIZE(addr_len) << 20;
2252		if (r->size > 0) {
2253			r->start = G_EDRAM0_BASE(addr_len) << 20;
2254			if (addr >= r->start &&
2255			    addr + len <= r->start + r->size)
2256				return (0);
2257			r++;
2258			n++;
2259		}
2260	}
2261	if (em & F_EDRAM1_ENABLE) {
2262		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2263		r->size = G_EDRAM1_SIZE(addr_len) << 20;
2264		if (r->size > 0) {
2265			r->start = G_EDRAM1_BASE(addr_len) << 20;
2266			if (addr >= r->start &&
2267			    addr + len <= r->start + r->size)
2268				return (0);
2269			r++;
2270			n++;
2271		}
2272	}
2273	if (em & F_EXT_MEM_ENABLE) {
2274		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2275		r->size = G_EXT_MEM_SIZE(addr_len) << 20;
2276		if (r->size > 0) {
2277			r->start = G_EXT_MEM_BASE(addr_len) << 20;
2278			if (addr >= r->start &&
2279			    addr + len <= r->start + r->size)
2280				return (0);
2281			r++;
2282			n++;
2283		}
2284	}
2285	if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
2286		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2287		r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
2288		if (r->size > 0) {
2289			r->start = G_EXT_MEM1_BASE(addr_len) << 20;
2290			if (addr >= r->start &&
2291			    addr + len <= r->start + r->size)
2292				return (0);
2293			r++;
2294			n++;
2295		}
2296	}
2297	MPASS(n <= nitems(mem_ranges));
2298
2299	if (n > 1) {
2300		/* Sort and merge the ranges. */
2301		qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
2302
2303		/* Start from index 0 and examine the next n - 1 entries. */
2304		r = &mem_ranges[0];
2305		for (remaining = n - 1; remaining > 0; remaining--, r++) {
2306
2307			MPASS(r->size > 0);	/* r is a valid entry. */
2308			next = r + 1;
2309			MPASS(next->size > 0);	/* and so is the next one. */
2310
2311			while (r->start + r->size >= next->start) {
2312				/* Merge the next one into the current entry. */
2313				r->size = max(r->start + r->size,
2314				    next->start + next->size) - r->start;
2315				n--;	/* One fewer entry in total. */
2316				if (--remaining == 0)
2317					goto done;	/* short circuit */
2318				next++;
2319			}
2320			if (next != r + 1) {
2321				/*
2322				 * Some entries were merged into r and next
2323				 * points to the first valid entry that couldn't
2324				 * be merged.
2325				 */
2326				MPASS(next->size > 0);	/* must be valid */
2327				memcpy(r + 1, next, remaining * sizeof(*r));
2328#ifdef INVARIANTS
2329				/*
2330				 * This is so that the foo->size assertions in
2331				 * the next iteration of the loop do the right
2332				 * thing for entries that were pulled up and are
2333				 * no longer valid.
2334				 */
2335				MPASS(n < nitems(mem_ranges));
2336				bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
2337				    sizeof(struct t4_range));
2338#endif
2339			}
2340		}
2341done:
2342		/* Done merging the ranges. */
2343		MPASS(n > 0);
2344		r = &mem_ranges[0];
2345		for (i = 0; i < n; i++, r++) {
2346			if (addr >= r->start &&
2347			    addr + len <= r->start + r->size)
2348				return (0);
2349		}
2350	}
2351
2352	return (EFAULT);
2353}
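
/*
 * Merge example (editorial note): with EDC0 at [0, 1GB) and external memory
 * at [1GB, 3GB), the sort-and-merge above coalesces them into a single range
 * [0, 3GB), so an access that straddles the 1GB boundary still validates.
 * Without the merge, such an access would be rejected even though every byte
 * of it is backed by some enabled memory.
 */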
2354
2355static int
2356fwmtype_to_hwmtype(int mtype)
2357{
2358
2359	switch (mtype) {
2360	case FW_MEMTYPE_EDC0:
2361		return (MEM_EDC0);
2362	case FW_MEMTYPE_EDC1:
2363		return (MEM_EDC1);
2364	case FW_MEMTYPE_EXTMEM:
2365		return (MEM_MC0);
2366	case FW_MEMTYPE_EXTMEM1:
2367		return (MEM_MC1);
2368	default:
2369		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
2370	}
2371}
2372
2373/*
2374 * Verify that the memory range specified by the memtype/offset/len pair is
2375 * valid and lies entirely within the memtype specified.  The global address of
2376 * the start of the range is returned in addr.
2377 */
2378static int
2379validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
2380    uint32_t *addr)
2381{
2382	uint32_t em, addr_len, maddr;
2383
2384	/* Memory can only be accessed in naturally aligned 4 byte units */
2385	if (off & 3 || len & 3 || len == 0)
2386		return (EINVAL);
2387
2388	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2389	switch (fwmtype_to_hwmtype(mtype)) {
2390	case MEM_EDC0:
2391		if (!(em & F_EDRAM0_ENABLE))
2392			return (EINVAL);
2393		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2394		maddr = G_EDRAM0_BASE(addr_len) << 20;
2395		break;
2396	case MEM_EDC1:
2397		if (!(em & F_EDRAM1_ENABLE))
2398			return (EINVAL);
2399		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2400		maddr = G_EDRAM1_BASE(addr_len) << 20;
2401		break;
2402	case MEM_MC:
2403		if (!(em & F_EXT_MEM_ENABLE))
2404			return (EINVAL);
2405		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2406		maddr = G_EXT_MEM_BASE(addr_len) << 20;
2407		break;
2408	case MEM_MC1:
2409		if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
2410			return (EINVAL);
2411		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2412		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
2413		break;
2414	default:
2415		return (EINVAL);
2416	}
2417
2418	*addr = maddr + off;	/* global address */
2419	return (validate_mem_range(sc, *addr, len));
2420}
2421
2422static int
2423fixup_devlog_params(struct adapter *sc)
2424{
2425	struct devlog_params *dparams = &sc->params.devlog;
2426	int rc;
2427
2428	rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
2429	    dparams->size, &dparams->addr);
2430
2431	return (rc);
2432}
2433
2434static int
2435cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis,
2436    struct intrs_and_queues *iaq)
2437{
2438	int rc, itype, navail, nrxq10g, nrxq1g, n;
2439	int nofldrxq10g = 0, nofldrxq1g = 0;
2440
2441	bzero(iaq, sizeof(*iaq));
2442
2443	iaq->ntxq10g = t4_ntxq10g;
2444	iaq->ntxq1g = t4_ntxq1g;
2445	iaq->ntxq_vi = t4_ntxq_vi;
2446	iaq->nrxq10g = nrxq10g = t4_nrxq10g;
2447	iaq->nrxq1g = nrxq1g = t4_nrxq1g;
2448	iaq->nrxq_vi = t4_nrxq_vi;
2449	iaq->rsrv_noflowq = t4_rsrv_noflowq;
2450#ifdef TCP_OFFLOAD
2451	if (is_offload(sc)) {
2452		iaq->nofldtxq10g = t4_nofldtxq10g;
2453		iaq->nofldtxq1g = t4_nofldtxq1g;
2454		iaq->nofldtxq_vi = t4_nofldtxq_vi;
2455		iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
2456		iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
2457		iaq->nofldrxq_vi = t4_nofldrxq_vi;
2458	}
2459#endif
2460#ifdef DEV_NETMAP
2461	iaq->nnmtxq_vi = t4_nnmtxq_vi;
2462	iaq->nnmrxq_vi = t4_nnmrxq_vi;
2463#endif
2464
2465	for (itype = INTR_MSIX; itype; itype >>= 1) {
2466
2467		if ((itype & t4_intr_types) == 0)
2468			continue;	/* not allowed */
2469
2470		if (itype == INTR_MSIX)
2471			navail = pci_msix_count(sc->dev);
2472		else if (itype == INTR_MSI)
2473			navail = pci_msi_count(sc->dev);
2474		else
2475			navail = 1;
2476restart:
2477		if (navail == 0)
2478			continue;
2479
2480		iaq->intr_type = itype;
2481		iaq->intr_flags_10g = 0;
2482		iaq->intr_flags_1g = 0;
2483
2484		/*
2485		 * Best option: an interrupt vector for errors, one for the
2486		 * firmware event queue, and one for every rxq (NIC and TOE) of
2487		 * every VI.  The VIs that support netmap use the same
2488		 * interrupts for the NIC rx queues and the netmap rx queues
2489		 * because only one set of queues is active at a time.
2490		 */
2491		iaq->nirq = T4_EXTRA_INTR;
2492		iaq->nirq += n10g * (nrxq10g + nofldrxq10g);
2493		iaq->nirq += n1g * (nrxq1g + nofldrxq1g);
2494		iaq->nirq += (n10g + n1g) * (num_vis - 1) *
2495		    max(iaq->nrxq_vi, iaq->nnmrxq_vi);	/* See comment above. */
2496		iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi;
2497		if (iaq->nirq <= navail &&
2498		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
2499			iaq->intr_flags_10g = INTR_ALL;
2500			iaq->intr_flags_1g = INTR_ALL;
2501			goto allocate;
2502		}
2503
2504		/* Disable the VIs (and netmap) if there aren't enough intrs */
2505		if (num_vis > 1) {
2506			device_printf(sc->dev, "virtual interfaces disabled "
2507			    "because num_vis=%u with current settings "
2508			    "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, "
2509			    "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, "
2510			    "nnmrxq_vi=%u) would need %u interrupts but "
2511			    "only %u are available.\n", num_vis, nrxq10g,
2512			    nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi,
2513			    iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq,
2514			    navail);
2515			num_vis = 1;
2516			iaq->ntxq_vi = iaq->nrxq_vi = 0;
2517			iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
2518			iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
2519			goto restart;
2520		}
2521
2522		/*
2523		 * Second best option: a vector for errors, one for the firmware
2524		 * event queue, and vectors for either all the NIC rx queues or
2525		 * all the TOE rx queues.  The queues that don't get vectors
2526		 * will forward their interrupts to those that do.
2527		 */
2528		iaq->nirq = T4_EXTRA_INTR;
2529		if (nrxq10g >= nofldrxq10g) {
2530			iaq->intr_flags_10g = INTR_RXQ;
2531			iaq->nirq += n10g * nrxq10g;
2532		} else {
2533			iaq->intr_flags_10g = INTR_OFLD_RXQ;
2534			iaq->nirq += n10g * nofldrxq10g;
2535		}
2536		if (nrxq1g >= nofldrxq1g) {
2537			iaq->intr_flags_1g = INTR_RXQ;
2538			iaq->nirq += n1g * nrxq1g;
2539		} else {
2540			iaq->intr_flags_1g = INTR_OFLD_RXQ;
2541			iaq->nirq += n1g * nofldrxq1g;
2542		}
2543		if (iaq->nirq <= navail &&
2544		    (itype != INTR_MSI || powerof2(iaq->nirq)))
2545			goto allocate;
2546
2547		/*
2548		 * Next best option: an interrupt vector for errors, one for the
2549		 * firmware event queue, and at least one per main-VI.  At this
2550		 * point we know we'll have to downsize nrxq and/or nofldrxq to
2551		 * fit what's available to us.
2552		 */
2553		iaq->nirq = T4_EXTRA_INTR;
2554		iaq->nirq += n10g + n1g;
2555		if (iaq->nirq <= navail) {
2556			int leftover = navail - iaq->nirq;
2557
2558			if (n10g > 0) {
2559				int target = max(nrxq10g, nofldrxq10g);
2560
2561				iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ?
2562				    INTR_RXQ : INTR_OFLD_RXQ;
2563
2564				n = 1;
2565				while (n < target && leftover >= n10g) {
2566					leftover -= n10g;
2567					iaq->nirq += n10g;
2568					n++;
2569				}
2570				iaq->nrxq10g = min(n, nrxq10g);
2571#ifdef TCP_OFFLOAD
2572				iaq->nofldrxq10g = min(n, nofldrxq10g);
2573#endif
2574			}
2575
2576			if (n1g > 0) {
2577				int target = max(nrxq1g, nofldrxq1g);
2578
2579				iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ?
2580				    INTR_RXQ : INTR_OFLD_RXQ;
2581
2582				n = 1;
2583				while (n < target && leftover >= n1g) {
2584					leftover -= n1g;
2585					iaq->nirq += n1g;
2586					n++;
2587				}
2588				iaq->nrxq1g = min(n, nrxq1g);
2589#ifdef TCP_OFFLOAD
2590				iaq->nofldrxq1g = min(n, nofldrxq1g);
2591#endif
2592			}
2593
2594			if (itype != INTR_MSI || powerof2(iaq->nirq))
2595				goto allocate;
2596		}
2597
2598		/*
2599		 * Least desirable option: one interrupt vector for everything.
2600		 */
2601		iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
2602		iaq->intr_flags_10g = iaq->intr_flags_1g = 0;
2603#ifdef TCP_OFFLOAD
2604		if (is_offload(sc))
2605			iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
2606#endif
2607allocate:
2608		navail = iaq->nirq;
2609		rc = 0;
2610		if (itype == INTR_MSIX)
2611			rc = pci_alloc_msix(sc->dev, &navail);
2612		else if (itype == INTR_MSI)
2613			rc = pci_alloc_msi(sc->dev, &navail);
2614
2615		if (rc == 0) {
2616			if (navail == iaq->nirq)
2617				return (0);
2618
2619			/*
2620			 * Didn't get the number requested.  Use whatever number
2621			 * the kernel is willing to allocate (it's in navail).
2622			 */
2623			device_printf(sc->dev, "fewer vectors than requested, "
2624			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
2625			    itype, iaq->nirq, navail);
2626			pci_release_msi(sc->dev);
2627			goto restart;
2628		}
2629
2630		device_printf(sc->dev,
2631		    "failed to allocate vectors: type=%d, rc=%d, req=%d, rcvd=%d\n",
2632		    itype, rc, iaq->nirq, navail);
2633	}
2634
2635	device_printf(sc->dev,
2636	    "failed to find a usable interrupt type.  "
2637	    "allowed=%d, msi-x=%d, msi=%d, intx=1\n", t4_intr_types,
2638	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
2639
2640	return (ENXIO);
2641}
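
/*
 * Worked example of the "best option" arithmetic above (editorial note,
 * assuming T4_EXTRA_INTR counts the error and firmware event queue vectors):
 * a 2-port 10G NIC with nrxq10g = 8, no TOE, and num_vis = 1 asks for
 * T4_EXTRA_INTR + 2 * 8 vectors.  If MSI-X cannot supply that many, the
 * function walks down through the fallbacks, ultimately settling for a
 * single vector shared by everything.
 */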
2642
2643#define FW_VERSION(chip) ( \
2644    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
2645    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
2646    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
2647    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
2648#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
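
/*
 * Expansion example (editorial note): FW_VERSION(T4) packs
 * T4FW_VERSION_MAJOR/MINOR/MICRO/BUILD into a single 32-bit word using the
 * V_FW_HDR_FW_VER_* shifts, and FW_INTFVER(T4, NIC) token-pastes to
 * T4FW_HDR_INTFVER_NIC.  The fw_info table below is built entirely from
 * these two macros.
 */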
2649
2650struct fw_info {
2651	uint8_t chip;
2652	char *kld_name;
2653	char *fw_mod_name;
2654	struct fw_hdr fw_hdr;	/* XXX: waste of space, need a sparse struct */
2655} fw_info[] = {
2656	{
2657		.chip = CHELSIO_T4,
2658		.kld_name = "t4fw_cfg",
2659		.fw_mod_name = "t4fw",
2660		.fw_hdr = {
2661			.chip = FW_HDR_CHIP_T4,
2662			.fw_ver = htobe32_const(FW_VERSION(T4)),
2663			.intfver_nic = FW_INTFVER(T4, NIC),
2664			.intfver_vnic = FW_INTFVER(T4, VNIC),
2665			.intfver_ofld = FW_INTFVER(T4, OFLD),
2666			.intfver_ri = FW_INTFVER(T4, RI),
2667			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
2668			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
2669			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
2670			.intfver_fcoe = FW_INTFVER(T4, FCOE),
2671		},
2672	}, {
2673		.chip = CHELSIO_T5,
2674		.kld_name = "t5fw_cfg",
2675		.fw_mod_name = "t5fw",
2676		.fw_hdr = {
2677			.chip = FW_HDR_CHIP_T5,
2678			.fw_ver = htobe32_const(FW_VERSION(T5)),
2679			.intfver_nic = FW_INTFVER(T5, NIC),
2680			.intfver_vnic = FW_INTFVER(T5, VNIC),
2681			.intfver_ofld = FW_INTFVER(T5, OFLD),
2682			.intfver_ri = FW_INTFVER(T5, RI),
2683			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
2684			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
2685			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
2686			.intfver_fcoe = FW_INTFVER(T5, FCOE),
2687		},
2688	}
2689};
2690
2691static struct fw_info *
2692find_fw_info(int chip)
2693{
2694	int i;
2695
2696	for (i = 0; i < nitems(fw_info); i++) {
2697		if (fw_info[i].chip == chip)
2698			return (&fw_info[i]);
2699	}
2700	return (NULL);
2701}
2702
2703/*
2704 * Is the given firmware API compatible with the one the driver was compiled
2705 * with?
2706 */
2707static int
2708fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
2709{
2710
2711	/* short circuit if it's the exact same firmware version */
2712	if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
2713		return (1);
2714
2715	/*
2716	 * XXX: Is this too conservative?  Perhaps I should limit this to the
2717	 * features that are supported in the driver.
2718	 */
2719#define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
2720	if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
2721	    SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
2722	    SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
2723		return (1);
2724#undef SAME_INTF
2725
2726	return (0);
2727}
2728
2729/*
2730 * The firmware in the KLD is usable, but should it be installed?  This routine
2731 * explains itself in detail if it indicates the KLD firmware should be
2732 * installed.
2733 */
2734static int
2735should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
2736{
2737	const char *reason;
2738
2739	if (!card_fw_usable) {
2740		reason = "incompatible or unusable";
2741		goto install;
2742	}
2743
2744	if (k > c) {
2745		reason = "older than the version bundled with this driver";
2746		goto install;
2747	}
2748
2749	if (t4_fw_install == 2 && k != c) {
2750		reason = "different than the version bundled with this driver";
2751		goto install;
2752	}
2753
2754	return (0);
2755
2756install:
2757	if (t4_fw_install == 0) {
2758		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2759		    "but the driver is prohibited from installing a different "
2760		    "firmware on the card.\n",
2761		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2762		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
2763
2764		return (0);
2765	}
2766
2767	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2768	    "installing firmware %u.%u.%u.%u on card.\n",
2769	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2770	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
2771	    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2772	    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2773
2774	return (1);
2775}

2776/*
2777 * Establish contact with the firmware and determine if we are the master
2778 * driver or not, and whether we are responsible for chip initialization.
2779 */
2780static int
2781prep_firmware(struct adapter *sc)
2782{
2783	const struct firmware *fw = NULL, *default_cfg;
2784	int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
2785	enum dev_state state;
2786	struct fw_info *fw_info;
2787	struct fw_hdr *card_fw;		/* fw on the card */
2788	const struct fw_hdr *kld_fw;	/* fw in the KLD */
2789	const struct fw_hdr *drv_fw;	/* fw header the driver was compiled
2790					   against */
2791
2792	/* Contact firmware. */
2793	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
2794	if (rc < 0 || state == DEV_STATE_ERR) {
2795		rc = -rc;
2796		device_printf(sc->dev,
2797		    "failed to connect to the firmware: %d, %d.\n", rc, state);
2798		return (rc);
2799	}
2800	pf = rc;
2801	if (pf == sc->mbox)
2802		sc->flags |= MASTER_PF;
2803	else if (state == DEV_STATE_UNINIT) {
2804		/*
2805		 * We didn't get to be the master so we definitely won't be
2806		 * configuring the chip.  It's a bug if someone else hasn't
2807		 * configured it already.
2808		 */
2809		device_printf(sc->dev, "couldn't be master(%d), "
2810		    "device not already initialized either(%d).\n", rc, state);
2811		return (EDOOFUS);
2812	}
2813
2814	/* This is the firmware whose headers the driver was compiled against */
2815	fw_info = find_fw_info(chip_id(sc));
2816	if (fw_info == NULL) {
2817		device_printf(sc->dev,
2818		    "unable to look up firmware information for chip %d.\n",
2819		    chip_id(sc));
2820		return (EINVAL);
2821	}
2822	drv_fw = &fw_info->fw_hdr;
2823
2824	/*
2825	 * The firmware KLD contains many modules.  The KLD name is also the
2826	 * name of the module that contains the default config file.
2827	 */
2828	default_cfg = firmware_get(fw_info->kld_name);
2829
2830	/* Read the header of the firmware on the card */
2831	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
2832	rc = -t4_read_flash(sc, FLASH_FW_START,
2833	    sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1);
2834	if (rc == 0)
2835		card_fw_usable = fw_compatible(drv_fw, (const void *)card_fw);
2836	else {
2837		device_printf(sc->dev,
2838		    "Unable to read card's firmware header: %d\n", rc);
2839		card_fw_usable = 0;
2840	}
2841
2842	/* This is the firmware in the KLD */
2843	fw = firmware_get(fw_info->fw_mod_name);
2844	if (fw != NULL) {
2845		kld_fw = (const void *)fw->data;
2846		kld_fw_usable = fw_compatible(drv_fw, kld_fw);
2847	} else {
2848		kld_fw = NULL;
2849		kld_fw_usable = 0;
2850	}
2851
2852	if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
2853	    (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
2854		/*
2855		 * Common case: the firmware on the card is an exact match and
2856		 * the KLD is an exact match too, or the KLD is
2857		 * absent/incompatible.  Note that t4_fw_install = 2 is ignored
2858		 * here -- use cxgbetool loadfw if you want to reinstall the
2859		 * same firmware as the one on the card.
2860		 */
2861	} else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
2862	    should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
2863	    be32toh(card_fw->fw_ver))) {
2864
2865		rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
2866		if (rc != 0) {
2867			device_printf(sc->dev,
2868			    "failed to install firmware: %d\n", rc);
2869			goto done;
2870		}
2871
2872		/* Installed successfully, update the cached header too. */
2873		memcpy(card_fw, kld_fw, sizeof(*card_fw));
2874		card_fw_usable = 1;
2875		need_fw_reset = 0;	/* already reset as part of load_fw */
2876	}
2877
2878	if (!card_fw_usable) {
2879		uint32_t d, c, k;
2880
2881		d = ntohl(drv_fw->fw_ver);
2882		c = ntohl(card_fw->fw_ver);
2883		k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
2884
2885		device_printf(sc->dev, "Cannot find a usable firmware: "
2886		    "fw_install %d, chip state %d, "
2887		    "driver compiled with %d.%d.%d.%d, "
2888		    "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
2889		    t4_fw_install, state,
2890		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
2891		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
2892		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2893		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
2894		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2895		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2896		rc = EINVAL;
2897		goto done;
2898	}
2899
2900	/* We're using whatever's on the card and it's known to be good. */
2901	sc->params.fw_vers = ntohl(card_fw->fw_ver);
2902	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
2903	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
2904	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
2905	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
2906	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
2907
2908	t4_get_tp_version(sc, &sc->params.tp_vers);
2909	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
2910	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
2911	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
2912	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
2913	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
2914
2915	if (t4_get_exprom_version(sc, &sc->params.exprom_vers) != 0)
2916		sc->params.exprom_vers = 0;
2917	else {
2918		snprintf(sc->exprom_version, sizeof(sc->exprom_version),
2919		    "%u.%u.%u.%u",
2920		    G_FW_HDR_FW_VER_MAJOR(sc->params.exprom_vers),
2921		    G_FW_HDR_FW_VER_MINOR(sc->params.exprom_vers),
2922		    G_FW_HDR_FW_VER_MICRO(sc->params.exprom_vers),
2923		    G_FW_HDR_FW_VER_BUILD(sc->params.exprom_vers));
2924	}
2925
2926	/* Reset device */
2927	if (need_fw_reset &&
2928	    (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
2929		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
2930		if (rc != ETIMEDOUT && rc != EIO)
2931			t4_fw_bye(sc, sc->mbox);
2932		goto done;
2933	}
2934	sc->flags |= FW_OK;
2935
2936	rc = get_params__pre_init(sc);
2937	if (rc != 0)
2938		goto done; /* error message displayed already */
2939
2940	/* Partition adapter resources as specified in the config file. */
2941	if (state == DEV_STATE_UNINIT) {
2942
2943		KASSERT(sc->flags & MASTER_PF,
2944		    ("%s: trying to change chip settings when not master.",
2945		    __func__));
2946
2947		rc = partition_resources(sc, default_cfg, fw_info->kld_name);
2948		if (rc != 0)
2949			goto done;	/* error message displayed already */
2950
2951		t4_tweak_chip_settings(sc);
2952
2953		/* get basic stuff going */
2954		rc = -t4_fw_initialize(sc, sc->mbox);
2955		if (rc != 0) {
2956			device_printf(sc->dev, "fw init failed: %d.\n", rc);
2957			goto done;
2958		}
2959	} else {
2960		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
2961		sc->cfcsum = 0;
2962	}
2963
2964done:
2965	free(card_fw, M_CXGBE);
2966	if (fw != NULL)
2967		firmware_put(fw, FIRMWARE_UNLOAD);
2968	if (default_cfg != NULL)
2969		firmware_put(default_cfg, FIRMWARE_UNLOAD);
2970
2971	return (rc);
2972}
2973
2974#define FW_PARAM_DEV(param) \
2975	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
2976	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
2977#define FW_PARAM_PFVF(param) \
2978	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
2979	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
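
/*
 * Expansion example (editorial note): FW_PARAM_DEV(PORTVEC) builds the
 * 32-bit query key V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
 * V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_PORTVEC), which is exactly what
 * get_params__pre_init() passes to t4_query_params() further below.
 */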
2980
2981/*
2982 * Partition chip resources for use between various PFs, VFs, etc.
2983 */
2984static int
2985partition_resources(struct adapter *sc, const struct firmware *default_cfg,
2986    const char *name_prefix)
2987{
2988	const struct firmware *cfg = NULL;
2989	int rc = 0;
2990	struct fw_caps_config_cmd caps;
2991	uint32_t mtype, moff, finicsum, cfcsum;
2992
2993	/*
2994	 * Figure out what configuration file to use.  Pick the default config
2995	 * file for the card if the user hasn't specified one explicitly.
2996	 */
2997	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
2998	if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
2999		/* Card specific overrides go here. */
3000		if (pci_get_device(sc->dev) == 0x440a)
3001			snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
3002		if (is_fpga(sc))
3003			snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
3004	}
3005
3006	/*
3007	 * We need to load another module if the profile is anything except
3008	 * "default" or "flash".
3009	 */
3010	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
3011	    strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3012		char s[32];
3013
3014		snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
3015		cfg = firmware_get(s);
3016		if (cfg == NULL) {
3017			if (default_cfg != NULL) {
3018				device_printf(sc->dev,
3019				    "unable to load module \"%s\" for "
3020				    "configuration profile \"%s\", will use "
3021				    "the default config file instead.\n",
3022				    s, sc->cfg_file);
3023				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3024				    "%s", DEFAULT_CF);
3025			} else {
3026				device_printf(sc->dev,
3027				    "unable to load module \"%s\" for "
3028				    "configuration profile \"%s\", will use "
3029				    "the config file on the card's flash "
3030				    "instead.\n", s, sc->cfg_file);
3031				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3032				    "%s", FLASH_CF);
3033			}
3034		}
3035	}
3036
3037	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
3038	    default_cfg == NULL) {
3039		device_printf(sc->dev,
3040		    "default config file not available, will use the config "
3041		    "file on the card's flash instead.\n");
3042		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
3043	}
3044
3045	if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3046		u_int cflen;
3047		const uint32_t *cfdata;
3048		uint32_t param, val, addr;
3049
3050		KASSERT(cfg != NULL || default_cfg != NULL,
3051		    ("%s: no config to upload", __func__));
3052
3053		/*
3054		 * Ask the firmware where it wants us to upload the config file.
3055		 */
3056		param = FW_PARAM_DEV(CF);
3057		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3058		if (rc != 0) {
3059			/* No support for config file?  Shouldn't happen. */
3060			device_printf(sc->dev,
3061			    "failed to query config file location: %d.\n", rc);
3062			goto done;
3063		}
3064		mtype = G_FW_PARAMS_PARAM_Y(val);
3065		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3066
3067		/*
3068		 * XXX: sheer laziness.  We deliberately added 4 bytes of
3069		 * useless stuffing/comments at the end of the config file so
3070		 * it's ok to simply throw away the last remaining bytes when
3071		 * the config file is not an exact multiple of 4.  This also
3072		 * helps with the validate_mt_off_len check.
3073		 */
3074		if (cfg != NULL) {
3075			cflen = cfg->datasize & ~3;
3076			cfdata = cfg->data;
3077		} else {
3078			cflen = default_cfg->datasize & ~3;
3079			cfdata = default_cfg->data;
3080		}
3081
3082		if (cflen > FLASH_CFG_MAX_SIZE) {
3083			device_printf(sc->dev,
3084			    "config file too long (%d, max allowed is %d).  "
3085			    "Will try to use the config on the card, if any.\n",
3086			    cflen, FLASH_CFG_MAX_SIZE);
3087			goto use_config_on_flash;
3088		}
3089
3090		rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3091		if (rc != 0) {
3092			device_printf(sc->dev,
3093			    "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
3094			    "Will try to use the config on the card, if any.\n",
3095			    __func__, mtype, moff, cflen, rc);
3096			goto use_config_on_flash;
3097		}
3098		write_via_memwin(sc, 2, addr, cfdata, cflen);
3099	} else {
3100use_config_on_flash:
3101		mtype = FW_MEMTYPE_FLASH;
3102		moff = t4_flash_cfg_addr(sc);
3103	}
3104
3105	bzero(&caps, sizeof(caps));
3106	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3107	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
3108	caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3109	    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3110	    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
3111	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3112	if (rc != 0) {
3113		device_printf(sc->dev,
3114		    "failed to pre-process config file: %d "
3115		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
3116		goto done;
3117	}
3118
3119	finicsum = be32toh(caps.finicsum);
3120	cfcsum = be32toh(caps.cfcsum);
3121	if (finicsum != cfcsum) {
3122		device_printf(sc->dev,
3123		    "WARNING: config file checksum mismatch: %08x %08x\n",
3124		    finicsum, cfcsum);
3125	}
3126	sc->cfcsum = cfcsum;
3127
3128#define LIMIT_CAPS(x) do { \
3129	caps.x &= htobe16(t4_##x##_allowed); \
3130} while (0)
3131
3132	/*
3133	 * Let the firmware know what features will (not) be used so it can tune
3134	 * things accordingly.
3135	 */
3136	LIMIT_CAPS(nbmcaps);
3137	LIMIT_CAPS(linkcaps);
3138	LIMIT_CAPS(switchcaps);
3139	LIMIT_CAPS(niccaps);
3140	LIMIT_CAPS(toecaps);
3141	LIMIT_CAPS(rdmacaps);
3142	LIMIT_CAPS(tlscaps);
3143	LIMIT_CAPS(iscsicaps);
3144	LIMIT_CAPS(fcoecaps);
3145#undef LIMIT_CAPS
3146
3147	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3148	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
3149	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3150	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
3151	if (rc != 0) {
3152		device_printf(sc->dev,
3153		    "failed to process config file: %d.\n", rc);
3154	}
3155done:
3156	if (cfg != NULL)
3157		firmware_put(cfg, FIRMWARE_UNLOAD);
3158	return (rc);
3159}
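
/*
 * Module naming example (editorial note, with a hypothetical profile name):
 * for a non-default profile "uwire" on a T4 (name_prefix "t4fw_cfg"), the
 * code above attempts firmware_get("t4fw_cfg_uwire") and falls back to the
 * default or on-flash config if that module cannot be loaded.
 */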
3160
3161/*
3162 * Retrieve parameters that are needed (or nice to have) very early.
3163 */
3164static int
3165get_params__pre_init(struct adapter *sc)
3166{
3167	int rc;
3168	uint32_t param[2], val[2];
3169
3170	param[0] = FW_PARAM_DEV(PORTVEC);
3171	param[1] = FW_PARAM_DEV(CCLK);
3172	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3173	if (rc != 0) {
3174		device_printf(sc->dev,
3175		    "failed to query parameters (pre_init): %d.\n", rc);
3176		return (rc);
3177	}
3178
3179	sc->params.portvec = val[0];
3180	sc->params.nports = bitcount32(val[0]);
3181	sc->params.vpd.cclk = val[1];
3182
3183	/* Read device log parameters. */
3184	rc = -t4_init_devlog_params(sc, 1);
3185	if (rc == 0)
3186		fixup_devlog_params(sc);
3187	else {
3188		device_printf(sc->dev,
3189		    "failed to get devlog parameters: %d.\n", rc);
3190		rc = 0;	/* devlog isn't critical for device operation */
3191	}
3192
3193	return (rc);
3194}
3195
3196/*
3197 * Retrieve various parameters that are of interest to the driver.  The device
3198 * has been initialized by the firmware at this point.
3199 */
3200static int
3201get_params__post_init(struct adapter *sc)
3202{
3203	int rc;
3204	uint32_t param[7], val[7];
3205	struct fw_caps_config_cmd caps;
3206
3207	param[0] = FW_PARAM_PFVF(IQFLINT_START);
3208	param[1] = FW_PARAM_PFVF(EQ_START);
3209	param[2] = FW_PARAM_PFVF(FILTER_START);
3210	param[3] = FW_PARAM_PFVF(FILTER_END);
3211	param[4] = FW_PARAM_PFVF(L2T_START);
3212	param[5] = FW_PARAM_PFVF(L2T_END);
3213	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3214	if (rc != 0) {
3215		device_printf(sc->dev,
3216		    "failed to query parameters (post_init): %d.\n", rc);
3217		return (rc);
3218	}
3219
3220	sc->sge.iq_start = val[0];
3221	sc->sge.eq_start = val[1];
3222	sc->tids.ftid_base = val[2];
3223	sc->tids.nftids = val[3] - val[2] + 1;
3224	sc->params.ftid_min = val[2];
3225	sc->params.ftid_max = val[3];
3226	sc->vres.l2t.start = val[4];
3227	sc->vres.l2t.size = val[5] - val[4] + 1;
3228	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
3229	    ("%s: L2 table size (%u) larger than expected (%u)",
3230	    __func__, sc->vres.l2t.size, L2T_SIZE));
3231
3232	/* get capabilities */
3233	bzero(&caps, sizeof(caps));
3234	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3235	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
3236	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3237	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3238	if (rc != 0) {
3239		device_printf(sc->dev,
3240		    "failed to get card capabilities: %d.\n", rc);
3241		return (rc);
3242	}
3243
3244#define READ_CAPS(x) do { \
3245	sc->x = htobe16(caps.x); \
3246} while (0)
3247	READ_CAPS(nbmcaps);
3248	READ_CAPS(linkcaps);
3249	READ_CAPS(switchcaps);
3250	READ_CAPS(niccaps);
3251	READ_CAPS(toecaps);
3252	READ_CAPS(rdmacaps);
3253	READ_CAPS(tlscaps);
3254	READ_CAPS(iscsicaps);
3255	READ_CAPS(fcoecaps);
3256
3257	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
3258		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
3259		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
3260		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3261		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
3262		if (rc != 0) {
3263			device_printf(sc->dev,
3264			    "failed to query NIC parameters: %d.\n", rc);
3265			return (rc);
3266		}
3267		sc->tids.etid_base = val[0];
3268		sc->params.etid_min = val[0];
3269		sc->tids.netids = val[1] - val[0] + 1;
3270		sc->params.netids = sc->tids.netids;
3271		sc->params.eo_wr_cred = val[2];
3272		sc->params.ethoffload = 1;
3273	}
3274
3275	if (sc->toecaps) {
3276		/* query offload-related parameters */
3277		param[0] = FW_PARAM_DEV(NTID);
3278		param[1] = FW_PARAM_PFVF(SERVER_START);
3279		param[2] = FW_PARAM_PFVF(SERVER_END);
3280		param[3] = FW_PARAM_PFVF(TDDP_START);
3281		param[4] = FW_PARAM_PFVF(TDDP_END);
3282		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3283		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3284		if (rc != 0) {
3285			device_printf(sc->dev,
3286			    "failed to query TOE parameters: %d.\n", rc);
3287			return (rc);
3288		}
3289		sc->tids.ntids = val[0];
3290		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
3291		sc->tids.stid_base = val[1];
3292		sc->tids.nstids = val[2] - val[1] + 1;
3293		sc->vres.ddp.start = val[3];
3294		sc->vres.ddp.size = val[4] - val[3] + 1;
3295		sc->params.ofldq_wr_cred = val[5];
3296		sc->params.offload = 1;
3297	}
3298	if (sc->rdmacaps) {
3299		param[0] = FW_PARAM_PFVF(STAG_START);
3300		param[1] = FW_PARAM_PFVF(STAG_END);
3301		param[2] = FW_PARAM_PFVF(RQ_START);
3302		param[3] = FW_PARAM_PFVF(RQ_END);
3303		param[4] = FW_PARAM_PFVF(PBL_START);
3304		param[5] = FW_PARAM_PFVF(PBL_END);
3305		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3306		if (rc != 0) {
3307			device_printf(sc->dev,
3308			    "failed to query RDMA parameters(1): %d.\n", rc);
3309			return (rc);
3310		}
3311		sc->vres.stag.start = val[0];
3312		sc->vres.stag.size = val[1] - val[0] + 1;
3313		sc->vres.rq.start = val[2];
3314		sc->vres.rq.size = val[3] - val[2] + 1;
3315		sc->vres.pbl.start = val[4];
3316		sc->vres.pbl.size = val[5] - val[4] + 1;
3317
3318		param[0] = FW_PARAM_PFVF(SQRQ_START);
3319		param[1] = FW_PARAM_PFVF(SQRQ_END);
3320		param[2] = FW_PARAM_PFVF(CQ_START);
3321		param[3] = FW_PARAM_PFVF(CQ_END);
3322		param[4] = FW_PARAM_PFVF(OCQ_START);
3323		param[5] = FW_PARAM_PFVF(OCQ_END);
3324		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3325		if (rc != 0) {
3326			device_printf(sc->dev,
3327			    "failed to query RDMA parameters(2): %d.\n", rc);
3328			return (rc);
3329		}
3330		sc->vres.qp.start = val[0];
3331		sc->vres.qp.size = val[1] - val[0] + 1;
3332		sc->vres.cq.start = val[2];
3333		sc->vres.cq.size = val[3] - val[2] + 1;
3334		sc->vres.ocq.start = val[4];
3335		sc->vres.ocq.size = val[5] - val[4] + 1;
3336	}
3337	if (sc->iscsicaps) {
3338		param[0] = FW_PARAM_PFVF(ISCSI_START);
3339		param[1] = FW_PARAM_PFVF(ISCSI_END);
3340		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3341		if (rc != 0) {
3342			device_printf(sc->dev,
3343			    "failed to query iSCSI parameters: %d.\n", rc);
3344			return (rc);
3345		}
3346		sc->vres.iscsi.start = val[0];
3347		sc->vres.iscsi.size = val[1] - val[0] + 1;
3348	}
3349
3350	t4_init_sge_params(sc);
3351
3352	/*
3353	 * We've got the params we wanted to query via the firmware.  Now grab
3354	 * some others directly from the chip.
3355	 */
3356	rc = t4_read_chip_settings(sc);
3357
3358	return (rc);
3359}
3360
3361static int
3362set_params__post_init(struct adapter *sc)
3363{
3364	uint32_t param, val;
3365
3366	/* ask for encapsulated CPLs */
3367	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
3368	val = 1;
3369	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3370
3371	return (0);
3372}
3373
3374#undef FW_PARAM_PFVF
3375#undef FW_PARAM_DEV
3376
3377static void
3378t4_set_desc(struct adapter *sc)
3379{
3380	char buf[128];
3381	struct adapter_params *p = &sc->params;
3382
3383	snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, "
3384	    "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "",
3385	    chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec);
3386
3387	device_set_desc_copy(sc->dev, buf);
3388}
3389
3390static void
3391build_medialist(struct port_info *pi, struct ifmedia *media)
3392{
3393	int m;
3394
3395	PORT_LOCK(pi);
3396
3397	ifmedia_removeall(media);
3398
3399	m = IFM_ETHER | IFM_FDX;
3400
3401	switch (pi->port_type) {
3402	case FW_PORT_TYPE_BT_XFI:
3403	case FW_PORT_TYPE_BT_XAUI:
3404		ifmedia_add(media, m | IFM_10G_T, 0, NULL);
3405		/* fall through */
3406
3407	case FW_PORT_TYPE_BT_SGMII:
3408		ifmedia_add(media, m | IFM_1000_T, 0, NULL);
3409		ifmedia_add(media, m | IFM_100_TX, 0, NULL);
3410		ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3411		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3412		break;
3413
3414	case FW_PORT_TYPE_CX4:
3415		ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
3416		ifmedia_set(media, m | IFM_10G_CX4);
3417		break;
3418
3419	case FW_PORT_TYPE_QSFP_10G:
3420	case FW_PORT_TYPE_SFP:
3421	case FW_PORT_TYPE_FIBER_XFI:
3422	case FW_PORT_TYPE_FIBER_XAUI:
3423		switch (pi->mod_type) {
3424
3425		case FW_PORT_MOD_TYPE_LR:
3426			ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
3427			ifmedia_set(media, m | IFM_10G_LR);
3428			break;
3429
3430		case FW_PORT_MOD_TYPE_SR:
3431			ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
3432			ifmedia_set(media, m | IFM_10G_SR);
3433			break;
3434
3435		case FW_PORT_MOD_TYPE_LRM:
3436			ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
3437			ifmedia_set(media, m | IFM_10G_LRM);
3438			break;
3439
3440		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3441		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3442			ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
3443			ifmedia_set(media, m | IFM_10G_TWINAX);
3444			break;
3445
3446		case FW_PORT_MOD_TYPE_NONE:
3447			m &= ~IFM_FDX;
3448			ifmedia_add(media, m | IFM_NONE, 0, NULL);
3449			ifmedia_set(media, m | IFM_NONE);
3450			break;
3451
3452		case FW_PORT_MOD_TYPE_NA:
3453		case FW_PORT_MOD_TYPE_ER:
3454		default:
3455			device_printf(pi->dev,
3456			    "unknown port_type (%d), mod_type (%d)\n",
3457			    pi->port_type, pi->mod_type);
3458			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3459			ifmedia_set(media, m | IFM_UNKNOWN);
3460			break;
3461		}
3462		break;
3463
3464	case FW_PORT_TYPE_QSFP:
3465		switch (pi->mod_type) {
3466
3467		case FW_PORT_MOD_TYPE_LR:
3468			ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
3469			ifmedia_set(media, m | IFM_40G_LR4);
3470			break;
3471
3472		case FW_PORT_MOD_TYPE_SR:
3473			ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
3474			ifmedia_set(media, m | IFM_40G_SR4);
3475			break;
3476
3477		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3478		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3479			ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
3480			ifmedia_set(media, m | IFM_40G_CR4);
3481			break;
3482
3483		case FW_PORT_MOD_TYPE_NONE:
3484			m &= ~IFM_FDX;
3485			ifmedia_add(media, m | IFM_NONE, 0, NULL);
3486			ifmedia_set(media, m | IFM_NONE);
3487			break;
3488
3489		default:
3490			device_printf(pi->dev,
3491			    "unknown port_type (%d), mod_type (%d)\n",
3492			    pi->port_type, pi->mod_type);
3493			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3494			ifmedia_set(media, m | IFM_UNKNOWN);
3495			break;
3496		}
3497		break;
3498
3499	default:
3500		device_printf(pi->dev,
3501		    "unknown port_type (%d), mod_type (%d)\n", pi->port_type,
3502		    pi->mod_type);
3503		ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3504		ifmedia_set(media, m | IFM_UNKNOWN);
3505		break;
3506	}
3507
3508	PORT_UNLOCK(pi);
3509}
3510
3511#define FW_MAC_EXACT_CHUNK	7
3512
3513/*
3514 * Program the port's XGMAC based on parameters in ifnet.  The caller also
3515 * indicates which parameters should be programmed (the rest are left alone).
3516 */
3517int
3518update_mac_settings(struct ifnet *ifp, int flags)
3519{
3520	int rc = 0;
3521	struct vi_info *vi = ifp->if_softc;
3522	struct port_info *pi = vi->pi;
3523	struct adapter *sc = pi->adapter;
3524	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
3525
3526	ASSERT_SYNCHRONIZED_OP(sc);
3527	KASSERT(flags, ("%s: not told what to update.", __func__));
3528
3529	if (flags & XGMAC_MTU)
3530		mtu = ifp->if_mtu;
3531
3532	if (flags & XGMAC_PROMISC)
3533		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
3534
3535	if (flags & XGMAC_ALLMULTI)
3536		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
3537
3538	if (flags & XGMAC_VLANEX)
3539		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
3540
3541	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
3542		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
3543		    allmulti, 1, vlanex, false);
3544		if (rc) {
3545			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
3546			    rc);
3547			return (rc);
3548		}
3549	}
3550
3551	if (flags & XGMAC_UCADDR) {
3552		uint8_t ucaddr[ETHER_ADDR_LEN];
3553
3554		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
3555		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
3556		    ucaddr, true, true);
3557		if (rc < 0) {
3558			rc = -rc;
3559			if_printf(ifp, "change_mac failed: %d\n", rc);
3560			return (rc);
3561		} else {
3562			vi->xact_addr_filt = rc;
3563			rc = 0;
3564		}
3565	}
3566
3567	if (flags & XGMAC_MCADDRS) {
3568		const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
3569		int del = 1;
3570		uint64_t hash = 0;
3571		struct ifmultiaddr *ifma;
3572		int i = 0, j;
3573
3574		if_maddr_rlock(ifp);
3575		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3576			if (ifma->ifma_addr->sa_family != AF_LINK)
3577				continue;
3578			mcaddr[i] =
3579			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
3580			MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
3581			i++;
3582
3583			if (i == FW_MAC_EXACT_CHUNK) {
3584				rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
3585				    del, i, mcaddr, NULL, &hash, 0);
3586				if (rc < 0) {
3587					rc = -rc;
3588					for (j = 0; j < i; j++) {
3589						if_printf(ifp,
3590						    "failed to add mc address"
3591						    " %02x:%02x:%02x:"
3592						    "%02x:%02x:%02x rc=%d\n",
3593						    mcaddr[j][0], mcaddr[j][1],
3594						    mcaddr[j][2], mcaddr[j][3],
3595						    mcaddr[j][4], mcaddr[j][5],
3596						    rc);
3597					}
3598					goto mcfail;
3599				}
3600				del = 0;
3601				i = 0;
3602			}
3603		}
3604		if (i > 0) {
3605			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
3606			    mcaddr, NULL, &hash, 0);
3607			if (rc < 0) {
3608				rc = -rc;
3609				for (j = 0; j < i; j++) {
3610					if_printf(ifp,
3611					    "failed to add mc address"
3612					    " %02x:%02x:%02x:"
3613					    "%02x:%02x:%02x rc=%d\n",
3614					    mcaddr[j][0], mcaddr[j][1],
3615					    mcaddr[j][2], mcaddr[j][3],
3616					    mcaddr[j][4], mcaddr[j][5],
3617					    rc);
3618				}
3619				goto mcfail;
3620			}
3621		}
3622
3623		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
3624		if (rc != 0)
3625			if_printf(ifp, "failed to set mc address hash: %d\n", rc);
3626mcfail:
3627		if_maddr_runlock(ifp);
3628	}
3629
3630	return (rc);
3631}
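
/*
 * Chunking example (editorial note): with 20 multicast addresses on the
 * interface, the XGMAC_MCADDRS path above issues three t4_alloc_mac_filt()
 * calls (7 + 7 + 6 addresses, FW_MAC_EXACT_CHUNK at a time).  Only the first
 * call passes del = 1, so the old exact-match filters are cleared exactly
 * once before the new set is programmed.
 */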
3632
3633/*
3634 * {begin|end}_synchronized_op must be called from the same thread.
3635 */
3636int
3637begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
3638    char *wmesg)
3639{
3640	int rc, pri;
3641
3642#ifdef WITNESS
3643	/* the caller thinks it's ok to sleep, but is it really? */
3644	if (flags & SLEEP_OK)
3645		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3646		    "begin_synchronized_op");
3647#endif
3648
3649	if (flags & INTR_OK)
3650		pri = PCATCH;
3651	else
3652		pri = 0;
3653
3654	ADAPTER_LOCK(sc);
3655	for (;;) {
3656
3657		if (vi && IS_DOOMED(vi)) {
3658			rc = ENXIO;
3659			goto done;
3660		}
3661
3662		if (!IS_BUSY(sc)) {
3663			rc = 0;
3664			break;
3665		}
3666
3667		if (!(flags & SLEEP_OK)) {
3668			rc = EBUSY;
3669			goto done;
3670		}
3671
3672		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
3673			rc = EINTR;
3674			goto done;
3675		}
3676	}
3677
3678	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
3679	SET_BUSY(sc);
3680#ifdef INVARIANTS
3681	sc->last_op = wmesg;
3682	sc->last_op_thr = curthread;
3683	sc->last_op_flags = flags;
3684#endif
3685
3686done:
3687	if (!(flags & HOLD_LOCK) || rc)
3688		ADAPTER_UNLOCK(sc);
3689
3690	return (rc);
3691}
3692
3693/*
3694 * Tell if_ioctl and if_init that the VI is going away.  This is a
3695 * special variant of begin_synchronized_op and must be paired with a
3696 * call to end_synchronized_op.
3697 */
3698void
3699doom_vi(struct adapter *sc, struct vi_info *vi)
3700{
3701
3702	ADAPTER_LOCK(sc);
3703	SET_DOOMED(vi);
3704	wakeup(&sc->flags);
3705	while (IS_BUSY(sc))
3706		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
3707	SET_BUSY(sc);
3708#ifdef INVARIANTS
3709	sc->last_op = "t4detach";
3710	sc->last_op_thr = curthread;
3711	sc->last_op_flags = 0;
3712#endif
3713	ADAPTER_UNLOCK(sc);
3714}
3715
3716/*
3717 * {begin|end}_synchronized_op must be called from the same thread.
3718 */
3719void
3720end_synchronized_op(struct adapter *sc, int flags)
3721{
3722
3723	if (flags & LOCK_HELD)
3724		ADAPTER_LOCK_ASSERT_OWNED(sc);
3725	else
3726		ADAPTER_LOCK(sc);
3727
3728	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
3729	CLR_BUSY(sc);
3730	wakeup(&sc->flags);
3731	ADAPTER_UNLOCK(sc);
3732}
3733
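/*
 * Brings the VI up: performs full adapter/VI initialization if needed,
 * programs the MAC, enables the VI and its tx queues, and starts the
 * stats tick.
 */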
3734static int
3735cxgbe_init_synchronized(struct vi_info *vi)
3736{
3737	struct port_info *pi = vi->pi;
3738	struct adapter *sc = pi->adapter;
3739	struct ifnet *ifp = vi->ifp;
3740	int rc = 0, i;
3741	struct sge_txq *txq;
3742
3743	ASSERT_SYNCHRONIZED_OP(sc);
3744
3745	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3746		return (0);	/* already running */
3747
3748	if (!(sc->flags & FULL_INIT_DONE) &&
3749	    ((rc = adapter_full_init(sc)) != 0))
3750		return (rc);	/* error message displayed already */
3751
3752	if (!(vi->flags & VI_INIT_DONE) &&
3753	    ((rc = vi_full_init(vi)) != 0))
3754		return (rc); /* error message displayed already */
3755
3756	rc = update_mac_settings(ifp, XGMAC_ALL);
3757	if (rc)
3758		goto done;	/* error message displayed already */
3759
3760	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
3761	if (rc != 0) {
3762		if_printf(ifp, "enable_vi failed: %d\n", rc);
3763		goto done;
3764	}
3765
3766	/*
3767	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
3768	 * if this changes.
3769	 */
3770
3771	for_each_txq(vi, i, txq) {
3772		TXQ_LOCK(txq);
3773		txq->eq.flags |= EQ_ENABLED;
3774		TXQ_UNLOCK(txq);
3775	}
3776
3777	/*
3778	 * The first iq of the first port to come up is used for tracing.
3779	 */
3780	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
3781		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
3782		t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL :
3783		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
3784		    V_QUEUENUMBER(sc->traceq));
3785		pi->flags |= HAS_TRACEQ;
3786	}
3787
3788	/* all ok */
3789	PORT_LOCK(pi);
3790	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3791	pi->up_vis++;
3792
3793	if (pi->nvi > 1 || sc->flags & IS_VF)
3794		callout_reset(&vi->tick, hz, vi_tick, vi);
3795	else
3796		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
3797	PORT_UNLOCK(pi);
3798done:
3799	if (rc != 0)
3800		cxgbe_uninit_synchronized(vi);
3801
3802	return (rc);
3803}
3804
3805/*
3806 * Idempotent.
3807 */
3808static int
3809cxgbe_uninit_synchronized(struct vi_info *vi)
3810{
3811	struct port_info *pi = vi->pi;
3812	struct adapter *sc = pi->adapter;
3813	struct ifnet *ifp = vi->ifp;
3814	int rc, i;
3815	struct sge_txq *txq;
3816
3817	ASSERT_SYNCHRONIZED_OP(sc);
3818
3819	if (!(vi->flags & VI_INIT_DONE)) {
3820		KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING),
3821		    ("uninited VI is running"));
3822		return (0);
3823	}
3824
3825	/*
3826	 * Disable the VI so that all its data in either direction is discarded
3827	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
3828	 * tick) intact as the TP can deliver negative advice or data that it's
3829	 * holding in its RAM (for an offloaded connection) even after the VI is
3830	 * disabled.
3831	 */
3832	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
3833	if (rc) {
3834		if_printf(ifp, "disable_vi failed: %d\n", rc);
3835		return (rc);
3836	}
3837
3838	for_each_txq(vi, i, txq) {
3839		TXQ_LOCK(txq);
3840		txq->eq.flags &= ~EQ_ENABLED;
3841		TXQ_UNLOCK(txq);
3842	}
3843
3844	PORT_LOCK(pi);
3845	if (pi->nvi > 1 || sc->flags & IS_VF)
3846		callout_stop(&vi->tick);
3847	else
3848		callout_stop(&pi->tick);
3849	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3850		PORT_UNLOCK(pi);
3851		return (0);
3852	}
3853	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3854	pi->up_vis--;
3855	if (pi->up_vis > 0) {
3856		PORT_UNLOCK(pi);
3857		return (0);
3858	}
3859	PORT_UNLOCK(pi);
3860
3861	pi->link_cfg.link_ok = 0;
3862	pi->link_cfg.speed = 0;
3863	pi->linkdnrc = -1;
3864	t4_os_link_changed(sc, pi->port_id, 0, -1);
3865
3866	return (0);
3867}
3868
3869/*
3870 * It is ok for this function to fail midway and return right away.  t4_detach
3871 * will walk the entire sc->irq list and clean up whatever is valid.
3872 */
3873int
3874t4_setup_intr_handlers(struct adapter *sc)
3875{
3876	int rc, rid, p, q, v;
3877	char s[8];
3878	struct irq *irq;
3879	struct port_info *pi;
3880	struct vi_info *vi;
3881	struct sge *sge = &sc->sge;
3882	struct sge_rxq *rxq;
3883#ifdef TCP_OFFLOAD
3884	struct sge_ofld_rxq *ofld_rxq;
3885#endif
3886#ifdef DEV_NETMAP
3887	struct sge_nm_rxq *nm_rxq;
3888#endif
3889#ifdef RSS
3890	int nbuckets = rss_getnumbuckets();
3891#endif
3892
3893	/*
3894	 * Set up interrupts.
3895	 */
3896	irq = &sc->irq[0];
3897	rid = sc->intr_type == INTR_INTX ? 0 : 1;
3898	if (sc->intr_count == 1)
3899		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
3900
3901	/* Multiple interrupts. */
3902	if (sc->flags & IS_VF)
3903		KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
3904		    ("%s: too few intr.", __func__));
3905	else
3906		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
3907		    ("%s: too few intr.", __func__));
3908
3909	/* The first one is always error intr on PFs */
3910	if (!(sc->flags & IS_VF)) {
3911		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
3912		if (rc != 0)
3913			return (rc);
3914		irq++;
3915		rid++;
3916	}
3917
3918	/* The second one is always the firmware event queue (first on VFs) */
3919	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
3920	if (rc != 0)
3921		return (rc);
3922	irq++;
3923	rid++;
3924
3925	for_each_port(sc, p) {
3926		pi = sc->port[p];
3927		for_each_vi(pi, v, vi) {
3928			vi->first_intr = rid - 1;
3929
3930			if (vi->nnmrxq > 0) {
3931				int n = max(vi->nrxq, vi->nnmrxq);
3932
3933				MPASS(vi->flags & INTR_RXQ);
3934
3935				rxq = &sge->rxq[vi->first_rxq];
3936#ifdef DEV_NETMAP
3937				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
3938#endif
3939				for (q = 0; q < n; q++) {
3940					snprintf(s, sizeof(s), "%x%c%x", p,
3941					    'a' + v, q);
3942					if (q < vi->nrxq)
3943						irq->rxq = rxq++;
3944#ifdef DEV_NETMAP
3945					if (q < vi->nnmrxq)
3946						irq->nm_rxq = nm_rxq++;
3947#endif
3948					rc = t4_alloc_irq(sc, irq, rid,
3949					    t4_vi_intr, irq, s);
3950					if (rc != 0)
3951						return (rc);
3952					irq++;
3953					rid++;
3954					vi->nintr++;
3955				}
3956			} else if (vi->flags & INTR_RXQ) {
3957				for_each_rxq(vi, q, rxq) {
3958					snprintf(s, sizeof(s), "%x%c%x", p,
3959					    'a' + v, q);
3960					rc = t4_alloc_irq(sc, irq, rid,
3961					    t4_intr, rxq, s);
3962					if (rc != 0)
3963						return (rc);
3964#ifdef RSS
3965					bus_bind_intr(sc->dev, irq->res,
3966					    rss_getcpu(q % nbuckets));
3967#endif
3968					irq++;
3969					rid++;
3970					vi->nintr++;
3971				}
3972			}
3973#ifdef TCP_OFFLOAD
3974			if (vi->flags & INTR_OFLD_RXQ) {
3975				for_each_ofld_rxq(vi, q, ofld_rxq) {
3976					snprintf(s, sizeof(s), "%x%c%x", p,
3977					    'A' + v, q);
3978					rc = t4_alloc_irq(sc, irq, rid,
3979					    t4_intr, ofld_rxq, s);
3980					if (rc != 0)
3981						return (rc);
3982					irq++;
3983					rid++;
3984					vi->nintr++;
3985				}
3986			}
3987#endif
3988		}
3989	}
3990	MPASS(irq == &sc->irq[sc->intr_count]);
3991
3992	return (0);
3993}
3994
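/*
 * Sets up the adapter-wide queues and taskqueues and enables interrupts.
 * Partial work is undone on failure.
 */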
3995int
3996adapter_full_init(struct adapter *sc)
3997{
3998	int rc, i;
3999
4000	ASSERT_SYNCHRONIZED_OP(sc);
4001	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
4002	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
4003	    ("%s: FULL_INIT_DONE already", __func__));
4004
4005	/*
4006	 * Set up the queues that belong to the adapter (not any particular port).
4007	 */
4008	rc = t4_setup_adapter_queues(sc);
4009	if (rc != 0)
4010		goto done;
4011
4012	for (i = 0; i < nitems(sc->tq); i++) {
4013		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
4014		    taskqueue_thread_enqueue, &sc->tq[i]);
4015		if (sc->tq[i] == NULL) {
4016			device_printf(sc->dev,
4017			    "failed to allocate task queue %d\n", i);
4018			rc = ENOMEM;
4019			goto done;
4020		}
4021		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
4022		    device_get_nameunit(sc->dev), i);
4023	}
4024
4025	if (!(sc->flags & IS_VF))
4026		t4_intr_enable(sc);
4027	sc->flags |= FULL_INIT_DONE;
4028done:
4029	if (rc != 0)
4030		adapter_full_uninit(sc);
4031
4032	return (rc);
4033}
4034
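/*
 * Idempotent counterpart of adapter_full_init.
 */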
4035int
4036adapter_full_uninit(struct adapter *sc)
4037{
4038	int i;
4039
4040	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
4041
4042	t4_teardown_adapter_queues(sc);
4043
4044	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
4045		taskqueue_free(sc->tq[i]);
4046		sc->tq[i] = NULL;
4047	}
4048
4049	sc->flags &= ~FULL_INIT_DONE;
4050
4051	return (0);
4052}
4053
4054#ifdef RSS
4055#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
4056    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
4057    RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
4058    RSS_HASHTYPE_RSS_UDP_IPV6)
4059
4060/* Translates kernel hash types to hardware. */
4061static int
4062hashconfig_to_hashen(int hashconfig)
4063{
4064	int hashen = 0;
4065
4066	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
4067		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
4068	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
4069		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
4070	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
4071		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
4072		    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
4073	}
4074	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
4075		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
4076		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
4077	}
4078	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
4079		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
4080	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
4081		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
4082
4083	return (hashen);
4084}
4085
4086/* Translates hardware hash types to kernel. */
4087static int
4088hashen_to_hashconfig(int hashen)
4089{
4090	int hashconfig = 0;
4091
4092	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
4093		/*
4094		 * If UDP hashing was enabled it must have been enabled for
4095		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
4096		 * enabling any 4-tuple hash is a nonsensical configuration.
4097		 */
4098		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4099		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
4100
4101		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
4102			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
4103		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
4104			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
4105	}
4106	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
4107		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
4108	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
4109		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
4110	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
4111		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
4112	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
4113		hashconfig |= RSS_HASHTYPE_RSS_IPV6;
4114
4115	return (hashconfig);
4116}
4117#endif
4118
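/*
 * Allocates the VI's queues, programs its RSS indirection table (from the
 * kernel's RSS configuration when built with the RSS option), and sets its
 * hash types and default queue.
 */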
4119int
4120vi_full_init(struct vi_info *vi)
4121{
4122	struct adapter *sc = vi->pi->adapter;
4123	struct ifnet *ifp = vi->ifp;
4124	uint16_t *rss;
4125	struct sge_rxq *rxq;
4126	int rc, i, j, hashen;
4127#ifdef RSS
4128	int nbuckets = rss_getnumbuckets();
4129	int hashconfig = rss_gethashconfig();
4130	int extra;
4131	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
4132	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
4133#endif
4134
4135	ASSERT_SYNCHRONIZED_OP(sc);
4136	KASSERT((vi->flags & VI_INIT_DONE) == 0,
4137	    ("%s: VI_INIT_DONE already", __func__));
4138
4139	sysctl_ctx_init(&vi->ctx);
4140	vi->flags |= VI_SYSCTL_CTX;
4141
4142	/*
4143	 * Allocate tx/rx/fl queues for this VI.
4144	 */
4145	rc = t4_setup_vi_queues(vi);
4146	if (rc != 0)
4147		goto done;	/* error message displayed already */
4148
4149	/*
4150	 * Set up RSS for this VI.  Save a copy of the RSS table for later use.
4151	 */
4152	if (vi->nrxq > vi->rss_size) {
4153		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
4154		    "some queues will never receive traffic.\n", vi->nrxq,
4155		    vi->rss_size);
4156	} else if (vi->rss_size % vi->nrxq) {
4157		if_printf(ifp, "nrxq (%d) does not evenly divide hw RSS table size (%d); "
4158		    "expect uneven traffic distribution.\n", vi->nrxq,
4159		    vi->rss_size);
4160	}
4161#ifdef RSS
4162	MPASS(RSS_KEYSIZE == 40);
4163	if (vi->nrxq != nbuckets) {
4164		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d); "
4165		    "performance will be impacted.\n", vi->nrxq, nbuckets);
4166	}
4167
4168	rss_getkey((void *)&raw_rss_key[0]);
4169	for (i = 0; i < nitems(rss_key); i++) {
4170		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
4171	}
4172	t4_write_rss_key(sc, &rss_key[0], -1);
4173#endif
4174	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
4175	for (i = 0; i < vi->rss_size;) {
4176#ifdef RSS
4177		j = rss_get_indirection_to_bucket(i);
4178		j %= vi->nrxq;
4179		rxq = &sc->sge.rxq[vi->first_rxq + j];
4180		rss[i++] = rxq->iq.abs_id;
4181#else
4182		for_each_rxq(vi, j, rxq) {
4183			rss[i++] = rxq->iq.abs_id;
4184			if (i == vi->rss_size)
4185				break;
4186		}
4187#endif
4188	}
4189
4190	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
4191	    vi->rss_size);
4192	if (rc != 0) {
4193		if_printf(ifp, "rss_config failed: %d\n", rc);
4194		goto done;
4195	}
4196
4197#ifdef RSS
4198	hashen = hashconfig_to_hashen(hashconfig);
4199
4200	/*
4201	 * We may have had to enable some hashes even though the global config
4202	 * wants them disabled.  This is a potential problem that must be
4203	 * reported to the user.
4204	 */
4205	extra = hashen_to_hashconfig(hashen) ^ hashconfig;
4206
4207	/*
4208	 * If we consider only the supported hash types, then the enabled hashes
4209	 * are a superset of the requested hashes.  In other words, there cannot
4210	 * be any supported hash that was requested but not enabled, but there
4211	 * can be hashes that were not requested but had to be enabled.
4212	 */
4213	extra &= SUPPORTED_RSS_HASHTYPES;
4214	MPASS((extra & hashconfig) == 0);
4215
4216	if (extra) {
4217		if_printf(ifp,
4218		    "global RSS config (0x%x) cannot be accommodated.\n",
4219		    hashconfig);
4220	}
4221	if (extra & RSS_HASHTYPE_RSS_IPV4)
4222		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
4223	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
4224		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
4225	if (extra & RSS_HASHTYPE_RSS_IPV6)
4226		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
4227	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
4228		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
4229	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
4230		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
4231	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
4232		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
4233#else
4234	hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
4235	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
4236	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4237	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
4238#endif
4239	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]);
4240	if (rc != 0) {
4241		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
4242		goto done;
4243	}
4244
4245	vi->rss = rss;
4246	vi->flags |= VI_INIT_DONE;
4247done:
4248	if (rc != 0)
4249		vi_full_uninit(vi);
4250
4251	return (rc);
4252}
4253
4254/*
4255 * Idempotent.
4256 */
4257int
4258vi_full_uninit(struct vi_info *vi)
4259{
4260	struct port_info *pi = vi->pi;
4261	struct adapter *sc = pi->adapter;
4262	int i;
4263	struct sge_rxq *rxq;
4264	struct sge_txq *txq;
4265#ifdef TCP_OFFLOAD
4266	struct sge_ofld_rxq *ofld_rxq;
4267	struct sge_wrq *ofld_txq;
4268#endif
4269
4270	if (vi->flags & VI_INIT_DONE) {
4271
4272		/* Need to quiesce queues.  */
4273
4274		/* XXX: Only for the first VI? */
4275		if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
4276			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
4277
4278		for_each_txq(vi, i, txq) {
4279			quiesce_txq(sc, txq);
4280		}
4281
4282#ifdef TCP_OFFLOAD
4283		for_each_ofld_txq(vi, i, ofld_txq) {
4284			quiesce_wrq(sc, ofld_txq);
4285		}
4286#endif
4287
4288		for_each_rxq(vi, i, rxq) {
4289			quiesce_iq(sc, &rxq->iq);
4290			quiesce_fl(sc, &rxq->fl);
4291		}
4292
4293#ifdef TCP_OFFLOAD
4294		for_each_ofld_rxq(vi, i, ofld_rxq) {
4295			quiesce_iq(sc, &ofld_rxq->iq);
4296			quiesce_fl(sc, &ofld_rxq->fl);
4297		}
4298#endif
4299		free(vi->rss, M_CXGBE);
4300		free(vi->nm_rss, M_CXGBE);
4301	}
4302
4303	t4_teardown_vi_queues(vi);
4304	vi->flags &= ~VI_INIT_DONE;
4305
4306	return (0);
4307}
4308
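/*
 * Waits for a disabled tx queue to drain completely: mp_ring empty, hardware
 * caught up, and all descriptors reclaimed.
 */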
4309static void
4310quiesce_txq(struct adapter *sc, struct sge_txq *txq)
4311{
4312	struct sge_eq *eq = &txq->eq;
4313	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
4314
4315	(void) sc;	/* unused */
4316
4317#ifdef INVARIANTS
4318	TXQ_LOCK(txq);
4319	MPASS((eq->flags & EQ_ENABLED) == 0);
4320	TXQ_UNLOCK(txq);
4321#endif
4322
4323	/* Wait for the mp_ring to empty. */
4324	while (!mp_ring_is_idle(txq->r)) {
4325		mp_ring_check_drainage(txq->r, 0);
4326		pause("rquiesce", 1);
4327	}
4328
4329	/* Then wait for the hardware to finish. */
4330	while (spg->cidx != htobe16(eq->pidx))
4331		pause("equiesce", 1);
4332
4333	/* Finally, wait for the driver to reclaim all descriptors. */
4334	while (eq->cidx != eq->pidx)
4335		pause("dquiesce", 1);
4336}
4337
4338static void
4339quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
4340{
4341
4342	/* XXXTX */
4343}
4344
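/*
 * Marks the ingress queue disabled once the interrupt handler is done
 * with it.
 */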
4345static void
4346quiesce_iq(struct adapter *sc, struct sge_iq *iq)
4347{
4348	(void) sc;	/* unused */
4349
4350	/* Synchronize with the interrupt handler */
4351	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
4352		pause("iqfree", 1);
4353}
4354
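/*
 * Dooms the freelist so that the starvation machinery leaves it alone.
 */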
4355static void
4356quiesce_fl(struct adapter *sc, struct sge_fl *fl)
4357{
4358	mtx_lock(&sc->sfl_lock);
4359	FL_LOCK(fl);
4360	fl->flags |= FL_DOOMED;
4361	FL_UNLOCK(fl);
4362	callout_stop(&sc->sfl_callout);
4363	mtx_unlock(&sc->sfl_lock);
4364
4365	KASSERT((fl->flags & FL_STARVING) == 0,
4366	    ("%s: still starving", __func__));
4367}
4368
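/*
 * Allocates a shareable interrupt resource for the given rid and hooks up
 * the handler.  irq is filled in so that t4_free_irq can undo all of this.
 */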
4369static int
4370t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
4371    driver_intr_t *handler, void *arg, char *name)
4372{
4373	int rc;
4374
4375	irq->rid = rid;
4376	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
4377	    RF_SHAREABLE | RF_ACTIVE);
4378	if (irq->res == NULL) {
4379		device_printf(sc->dev,
4380		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
4381		return (ENOMEM);
4382	}
4383
4384	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
4385	    NULL, handler, arg, &irq->tag);
4386	if (rc != 0) {
4387		device_printf(sc->dev,
4388		    "failed to setup interrupt for rid %d, name %s: %d\n",
4389		    rid, name, rc);
4390	} else if (name)
4391		bus_describe_intr(sc->dev, irq->res, irq->tag, name);
4392
4393	return (rc);
4394}
4395
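/* Releases whatever t4_alloc_irq set up. */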
4396static int
4397t4_free_irq(struct adapter *sc, struct irq *irq)
4398{
4399	if (irq->tag)
4400		bus_teardown_intr(sc->dev, irq->res, irq->tag);
4401	if (irq->res)
4402		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
4403
4404	bzero(irq, sizeof(*irq));
4405
4406	return (0);
4407}
4408
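/* Snapshots the chip's registers into buf and records the chip version. */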
4409static void
4410get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
4411{
4412
4413	regs->version = chip_id(sc) | chip_rev(sc) << 10;
4414	t4_get_regs(sc, buf, regs->len);
4415}
4416
4417#define	A_PL_INDIR_CMD	0x1f8
4418
4419#define	S_PL_AUTOINC	31
4420#define	M_PL_AUTOINC	0x1U
4421#define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
4422#define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
4423
4424#define	S_PL_VFID	20
4425#define	M_PL_VFID	0xffU
4426#define	V_PL_VFID(x)	((x) << S_PL_VFID)
4427#define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
4428
4429#define	S_PL_ADDR	0
4430#define	M_PL_ADDR	0xfffffU
4431#define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
4432#define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
4433
4434#define	A_PL_INDIR_DATA	0x1fc
4435
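/*
 * Reads one 64-bit VF statistic: directly on a VF, or via the PL indirect
 * registers on a PF.  The caller must hold reg_lock.
 */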
4436static uint64_t
4437read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
4438{
4439	u32 stats[2];
4440
4441	mtx_assert(&sc->reg_lock, MA_OWNED);
4442	if (sc->flags & IS_VF) {
4443		stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
4444		stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
4445	} else {
4446		t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4447		    V_PL_VFID(G_FW_VIID_VIN(viid)) |
4448		    V_PL_ADDR(VF_MPS_REG(reg)));
4449		stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
4450		stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
4451	}
4452	return (((uint64_t)stats[1]) << 32 | stats[0]);
4453}
4454
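/* Reads all of the given VI's MPS statistics. */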
4455static void
4456t4_get_vi_stats(struct adapter *sc, unsigned int viid,
4457    struct fw_vi_stats_vf *stats)
4458{
4459
4460#define GET_STAT(name) \
4461	read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
4462
4463	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
4464	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
4465	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
4466	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
4467	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
4468	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
4469	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
4470	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
4471	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
4472	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
4473	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
4474	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
4475	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
4476	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
4477	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
4478	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
4479
4480#undef GET_STAT
4481}
4482
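/* Clears all of the given VI's MPS statistics via the PL indirect registers. */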
4483static void
4484t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
4485{
4486	int reg;
4487
4488	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4489	    V_PL_VFID(G_FW_VIID_VIN(viid)) |
4490	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
4491	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
4492	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
4493		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
4494}
4495
4496static void
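/* Refreshes the cached VI stats from hardware, at most once every 250ms. */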
4497vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
4498{
4499	struct timeval tv;
4500	const struct timeval interval = {0, 250000};	/* 250ms */
4501
4502	if (!(vi->flags & VI_INIT_DONE))
4503		return;
4504
4505	getmicrotime(&tv);
4506	timevalsub(&tv, &interval);
4507	if (timevalcmp(&tv, &vi->last_refreshed, <))
4508		return;
4509
4510	mtx_lock(&sc->reg_lock);
4511	t4_get_vi_stats(sc, vi->viid, &vi->stats);
4512	getmicrotime(&vi->last_refreshed);
4513	mtx_unlock(&sc->reg_lock);
4514}
4515
4516static void
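/*
 * Refreshes the cached port stats and tunnel congestion drop counts, at most
 * once every 250ms.
 */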
4517cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
4518{
4519	int i;
4520	u_int v, tnl_cong_drops;
4521	struct timeval tv;
4522	const struct timeval interval = {0, 250000};	/* 250ms */
4523
4524	getmicrotime(&tv);
4525	timevalsub(&tv, &interval);
4526	if (timevalcmp(&tv, &pi->last_refreshed, <))
4527		return;
4528
4529	tnl_cong_drops = 0;
4530	t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
4531	for (i = 0; i < sc->chip_params->nchan; i++) {
4532		if (pi->rx_chan_map & (1 << i)) {
4533			mtx_lock(&sc->reg_lock);
4534			t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
4535			    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
4536			mtx_unlock(&sc->reg_lock);
4537			tnl_cong_drops += v;
4538		}
4539	}
4540	pi->tnl_cong_drops = tnl_cong_drops;
4541	getmicrotime(&pi->last_refreshed);
4542}
4543
4544static void
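/* 1Hz callout that refreshes the port's stats; runs with the port lock held. */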
4545cxgbe_tick(void *arg)
4546{
4547	struct port_info *pi = arg;
4548	struct adapter *sc = pi->adapter;
4549
4550	PORT_LOCK_ASSERT_OWNED(pi);
4551	cxgbe_refresh_stats(sc, pi);
4552
4553	callout_schedule(&pi->tick, hz);
4554}
4555
4556void
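/* 1Hz callout that refreshes the VI's stats. */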
4557vi_tick(void *arg)
4558{
4559	struct vi_info *vi = arg;
4560	struct adapter *sc = vi->pi->adapter;
4561
4562	vi_refresh_stats(sc, vi);
4563
4564	callout_schedule(&vi->tick, hz);
4565}
4566
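/*
 * Called when a vlan is created on one of this driver's interfaces; saves the
 * parent ifnet as the vlan's cookie.
 */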
4567static void
4568cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
4569{
4570	struct ifnet *vlan;
4571
4572	if (arg != ifp || ifp->if_type != IFT_ETHER)
4573		return;
4574
4575	vlan = VLAN_DEVAT(ifp, vid);
4576	VLAN_SETCOOKIE(vlan, ifp);
4577}
4578
4579/*
4580 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
4581 */
4582static char *caps_decoder[] = {
4583	"\20\001IPMI\002NCSI",				/* 0: NBM */
4584	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
4585	"\20\001INGRESS\002EGRESS",			/* 2: switch */
4586	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
4587	    "\006HASHFILTER\007ETHOFLD",
4588	"\20\001TOE",					/* 4: TOE */
4589	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
4590	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
4591	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
4592	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
4593	    "\007T10DIF"
4594	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
4595	"\20\001KEYS",					/* 7: TLS */
4596	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
4597		    "\004PO_INITIATOR\005PO_TARGET",
4598};
4599
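/* Adapter-wide sysctls (a reduced set on VFs). */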
4600void
4601t4_sysctls(struct adapter *sc)
4602{
4603	struct sysctl_ctx_list *ctx;
4604	struct sysctl_oid *oid;
4605	struct sysctl_oid_list *children, *c0;
4606	static char *doorbells = "\20\1UDB\2WCWR\3UDBWC\4KDB";
4607
4608	ctx = device_get_sysctl_ctx(sc->dev);
4609
4610	/*
4611	 * dev.t4nex.X.
4612	 */
4613	oid = device_get_sysctl_tree(sc->dev);
4614	c0 = children = SYSCTL_CHILDREN(oid);
4615
4616	sc->sc_do_rxcopy = 1;
4617	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
4618	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
4619
4620	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
4621	    sc->params.nports, "# of ports");
4622
4623	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
4624	    CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells,
4625	    sysctl_bitfield, "A", "available doorbells");
4626
4627	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
4628	    sc->params.vpd.cclk, "core clock frequency (in kHz)");
4629
4630	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
4631	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
4632	    sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
4633	    "interrupt holdoff timer values (us)");
4634
4635	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
4636	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
4637	    sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
4638	    "interrupt holdoff packet counter values");
4639
4640	t4_sge_sysctls(sc, ctx, children);
4641
4642	sc->lro_timeout = 100;
4643	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
4644	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
4645
4646	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW,
4647	    &sc->debug_flags, 0, "flags to enable runtime debugging");
4648
4649	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
4650	    CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
4651
4652	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
4653	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
4654
4655	if (sc->flags & IS_VF)
4656		return;
4657
4658	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
4659	    NULL, chip_rev(sc), "chip hardware revision");
4660
4661	if (sc->params.exprom_vers != 0) {
4662		SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "exprom_version",
4663		    CTLFLAG_RD, sc->exprom_version, 0, "expansion ROM version");
4664	}
4665
4666	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
4667	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
4668
4669	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
4670	    sc->cfcsum, "config file checksum");
4671
4672#define SYSCTL_CAP(name, n, text) \
4673	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
4674	    CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \
4675	    sysctl_bitfield, "A", "available " text " capabilities")
4676
4677	SYSCTL_CAP(nbmcaps, 0, "NBM");
4678	SYSCTL_CAP(linkcaps, 1, "link");
4679	SYSCTL_CAP(switchcaps, 2, "switch");
4680	SYSCTL_CAP(niccaps, 3, "NIC");
4681	SYSCTL_CAP(toecaps, 4, "TCP offload");
4682	SYSCTL_CAP(rdmacaps, 5, "RDMA");
4683	SYSCTL_CAP(iscsicaps, 6, "iSCSI");
4684	SYSCTL_CAP(tlscaps, 7, "TLS");
4685	SYSCTL_CAP(fcoecaps, 8, "FCoE");
4686#undef SYSCTL_CAP
4687
4688	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
4689	    NULL, sc->tids.nftids, "number of filters");
4690
4691	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
4692	    CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
4693	    "chip temperature (in Celsius)");
4694
4695#ifdef SBUF_DRAIN
4696	/*
4697	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
4698	 */
4699	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
4700	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
4701	    "logs and miscellaneous information");
4702	children = SYSCTL_CHILDREN(oid);
4703
4704	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
4705	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4706	    sysctl_cctrl, "A", "congestion control");
4707
4708	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
4709	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4710	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
4711
4712	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
4713	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
4714	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
4715
4716	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
4717	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
4718	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
4719
4720	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
4721	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
4722	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
4723
4724	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
4725	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
4726	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
4727
4728	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
4729	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
4730	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
4731
4732	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
4733	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4734	    chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
4735	    "A", "CIM logic analyzer");
4736
4737	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
4738	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4739	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
4740
4741	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
4742	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
4743	    sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
4744
4745	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
4746	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
4747	    sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
4748
4749	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
4750	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
4751	    sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
4752
4753	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
4754	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
4755	    sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
4756
4757	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
4758	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
4759	    sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
4760
4761	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
4762	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
4763	    sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
4764
4765	if (chip_id(sc) > CHELSIO_T4) {
4766		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
4767		    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
4768		    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
4769
4770		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
4771		    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
4772		    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
4773	}
4774
4775	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
4776	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4777	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
4778
4779	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
4780	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4781	    sysctl_cim_qcfg, "A", "CIM queue configuration");
4782
4783	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
4784	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4785	    sysctl_cpl_stats, "A", "CPL statistics");
4786
4787	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
4788	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4789	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
4790
4791	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
4792	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4793	    sysctl_devlog, "A", "firmware's device log");
4794
4795	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
4796	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4797	    sysctl_fcoe_stats, "A", "FCoE statistics");
4798
4799	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
4800	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4801	    sysctl_hw_sched, "A", "hardware scheduler");
4802
4803	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
4804	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4805	    sysctl_l2t, "A", "hardware L2 table");
4806
4807	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
4808	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4809	    sysctl_lb_stats, "A", "loopback statistics");
4810
4811	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
4812	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4813	    sysctl_meminfo, "A", "memory regions");
4814
4815	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
4816	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4817	    chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
4818	    "A", "MPS TCAM entries");
4819
4820	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
4821	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4822	    sysctl_path_mtus, "A", "path MTUs");
4823
4824	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
4825	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4826	    sysctl_pm_stats, "A", "PM statistics");
4827
4828	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
4829	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4830	    sysctl_rdma_stats, "A", "RDMA statistics");
4831
4832	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
4833	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4834	    sysctl_tcp_stats, "A", "TCP statistics");
4835
4836	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
4837	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4838	    sysctl_tids, "A", "TID information");
4839
4840	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
4841	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4842	    sysctl_tp_err_stats, "A", "TP error statistics");
4843
4844	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
4845	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
4846	    "TP logic analyzer event capture mask");
4847
4848	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
4849	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4850	    sysctl_tp_la, "A", "TP logic analyzer");
4851
4852	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
4853	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4854	    sysctl_tx_rate, "A", "Tx rate");
4855
4856	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
4857	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4858	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
4859
4860	if (is_t5(sc)) {
4861		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
4862		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4863		    sysctl_wcwr_stats, "A", "write combined work requests");
4864	}
4865#endif
4866
4867#ifdef TCP_OFFLOAD
4868	if (is_offload(sc)) {
4869		/*
4870		 * dev.t4nex.X.toe.
4871		 */
4872		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
4873		    NULL, "TOE parameters");
4874		children = SYSCTL_CHILDREN(oid);
4875
4876		sc->tt.sndbuf = 256 * 1024;
4877		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
4878		    &sc->tt.sndbuf, 0, "max hardware send buffer size");
4879
4880		sc->tt.ddp = 0;
4881		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
4882		    &sc->tt.ddp, 0, "DDP allowed");
4883
4884		sc->tt.rx_coalesce = 1;
4885		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
4886		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
4887
4888		sc->tt.tx_align = 1;
4889		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
4890		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
4891
4892		sc->tt.tx_zcopy = 0;
4893		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
4894		    CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
4895		    "Enable zero-copy aio_write(2)");
4896
4897		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
4898		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
4899		    "TP timer tick (us)");
4900
4901		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
4902		    CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
4903		    "TCP timestamp tick (us)");
4904
4905		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
4906		    CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
4907		    "DACK tick (us)");
4908
4909		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
4910		    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
4911		    "IU", "DACK timer (us)");
4912
4913		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
4914		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
4915		    sysctl_tp_timer, "LU", "Retransmit min (us)");
4916
4917		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
4918		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
4919		    sysctl_tp_timer, "LU", "Retransmit max (us)");
4920
4921		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
4922		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
4923		    sysctl_tp_timer, "LU", "Persist timer min (us)");
4924
4925		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
4926		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
4927		    sysctl_tp_timer, "LU", "Persist timer max (us)");
4928
4929		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
4930		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
4931		    sysctl_tp_timer, "LU", "Keepalive idle timer (us)");
4932
4933		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_intvl",
4934		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
4935		    sysctl_tp_timer, "LU", "Keepalive interval (us)");
4936
4937		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
4938		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
4939		    sysctl_tp_timer, "LU", "Initial SRTT (us)");
4940
4941		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
4942		    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
4943		    sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
4944	}
4945#endif
4946}
4947
4948void
4949vi_sysctls(struct vi_info *vi)
4950{
4951	struct sysctl_ctx_list *ctx;
4952	struct sysctl_oid *oid;
4953	struct sysctl_oid_list *children;
4954
4955	ctx = device_get_sysctl_ctx(vi->dev);
4956
4957	/*
4958	 * dev.v?(cxgbe|cxl).X.
4959	 */
4960	oid = device_get_sysctl_tree(vi->dev);
4961	children = SYSCTL_CHILDREN(oid);
4962
4963	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
4964	    vi->viid, "VI identifier");
4965	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
4966	    &vi->nrxq, 0, "# of rx queues");
4967	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
4968	    &vi->ntxq, 0, "# of tx queues");
4969	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
4970	    &vi->first_rxq, 0, "index of first rx queue");
4971	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
4972	    &vi->first_txq, 0, "index of first tx queue");
4973
4974	if (IS_MAIN_VI(vi)) {
4975		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
4976		    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
4977		    "Reserve queue 0 for non-flowid packets");
4978	}
4979
4980#ifdef TCP_OFFLOAD
4981	if (vi->nofldrxq != 0) {
4982		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
4983		    &vi->nofldrxq, 0,
4984		    "# of rx queues for offloaded TCP connections");
4985		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
4986		    &vi->nofldtxq, 0,
4987		    "# of tx queues for offloaded TCP connections");
4988		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
4989		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
4990		    "index of first TOE rx queue");
4991		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
4992		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
4993		    "index of first TOE tx queue");
4994	}
4995#endif
4996#ifdef DEV_NETMAP
4997	if (vi->nnmrxq != 0) {
4998		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
4999		    &vi->nnmrxq, 0, "# of netmap rx queues");
5000		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
5001		    &vi->nnmtxq, 0, "# of netmap tx queues");
5002		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
5003		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
5004		    "index of first netmap rx queue");
5005		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
5006		    CTLFLAG_RD, &vi->first_nm_txq, 0,
5007		    "index of first netmap tx queue");
5008	}
5009#endif
5010
5011	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
5012	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
5013	    "holdoff timer index");
5014	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
5015	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
5016	    "holdoff packet counter index");
5017
5018	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
5019	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
5020	    "rx queue size");
5021	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
5022	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
5023	    "tx queue size");
5024}
5025
5026static void
5027cxgbe_sysctls(struct port_info *pi)
5028{
5029	struct sysctl_ctx_list *ctx;
5030	struct sysctl_oid *oid;
5031	struct sysctl_oid_list *children, *children2;
5032	struct adapter *sc = pi->adapter;
5033	int i;
5034	char name[16];
5035
5036	ctx = device_get_sysctl_ctx(pi->dev);
5037
5038	/*
5039	 * dev.cxgbe.X.
5040	 */
5041	oid = device_get_sysctl_tree(pi->dev);
5042	children = SYSCTL_CHILDREN(oid);
5043
5044	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
5045	   CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
5046	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
5047		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
5048		    CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
5049		    "PHY temperature (in Celsius)");
5050		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
5051		    CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
5052		    "PHY firmware version");
5053	}
5054
5055	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
5056	    CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings,
5057	    "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
5058
5059	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
5060	    port_top_speed(pi), "max speed (in Gbps)");
5061
5062	if (sc->flags & IS_VF)
5063		return;
5064
5065	/*
5066	 * dev.(cxgbe|cxl).X.tc.
5067	 */
5068	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
5069	    "Tx scheduler traffic classes");
5070	for (i = 0; i < sc->chip_params->nsched_cls; i++) {
5071		struct tx_sched_class *tc = &pi->tc[i];
5072
5073		snprintf(name, sizeof(name), "%d", i);
5074		children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
5075		    SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
5076		    "traffic class"));
5077		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD,
5078		    &tc->flags, 0, "flags");
5079		SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
5080		    CTLFLAG_RD, &tc->refcount, 0, "references to this class");
5081#ifdef SBUF_DRAIN
5082		SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
5083		    CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
5084		    sysctl_tc_params, "A", "traffic class parameters");
5085#endif
5086	}
5087
5088	/*
5089	 * dev.cxgbe.X.stats.
5090	 */
5091	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
5092	    NULL, "port statistics");
5093	children = SYSCTL_CHILDREN(oid);
5094	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
5095	    &pi->tx_parse_error, 0,
5096	    "# of tx packets with invalid length or # of segments");
5097
5098#define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
5099	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
5100	    CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
5101	    sysctl_handle_t4_reg64, "QU", desc)
5102
5103	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
5104	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
5105	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
5106	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
5107	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
5108	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
5109	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
5110	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
5111	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
5112	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
5113	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
5114	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
5115	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
5116	    "# of tx frames in this range",
5117	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
5118	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
5119	    "# of tx frames in this range",
5120	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
5121	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
5122	    "# of tx frames in this range",
5123	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
5124	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
5125	    "# of tx frames in this range",
5126	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
5127	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
5128	    "# of tx frames in this range",
5129	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
5130	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
5131	    "# of tx frames in this range",
5132	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
5133	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
5134	    "# of tx frames in this range",
5135	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
5136	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
5137	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
5138	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
5139	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
5140	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
5141	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
5142	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
5143	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
5144	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
5145	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
5146	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
5147	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
5148	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
5149	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
5150	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
5151	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
5152	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
5153	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
5154	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
5155	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
5156
5157	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
5158	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
5159	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
5160	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
5161	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
5162	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
5163	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
5164	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
5165	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
5166	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
5167	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
5168	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
5169	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
5170	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
5171	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
5172	    "# of frames received with bad FCS",
5173	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
5174	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
5175	    "# of frames received with length error",
5176	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
5177	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
5178	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
5179	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
5180	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
5181	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
5182	    "# of rx frames in this range",
5183	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
5184	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
5185	    "# of rx frames in this range",
5186	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
5187	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
5188	    "# of rx frames in this range",
5189	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
5190	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
5191	    "# of rx frames in this range",
5192	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
5193	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
5194	    "# of rx frames in this range",
5195	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
5196	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
5197	    "# of rx frames in this range",
5198	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
5199	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
5200	    "# of rx frames in this range",
5201	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
5202	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
5203	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
5204	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
5205	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
5206	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
5207	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
5208	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
5209	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
5210	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
5211	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
5212	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
5213	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
5214	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
5215	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
5216	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
5217	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
5218	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
5219	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
5220
5221#undef SYSCTL_ADD_T4_REG64
5222
5223#define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
5224	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
5225	    &pi->stats.name, desc)
5226
5227	/* We get these from port_stats and they may be stale by up to 1s */
5228	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
5229	    "# drops due to buffer-group 0 overflows");
5230	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
5231	    "# drops due to buffer-group 1 overflows");
5232	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
5233	    "# drops due to buffer-group 2 overflows");
5234	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
5235	    "# drops due to buffer-group 3 overflows");
5236	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
5237	    "# of buffer-group 0 truncated packets");
5238	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
5239	    "# of buffer-group 1 truncated packets");
5240	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
5241	    "# of buffer-group 2 truncated packets");
5242	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
5243	    "# of buffer-group 3 truncated packets");
5244
5245#undef SYSCTL_ADD_T4_PORTSTAT
5246}
5247
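/* Formats the int array at arg1 (arg2 bytes long) as a space-separated list. */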
5248static int
5249sysctl_int_array(SYSCTL_HANDLER_ARGS)
5250{
5251	int rc, *i, space = 0;
5252	struct sbuf sb;
5253
5254	sbuf_new_for_sysctl(&sb, NULL, 64, req);
5255	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
5256		if (space)
5257			sbuf_printf(&sb, " ");
5258		sbuf_printf(&sb, "%d", *i);
5259		space = 1;
5260	}
5261	rc = sbuf_finish(&sb);
5262	sbuf_delete(&sb);
5263	return (rc);
5264}
5265
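/* Renders arg2 as a bitfield, using the %b description string at arg1. */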
5266static int
5267sysctl_bitfield(SYSCTL_HANDLER_ARGS)
5268{
5269	int rc;
5270	struct sbuf *sb;
5271
5272	rc = sysctl_wire_old_buffer(req, 0);
5273	if (rc != 0)
5274		return (rc);
5275
5276	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5277	if (sb == NULL)
5278		return (ENOMEM);
5279
5280	sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
5281	rc = sbuf_finish(sb);
5282	sbuf_delete(sb);
5283
5284	return (rc);
5285}
5286
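/* Reads the BT PHY's temperature (op 0) or firmware version (op 1) via MDIO. */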
5287static int
5288sysctl_btphy(SYSCTL_HANDLER_ARGS)
5289{
5290	struct port_info *pi = arg1;
5291	int op = arg2;
5292	struct adapter *sc = pi->adapter;
5293	u_int v;
5294	int rc;
5295
5296	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
5297	if (rc)
5298		return (rc);
5299	/* XXX: magic numbers */
5300	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
5301	    &v);
5302	end_synchronized_op(sc, 0);
5303	if (rc)
5304		return (rc);
5305	if (op == 0)
5306		v /= 256;
5307
5308	rc = sysctl_handle_int(oidp, &v, 0, req);
5309	return (rc);
5310}
5311
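/* Controls whether tx queue 0 is reserved for packets without a flowid. */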
5312static int
5313sysctl_noflowq(SYSCTL_HANDLER_ARGS)
5314{
5315	struct vi_info *vi = arg1;
5316	int rc, val;
5317
5318	val = vi->rsrv_noflowq;
5319	rc = sysctl_handle_int(oidp, &val, 0, req);
5320	if (rc != 0 || req->newptr == NULL)
5321		return (rc);
5322
5323	if ((val >= 1) && (vi->ntxq > 1))
5324		vi->rsrv_noflowq = 1;
5325	else
5326		vi->rsrv_noflowq = 0;
5327
5328	return (rc);
5329}
5330
5331static int
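/*
 * Sets the holdoff timer index on all of the VI's rx queues (NIC and TOE).
 * Takes effect immediately.
 */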
5332sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
5333{
5334	struct vi_info *vi = arg1;
5335	struct adapter *sc = vi->pi->adapter;
5336	int idx, rc, i;
5337	struct sge_rxq *rxq;
5338#ifdef TCP_OFFLOAD
5339	struct sge_ofld_rxq *ofld_rxq;
5340#endif
5341	uint8_t v;
5342
5343	idx = vi->tmr_idx;
5344
5345	rc = sysctl_handle_int(oidp, &idx, 0, req);
5346	if (rc != 0 || req->newptr == NULL)
5347		return (rc);
5348
5349	if (idx < 0 || idx >= SGE_NTIMERS)
5350		return (EINVAL);
5351
5352	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5353	    "t4tmr");
5354	if (rc)
5355		return (rc);
5356
5357	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
5358	for_each_rxq(vi, i, rxq) {
5359#ifdef atomic_store_rel_8
5360		atomic_store_rel_8(&rxq->iq.intr_params, v);
5361#else
5362		rxq->iq.intr_params = v;
5363#endif
5364	}
5365#ifdef TCP_OFFLOAD
5366	for_each_ofld_rxq(vi, i, ofld_rxq) {
5367#ifdef atomic_store_rel_8
5368		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
5369#else
5370		ofld_rxq->iq.intr_params = v;
5371#endif
5372	}
5373#endif
5374	vi->tmr_idx = idx;
5375
5376	end_synchronized_op(sc, LOCK_HELD);
5377	return (0);
5378}
5379
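/*
 * Sets the holdoff packet-count index.  Cannot be changed once the VI's
 * queues have been created.
 */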
5380static int
5381sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
5382{
5383	struct vi_info *vi = arg1;
5384	struct adapter *sc = vi->pi->adapter;
5385	int idx, rc;
5386
5387	idx = vi->pktc_idx;
5388
5389	rc = sysctl_handle_int(oidp, &idx, 0, req);
5390	if (rc != 0 || req->newptr == NULL)
5391		return (rc);
5392
5393	if (idx < -1 || idx >= SGE_NCOUNTERS)
5394		return (EINVAL);
5395
5396	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5397	    "t4pktc");
5398	if (rc)
5399		return (rc);
5400
5401	if (vi->flags & VI_INIT_DONE)
5402		rc = EBUSY; /* cannot be changed once the queues are created */
5403	else
5404		vi->pktc_idx = idx;
5405
5406	end_synchronized_op(sc, LOCK_HELD);
5407	return (rc);
5408}
5409
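/*
 * Rx queue size: must be at least 128 and a multiple of 8 (presumably an
 * SGE alignment requirement), and cannot be changed once the queues have
 * been created.
 */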
5410static int
5411sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
5412{
5413	struct vi_info *vi = arg1;
5414	struct adapter *sc = vi->pi->adapter;
5415	int qsize, rc;
5416
5417	qsize = vi->qsize_rxq;
5418
5419	rc = sysctl_handle_int(oidp, &qsize, 0, req);
5420	if (rc != 0 || req->newptr == NULL)
5421		return (rc);
5422
5423	if (qsize < 128 || (qsize & 7))
5424		return (EINVAL);
5425
5426	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5427	    "t4rxqs");
5428	if (rc)
5429		return (rc);
5430
5431	if (vi->flags & VI_INIT_DONE)
5432		rc = EBUSY; /* cannot be changed once the queues are created */
5433	else
5434		vi->qsize_rxq = qsize;
5435
5436	end_synchronized_op(sc, LOCK_HELD);
5437	return (rc);
5438}
5439
5440static int
5441sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
5442{
5443	struct vi_info *vi = arg1;
5444	struct adapter *sc = vi->pi->adapter;
5445	int qsize, rc;
5446
5447	qsize = vi->qsize_txq;
5448
5449	rc = sysctl_handle_int(oidp, &qsize, 0, req);
5450	if (rc != 0 || req->newptr == NULL)
5451		return (rc);
5452
5453	if (qsize < 128 || qsize > 65536)
5454		return (EINVAL);
5455
5456	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5457	    "t4txqs");
5458	if (rc)
5459		return (rc);
5460
5461	if (vi->flags & VI_INIT_DONE)
5462		rc = EBUSY; /* cannot be changed once the queues are created */
5463	else
5464		vi->qsize_txq = qsize;
5465
5466	end_synchronized_op(sc, LOCK_HELD);
5467	return (rc);
5468}
5469
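/*
 * Flow control settings.  A read pretty-prints the current PAUSE_RX and
 * PAUSE_TX bits via "%b"; a write takes a single digit 0-3 that is the OR
 * of PAUSE_RX (1) and PAUSE_TX (2) and pushes it to the firmware with
 * t4_link_l1cfg.  Assuming the node is registered as "pause_settings", a
 * hypothetical invocation would be:
 *
 *	# sysctl dev.cxgbe.0.pause_settings=3	(enable rx and tx pause)
 */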
5470static int
5471sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
5472{
5473	struct port_info *pi = arg1;
5474	struct adapter *sc = pi->adapter;
5475	struct link_config *lc = &pi->link_cfg;
5476	int rc;
5477
5478	if (req->newptr == NULL) {
5479		struct sbuf *sb;
5480		static char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
5481
5482		rc = sysctl_wire_old_buffer(req, 0);
5483		if (rc != 0)
5484			return (rc);
5485
5486		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5487		if (sb == NULL)
5488			return (ENOMEM);
5489
5490		sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
5491		rc = sbuf_finish(sb);
5492		sbuf_delete(sb);
5493	} else {
5494		char s[2];
5495		int n;
5496
5497		s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
5498		s[1] = 0;
5499
5500		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
5501		if (rc != 0)
5502			return (rc);
5503
5504		if (s[1] != 0)
5505			return (EINVAL);
5506		if (s[0] < '0' || s[0] > '9')
5507			return (EINVAL);	/* not a number */
5508		n = s[0] - '0';
5509		if (n & ~(PAUSE_TX | PAUSE_RX))
5510			return (EINVAL);	/* some other bit is set too */
5511
5512		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
5513		    "t4PAUSE");
5514		if (rc)
5515			return (rc);
5516		if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
5517			int link_ok = lc->link_ok;
5518
5519			lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
5520			lc->requested_fc |= n;
5521			rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5522			lc->link_ok = link_ok;	/* restore */
5523		}
5524		end_synchronized_op(sc, 0);
5525	}
5526
5527	return (rc);
5528}
5529
5530static int
5531sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
5532{
5533	struct adapter *sc = arg1;
5534	int reg = arg2;
5535	uint64_t val;
5536
5537	val = t4_read_reg64(sc, reg);
5538
5539	return (sysctl_handle_64(oidp, &val, 0, req));
5540}
5541
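/*
 * Query the chip temperature through a firmware DIAG parameter.  The
 * firmware reports 0 when the reading is unavailable; that is shown as -1.
 */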
5542static int
5543sysctl_temperature(SYSCTL_HANDLER_ARGS)
5544{
5545	struct adapter *sc = arg1;
5546	int rc, t;
5547	uint32_t param, val;
5548
5549	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
5550	if (rc)
5551		return (rc);
5552	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
5553	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
5554	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
5555	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
5556	end_synchronized_op(sc, 0);
5557	if (rc)
5558		return (rc);
5559
5560	/* unknown is returned as 0 but we display -1 in that case */
5561	t = val == 0 ? -1 : val;
5562
5563	rc = sysctl_handle_int(oidp, &t, 0, req);
5564	return (rc);
5565}
5566
5567#ifdef SBUF_DRAIN
5568static int
5569sysctl_cctrl(SYSCTL_HANDLER_ARGS)
5570{
5571	struct adapter *sc = arg1;
5572	struct sbuf *sb;
5573	int rc, i;
5574	uint16_t incr[NMTUS][NCCTRL_WIN];
5575	static const char *dec_fac[] = {
5576		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
5577		"0.9375"
5578	};
5579
5580	rc = sysctl_wire_old_buffer(req, 0);
5581	if (rc != 0)
5582		return (rc);
5583
5584	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5585	if (sb == NULL)
5586		return (ENOMEM);
5587
5588	t4_read_cong_tbl(sc, incr);
5589
5590	for (i = 0; i < NCCTRL_WIN; ++i) {
5591		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
5592		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
5593		    incr[5][i], incr[6][i], incr[7][i]);
5594		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
5595		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
5596		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
5597		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
5598	}
5599
5600	rc = sbuf_finish(sb);
5601	sbuf_delete(sb);
5602
5603	return (rc);
5604}
5605
5606static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
5607	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
5608	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
5609	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
5610};
5611
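/*
 * Dump a single CIM queue.  arg2 indexes qname[] above: values below
 * CIM_NUM_IBQ select an inbound queue, the rest an outbound queue.  A
 * positive return from t4_read_cim_{ibq,obq} is the number of 32-bit
 * words actually read; a negative return is a negated error number.
 */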
5612static int
5613sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
5614{
5615	struct adapter *sc = arg1;
5616	struct sbuf *sb;
5617	int rc, i, n, qid = arg2;
5618	uint32_t *buf, *p;
5619	char *qtype;
5620	u_int cim_num_obq = sc->chip_params->cim_num_obq;
5621
5622	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
5623	    ("%s: bad qid %d\n", __func__, qid));
5624
5625	if (qid < CIM_NUM_IBQ) {
5626		/* inbound queue */
5627		qtype = "IBQ";
5628		n = 4 * CIM_IBQ_SIZE;
5629		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5630		rc = t4_read_cim_ibq(sc, qid, buf, n);
5631	} else {
5632		/* outbound queue */
5633		qtype = "OBQ";
5634		qid -= CIM_NUM_IBQ;
5635		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
5636		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5637		rc = t4_read_cim_obq(sc, qid, buf, n);
5638	}
5639
5640	if (rc < 0) {
5641		rc = -rc;
5642		goto done;
5643	}
5644	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */
5645
5646	rc = sysctl_wire_old_buffer(req, 0);
5647	if (rc != 0)
5648		goto done;
5649
5650	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5651	if (sb == NULL) {
5652		rc = ENOMEM;
5653		goto done;
5654	}
5655
5656	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
5657	for (i = 0, p = buf; i < n; i += 16, p += 4)
5658		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
5659		    p[2], p[3]);
5660
5661	rc = sbuf_finish(sb);
5662	sbuf_delete(sb);
5663done:
5664	free(buf, M_CXGBE);
5665	return (rc);
5666}
5667
5668static int
5669sysctl_cim_la(SYSCTL_HANDLER_ARGS)
5670{
5671	struct adapter *sc = arg1;
5672	u_int cfg;
5673	struct sbuf *sb;
5674	uint32_t *buf, *p;
5675	int rc;
5676
5677	MPASS(chip_id(sc) <= CHELSIO_T5);
5678
5679	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5680	if (rc != 0)
5681		return (rc);
5682
5683	rc = sysctl_wire_old_buffer(req, 0);
5684	if (rc != 0)
5685		return (rc);
5686
5687	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5688	if (sb == NULL)
5689		return (ENOMEM);
5690
5691	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5692	    M_ZERO | M_WAITOK);
5693
5694	rc = -t4_cim_read_la(sc, buf, NULL);
	if (rc != 0) {
		sbuf_delete(sb);	/* don't leak the sbuf on the error path */
		goto done;
	}
5697
5698	sbuf_printf(sb, "Status   Data      PC%s",
5699	    cfg & F_UPDBGLACAPTPCONLY ? "" :
5700	    "     LS0Stat  LS0Addr             LS0Data");
5701
5702	for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
5703		if (cfg & F_UPDBGLACAPTPCONLY) {
5704			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
5705			    p[6], p[7]);
5706			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
5707			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
5708			    p[4] & 0xff, p[5] >> 8);
5709			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
5710			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5711			    p[1] & 0xf, p[2] >> 4);
5712		} else {
5713			sbuf_printf(sb,
5714			    "\n  %02x   %x%07x %x%07x %08x %08x "
5715			    "%08x%08x%08x%08x",
5716			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5717			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
5718			    p[6], p[7]);
5719		}
5720	}
5721
5722	rc = sbuf_finish(sb);
5723	sbuf_delete(sb);
5724done:
5725	free(buf, M_CXGBE);
5726	return (rc);
5727}
5728
5729static int
5730sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
5731{
5732	struct adapter *sc = arg1;
5733	u_int cfg;
5734	struct sbuf *sb;
5735	uint32_t *buf, *p;
5736	int rc;
5737
5738	MPASS(chip_id(sc) > CHELSIO_T5);
5739
5740	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5741	if (rc != 0)
5742		return (rc);
5743
5744	rc = sysctl_wire_old_buffer(req, 0);
5745	if (rc != 0)
5746		return (rc);
5747
5748	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5749	if (sb == NULL)
5750		return (ENOMEM);
5751
5752	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5753	    M_ZERO | M_WAITOK);
5754
5755	rc = -t4_cim_read_la(sc, buf, NULL);
	if (rc != 0) {
		sbuf_delete(sb);	/* don't leak the sbuf on the error path */
		goto done;
	}
5758
5759	sbuf_printf(sb, "Status   Inst    Data      PC%s",
5760	    cfg & F_UPDBGLACAPTPCONLY ? "" :
5761	    "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
5762
5763	for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
5764		if (cfg & F_UPDBGLACAPTPCONLY) {
5765			sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
5766			    p[3] & 0xff, p[2], p[1], p[0]);
5767			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
5768			    (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
5769			    p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
5770			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
5771			    (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
5772			    p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
5773			    p[6] >> 16);
5774		} else {
5775			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
5776			    "%08x %08x %08x %08x %08x %08x",
5777			    (p[9] >> 16) & 0xff,
5778			    p[9] & 0xffff, p[8] >> 16,
5779			    p[8] & 0xffff, p[7] >> 16,
5780			    p[7] & 0xffff, p[6] >> 16,
5781			    p[2], p[1], p[0], p[5], p[4], p[3]);
5782		}
5783	}
5784
5785	rc = sbuf_finish(sb);
5786	sbuf_delete(sb);
5787done:
5788	free(buf, M_CXGBE);
5789	return (rc);
5790}
5791
5792static int
5793sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
5794{
5795	struct adapter *sc = arg1;
5796	u_int i;
5797	struct sbuf *sb;
5798	uint32_t *buf, *p;
5799	int rc;
5800
5801	rc = sysctl_wire_old_buffer(req, 0);
5802	if (rc != 0)
5803		return (rc);
5804
5805	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5806	if (sb == NULL)
5807		return (ENOMEM);
5808
5809	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
5810	    M_ZERO | M_WAITOK);
5811
5812	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
5813	p = buf;
5814
5815	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
5816		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
5817		    p[1], p[0]);
5818	}
5819
5820	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
5821	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
5822		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
5823		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
5824		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
5825		    (p[1] >> 2) | ((p[2] & 3) << 30),
5826		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
5827		    p[0] & 1);
5828	}
5829
5830	rc = sbuf_finish(sb);
5831	sbuf_delete(sb);
5832	free(buf, M_CXGBE);
5833	return (rc);
5834}
5835
5836static int
5837sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
5838{
5839	struct adapter *sc = arg1;
5840	u_int i;
5841	struct sbuf *sb;
5842	uint32_t *buf, *p;
5843	int rc;
5844
5845	rc = sysctl_wire_old_buffer(req, 0);
5846	if (rc != 0)
5847		return (rc);
5848
5849	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5850	if (sb == NULL)
5851		return (ENOMEM);
5852
5853	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
5854	    M_ZERO | M_WAITOK);
5855
5856	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
5857	p = buf;
5858
5859	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
5860	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
5861		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
5862		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
5863		    p[4], p[3], p[2], p[1], p[0]);
5864	}
5865
5866	sbuf_printf(sb, "\n\nCntl ID               Data");
5867	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
5868		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
5869		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
5870	}
5871
5872	rc = sbuf_finish(sb);
5873	sbuf_delete(sb);
5874	free(buf, M_CXGBE);
5875	return (rc);
5876}
5877
5878static int
5879sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
5880{
5881	struct adapter *sc = arg1;
5882	struct sbuf *sb;
5883	int rc, i;
5884	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
5885	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
5886	uint16_t thres[CIM_NUM_IBQ];
5887	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
5888	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
5889	u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
5890
5891	cim_num_obq = sc->chip_params->cim_num_obq;
5892	if (is_t4(sc)) {
5893		ibq_rdaddr = A_UP_IBQ_0_RDADDR;
5894		obq_rdaddr = A_UP_OBQ_0_REALADDR;
5895	} else {
5896		ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
5897		obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
5898	}
5899	nq = CIM_NUM_IBQ + cim_num_obq;
5900
5901	rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
5902	if (rc == 0)
5903		rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
5904	if (rc != 0)
5905		return (rc);
5906
5907	t4_read_cimq_cfg(sc, base, size, thres);
5908
5909	rc = sysctl_wire_old_buffer(req, 0);
5910	if (rc != 0)
5911		return (rc);
5912
5913	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5914	if (sb == NULL)
5915		return (ENOMEM);
5916
5917	sbuf_printf(sb, "Queue  Base  Size Thres RdPtr WrPtr  SOP  EOP Avail");
5918
5919	for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
5920		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
5921		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
5922		    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
5923		    G_QUEREMFLITS(p[2]) * 16);
5924	for ( ; i < nq; i++, p += 4, wr += 2)
5925		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
5926		    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
5927		    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
5928		    G_QUEREMFLITS(p[2]) * 16);
5929
5930	rc = sbuf_finish(sb);
5931	sbuf_delete(sb);
5932
5933	return (rc);
5934}
5935
5936static int
5937sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
5938{
5939	struct adapter *sc = arg1;
5940	struct sbuf *sb;
5941	int rc;
5942	struct tp_cpl_stats stats;
5943
5944	rc = sysctl_wire_old_buffer(req, 0);
5945	if (rc != 0)
5946		return (rc);
5947
5948	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5949	if (sb == NULL)
5950		return (ENOMEM);
5951
5952	mtx_lock(&sc->reg_lock);
5953	t4_tp_get_cpl_stats(sc, &stats);
5954	mtx_unlock(&sc->reg_lock);
5955
5956	if (sc->chip_params->nchan > 2) {
5957		sbuf_printf(sb, "                 channel 0  channel 1"
5958		    "  channel 2  channel 3");
5959		sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
5960		    stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
5961		sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
5962		    stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
5963	} else {
5964		sbuf_printf(sb, "                 channel 0  channel 1");
5965		sbuf_printf(sb, "\nCPL requests:   %10u %10u",
5966		    stats.req[0], stats.req[1]);
5967		sbuf_printf(sb, "\nCPL responses:  %10u %10u",
5968		    stats.rsp[0], stats.rsp[1]);
5969	}
5970
5971	rc = sbuf_finish(sb);
5972	sbuf_delete(sb);
5973
5974	return (rc);
5975}
5976
5977static int
5978sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
5979{
5980	struct adapter *sc = arg1;
5981	struct sbuf *sb;
5982	int rc;
5983	struct tp_usm_stats stats;
5984
5985	rc = sysctl_wire_old_buffer(req, 0);
5986	if (rc != 0)
5987		return (rc);
5988
5989	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5990	if (sb == NULL)
5991		return (ENOMEM);
5992
5993	t4_get_usm_stats(sc, &stats);
5994
5995	sbuf_printf(sb, "Frames: %u\n", stats.frames);
5996	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
5997	sbuf_printf(sb, "Drops:  %u", stats.drops);
5998
5999	rc = sbuf_finish(sb);
6000	sbuf_delete(sb);
6001
6002	return (rc);
6003}
6004
6005static const char * const devlog_level_strings[] = {
6006	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
6007	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
6008	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
6009	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
6010	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
6011	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
6012};
6013
6014static const char * const devlog_facility_strings[] = {
6015	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
6016	[FW_DEVLOG_FACILITY_CF]		= "CF",
6017	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
6018	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
6019	[FW_DEVLOG_FACILITY_RES]	= "RES",
6020	[FW_DEVLOG_FACILITY_HW]		= "HW",
6021	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
6022	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
6023	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
6024	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
6025	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
6026	[FW_DEVLOG_FACILITY_VI]		= "VI",
6027	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
6028	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
6029	[FW_DEVLOG_FACILITY_TM]		= "TM",
6030	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
6031	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
6032	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
6033	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
6034	[FW_DEVLOG_FACILITY_RI]		= "RI",
6035	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
6036	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
6037	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
6038	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE",
6039	[FW_DEVLOG_FACILITY_CHNET]	= "CHNET",
6040};
6041
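/*
 * Dump the firmware's device log, a circular buffer of fw_devlog_e
 * entries in adapter memory.  The entry with the lowest timestamp is the
 * oldest, so printing starts there and wraps around.  Entries arrive
 * big-endian and are byte-swapped in place before formatting.
 */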
6042static int
6043sysctl_devlog(SYSCTL_HANDLER_ARGS)
6044{
6045	struct adapter *sc = arg1;
6046	struct devlog_params *dparams = &sc->params.devlog;
6047	struct fw_devlog_e *buf, *e;
6048	int i, j, rc, nentries, first = 0;
6049	struct sbuf *sb;
6050	uint64_t ftstamp = UINT64_MAX;
6051
6052	if (dparams->addr == 0)
6053		return (ENXIO);
6054
6055	buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
6056	if (buf == NULL)
6057		return (ENOMEM);
6058
6059	rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
6060	if (rc != 0)
6061		goto done;
6062
6063	nentries = dparams->size / sizeof(struct fw_devlog_e);
6064	for (i = 0; i < nentries; i++) {
6065		e = &buf[i];
6066
6067		if (e->timestamp == 0)
6068			break;	/* end */
6069
6070		e->timestamp = be64toh(e->timestamp);
6071		e->seqno = be32toh(e->seqno);
6072		for (j = 0; j < 8; j++)
6073			e->params[j] = be32toh(e->params[j]);
6074
6075		if (e->timestamp < ftstamp) {
6076			ftstamp = e->timestamp;
6077			first = i;
6078		}
6079	}
6080
6081	if (buf[first].timestamp == 0)
6082		goto done;	/* nothing in the log */
6083
6084	rc = sysctl_wire_old_buffer(req, 0);
6085	if (rc != 0)
6086		goto done;
6087
6088	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6089	if (sb == NULL) {
6090		rc = ENOMEM;
6091		goto done;
6092	}
6093	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
6094	    "Seq#", "Tstamp", "Level", "Facility", "Message");
6095
6096	i = first;
6097	do {
6098		e = &buf[i];
6099		if (e->timestamp == 0)
6100			break;	/* end */
6101
6102		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
6103		    e->seqno, e->timestamp,
6104		    (e->level < nitems(devlog_level_strings) ?
6105			devlog_level_strings[e->level] : "UNKNOWN"),
6106		    (e->facility < nitems(devlog_facility_strings) ?
6107			devlog_facility_strings[e->facility] : "UNKNOWN"));
6108		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
6109		    e->params[2], e->params[3], e->params[4],
6110		    e->params[5], e->params[6], e->params[7]);
6111
6112		if (++i == nentries)
6113			i = 0;
6114	} while (i != first);
6115
6116	rc = sbuf_finish(sb);
6117	sbuf_delete(sb);
6118done:
6119	free(buf, M_CXGBE);
6120	return (rc);
6121}
6122
6123static int
6124sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
6125{
6126	struct adapter *sc = arg1;
6127	struct sbuf *sb;
6128	int rc;
6129	struct tp_fcoe_stats stats[MAX_NCHAN];
6130	int i, nchan = sc->chip_params->nchan;
6131
6132	rc = sysctl_wire_old_buffer(req, 0);
6133	if (rc != 0)
6134		return (rc);
6135
6136	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6137	if (sb == NULL)
6138		return (ENOMEM);
6139
6140	for (i = 0; i < nchan; i++)
6141		t4_get_fcoe_stats(sc, i, &stats[i]);
6142
6143	if (nchan > 2) {
6144		sbuf_printf(sb, "                   channel 0        channel 1"
6145		    "        channel 2        channel 3");
6146		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
6147		    stats[0].octets_ddp, stats[1].octets_ddp,
6148		    stats[2].octets_ddp, stats[3].octets_ddp);
6149		sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
6150		    stats[0].frames_ddp, stats[1].frames_ddp,
6151		    stats[2].frames_ddp, stats[3].frames_ddp);
6152		sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
6153		    stats[0].frames_drop, stats[1].frames_drop,
6154		    stats[2].frames_drop, stats[3].frames_drop);
6155	} else {
6156		sbuf_printf(sb, "                   channel 0        channel 1");
6157		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
6158		    stats[0].octets_ddp, stats[1].octets_ddp);
6159		sbuf_printf(sb, "\nframesDDP:  %16u %16u",
6160		    stats[0].frames_ddp, stats[1].frames_ddp);
6161		sbuf_printf(sb, "\nframesDrop: %16u %16u",
6162		    stats[0].frames_drop, stats[1].frames_drop);
6163	}
6164
6165	rc = sbuf_finish(sb);
6166	sbuf_delete(sb);
6167
6168	return (rc);
6169}
6170
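/*
 * Show the tx scheduler configuration: each scheduler's mode (class or
 * flow pacing), bound channel, rate in Kbps, and the inter-packet gaps,
 * including the flow IPG from the pace table.
 */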
6171static int
6172sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
6173{
6174	struct adapter *sc = arg1;
6175	struct sbuf *sb;
6176	int rc, i;
6177	unsigned int map, kbps, ipg, mode;
6178	unsigned int pace_tab[NTX_SCHED];
6179
6180	rc = sysctl_wire_old_buffer(req, 0);
6181	if (rc != 0)
6182		return (rc);
6183
6184	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6185	if (sb == NULL)
6186		return (ENOMEM);
6187
6188	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
6189	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
6190	t4_read_pace_tbl(sc, pace_tab);
6191
6192	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
6193	    "Class IPG (0.1 ns)   Flow IPG (us)");
6194
6195	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
6196		t4_get_tx_sched(sc, i, &kbps, &ipg);
6197		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
6198		    (mode & (1 << i)) ? "flow" : "class", map & 3);
6199		if (kbps)
6200			sbuf_printf(sb, "%9u     ", kbps);
6201		else
6202			sbuf_printf(sb, " disabled     ");
6203
6204		if (ipg)
6205			sbuf_printf(sb, "%13u        ", ipg);
6206		else
6207			sbuf_printf(sb, "     disabled        ");
6208
6209		if (pace_tab[i])
6210			sbuf_printf(sb, "%10u", pace_tab[i]);
6211		else
6212			sbuf_printf(sb, "  disabled");
6213	}
6214
6215	rc = sbuf_finish(sb);
6216	sbuf_delete(sb);
6217
6218	return (rc);
6219}
6220
6221static int
6222sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
6223{
6224	struct adapter *sc = arg1;
6225	struct sbuf *sb;
6226	int rc, i, j;
6227	uint64_t *p0, *p1;
6228	struct lb_port_stats s[2];
6229	static const char *stat_name[] = {
6230		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
6231		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
6232		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
6233		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
6234		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
6235		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
6236		"BG2FramesTrunc:", "BG3FramesTrunc:"
6237	};
6238
6239	rc = sysctl_wire_old_buffer(req, 0);
6240	if (rc != 0)
6241		return (rc);
6242
6243	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6244	if (sb == NULL)
6245		return (ENOMEM);
6246
6247	memset(s, 0, sizeof(s));
6248
6249	for (i = 0; i < sc->chip_params->nchan; i += 2) {
6250		t4_get_lb_stats(sc, i, &s[0]);
6251		t4_get_lb_stats(sc, i + 1, &s[1]);
6252
6253		p0 = &s[0].octets;
6254		p1 = &s[1].octets;
6255		sbuf_printf(sb, "%s                       Loopback %u"
6256		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
6257
6258		for (j = 0; j < nitems(stat_name); j++)
6259			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
6260				   *p0++, *p1++);
6261	}
6262
6263	rc = sbuf_finish(sb);
6264	sbuf_delete(sb);
6265
6266	return (rc);
6267}
6268
6269static int
6270sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
6271{
6272	int rc = 0;
6273	struct port_info *pi = arg1;
6274	struct sbuf *sb;
6275
6276	rc = sysctl_wire_old_buffer(req, 0);
6277	if (rc != 0)
6278		return (rc);
6279	sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
6280	if (sb == NULL)
6281		return (ENOMEM);
6282
6283	if (pi->linkdnrc < 0)
6284		sbuf_printf(sb, "n/a");
6285	else
6286		sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc));
6287
6288	rc = sbuf_finish(sb);
6289	sbuf_delete(sb);
6290
6291	return (rc);
6292}
6293
6294struct mem_desc {
6295	unsigned int base;
6296	unsigned int limit;
6297	unsigned int idx;
6298};
6299
6300static int
6301mem_desc_cmp(const void *a, const void *b)
6302{
6303	return ((const struct mem_desc *)a)->base -
6304	       ((const struct mem_desc *)b)->base;
6305}
6306
6307static void
6308mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
6309    unsigned int to)
6310{
6311	unsigned int size;
6312
6313	if (from == to)
6314		return;
6315
6316	size = to - from + 1;
6317	if (size == 0)
6318		return;
6319
6320	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
6321	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
6322}
6323
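/*
 * Describe the adapter's memory layout.  The enabled EDC/MC ranges are
 * collected and sorted, the per-module regions (contexts, TCBs, payload,
 * ULP, etc.) are read from their base/limit registers, and any holes
 * between the available ranges are reported explicitly.
 */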
6324static int
6325sysctl_meminfo(SYSCTL_HANDLER_ARGS)
6326{
6327	struct adapter *sc = arg1;
6328	struct sbuf *sb;
6329	int rc, i, n;
6330	uint32_t lo, hi, used, alloc;
6331	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
6332	static const char *region[] = {
6333		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
6334		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
6335		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
6336		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
6337		"RQUDP region:", "PBL region:", "TXPBL region:",
6338		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
6339		"On-chip queues:"
6340	};
6341	struct mem_desc avail[4];
6342	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
6343	struct mem_desc *md = mem;
6344
6345	rc = sysctl_wire_old_buffer(req, 0);
6346	if (rc != 0)
6347		return (rc);
6348
6349	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6350	if (sb == NULL)
6351		return (ENOMEM);
6352
6353	for (i = 0; i < nitems(mem); i++) {
6354		mem[i].limit = 0;
6355		mem[i].idx = i;
6356	}
6357
6358	/* Find and sort the populated memory ranges */
6359	i = 0;
6360	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
6361	if (lo & F_EDRAM0_ENABLE) {
6362		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
6363		avail[i].base = G_EDRAM0_BASE(hi) << 20;
6364		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
6365		avail[i].idx = 0;
6366		i++;
6367	}
6368	if (lo & F_EDRAM1_ENABLE) {
6369		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
6370		avail[i].base = G_EDRAM1_BASE(hi) << 20;
6371		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
6372		avail[i].idx = 1;
6373		i++;
6374	}
6375	if (lo & F_EXT_MEM_ENABLE) {
6376		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
6377		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
6378		avail[i].limit = avail[i].base +
6379		    (G_EXT_MEM_SIZE(hi) << 20);
6380		avail[i].idx = is_t5(sc) ? 3 : 2;	/* Call it MC0 for T5 */
6381		i++;
6382	}
6383	if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
6384		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
6385		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
6386		avail[i].limit = avail[i].base +
6387		    (G_EXT_MEM1_SIZE(hi) << 20);
6388		avail[i].idx = 4;
6389		i++;
6390	}
	if (i == 0) {			/* no memory available */
		rc = sbuf_finish(sb);	/* deliver an empty result, don't leak sb */
		sbuf_delete(sb);
		return (rc);
	}
6393	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
6394
6395	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
6396	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
6397	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
6398	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
6399	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
6400	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
6401	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
6402	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
6403	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
6404
6405	/* the next few have explicit upper bounds */
6406	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
6407	md->limit = md->base - 1 +
6408		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
6409		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
6410	md++;
6411
6412	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
6413	md->limit = md->base - 1 +
6414		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
6415		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
6416	md++;
6417
6418	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
6419		if (chip_id(sc) <= CHELSIO_T5)
6420			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
6421		else
6422			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
6423		md->limit = 0;
6424	} else {
6425		md->base = 0;
6426		md->idx = nitems(region);  /* hide it */
6427	}
6428	md++;
6429
6430#define ulp_region(reg) \
6431	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
6432	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
6433
6434	ulp_region(RX_ISCSI);
6435	ulp_region(RX_TDDP);
6436	ulp_region(TX_TPT);
6437	ulp_region(RX_STAG);
6438	ulp_region(RX_RQ);
6439	ulp_region(RX_RQUDP);
6440	ulp_region(RX_PBL);
6441	ulp_region(TX_PBL);
6442#undef ulp_region
6443
6444	md->base = 0;
6445	md->idx = nitems(region);
6446	if (!is_t4(sc)) {
6447		uint32_t size = 0;
6448		uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
6449		uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
6450
6451		if (is_t5(sc)) {
6452			if (sge_ctrl & F_VFIFO_ENABLE)
6453				size = G_DBVFIFO_SIZE(fifo_size);
6454		} else
6455			size = G_T6_DBVFIFO_SIZE(fifo_size);
6456
6457		if (size) {
6458			md->base = G_BASEADDR(t4_read_reg(sc,
6459			    A_SGE_DBVFIFO_BADDR));
6460			md->limit = md->base + (size << 2) - 1;
6461		}
6462	}
6463	md++;
6464
6465	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
6466	md->limit = 0;
6467	md++;
6468	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
6469	md->limit = 0;
6470	md++;
6471
6472	md->base = sc->vres.ocq.start;
6473	if (sc->vres.ocq.size)
6474		md->limit = md->base + sc->vres.ocq.size - 1;
6475	else
6476		md->idx = nitems(region);  /* hide it */
6477	md++;
6478
6479	/* add any address-space holes, there can be up to 3 */
6480	for (n = 0; n < i - 1; n++)
6481		if (avail[n].limit < avail[n + 1].base)
6482			(md++)->base = avail[n].limit;
6483	if (avail[n].limit)
6484		(md++)->base = avail[n].limit;
6485
6486	n = md - mem;
6487	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
6488
6489	for (lo = 0; lo < i; lo++)
6490		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
6491				avail[lo].limit - 1);
6492
6493	sbuf_printf(sb, "\n");
6494	for (i = 0; i < n; i++) {
6495		if (mem[i].idx >= nitems(region))
6496			continue;                        /* skip holes */
6497		if (!mem[i].limit)
6498			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
6499		mem_region_show(sb, region[mem[i].idx], mem[i].base,
6500				mem[i].limit);
6501	}
6502
6503	sbuf_printf(sb, "\n");
6504	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
6505	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
6506	mem_region_show(sb, "uP RAM:", lo, hi);
6507
6508	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
6509	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
6510	mem_region_show(sb, "uP Extmem2:", lo, hi);
6511
6512	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
6513	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
6514		   G_PMRXMAXPAGE(lo),
6515		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
6516		   (lo & F_PMRXNUMCHN) ? 2 : 1);
6517
6518	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
6519	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
6520	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
6521		   G_PMTXMAXPAGE(lo),
6522		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
6523		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
6524	sbuf_printf(sb, "%u p-structs\n",
6525		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
6526
6527	for (i = 0; i < 4; i++) {
6528		if (chip_id(sc) > CHELSIO_T5)
6529			lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
6530		else
6531			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
6532		if (is_t5(sc)) {
6533			used = G_T5_USED(lo);
6534			alloc = G_T5_ALLOC(lo);
6535		} else {
6536			used = G_USED(lo);
6537			alloc = G_ALLOC(lo);
6538		}
6539		/* For T6 these are MAC buffer groups */
6540		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
6541		    i, used, alloc);
6542	}
6543	for (i = 0; i < sc->chip_params->nchan; i++) {
6544		if (chip_id(sc) > CHELSIO_T5)
6545			lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
6546		else
6547			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
6548		if (is_t5(sc)) {
6549			used = G_T5_USED(lo);
6550			alloc = G_T5_ALLOC(lo);
6551		} else {
6552			used = G_USED(lo);
6553			alloc = G_ALLOC(lo);
6554		}
6555		/* For T6 these are MAC buffer groups */
6556		sbuf_printf(sb,
6557		    "\nLoopback %d using %u pages out of %u allocated",
6558		    i, used, alloc);
6559	}
6560
6561	rc = sbuf_finish(sb);
6562	sbuf_delete(sb);
6563
6564	return (rc);
6565}
6566
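/*
 * Convert an MPS TCAM (x, y) pair into a value/mask.  A bit is "don't
 * care" when both x and y are 0, so the mask is x | y, and the y half
 * carries the value of the bits that must match.  The Ethernet address
 * occupies the low 48 bits, hence the "+ 2" after the swap to big-endian.
 */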
6567static inline void
6568tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
6569{
6570	*mask = x | y;
6571	y = htobe64(y);
6572	memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
6573}
6574
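/*
 * Walk the T4/T5 MPS TCAM and print every valid entry.  For entries with
 * the replicate bit set, the replication vector is fetched from the
 * firmware with an FW_LDST_CMD, which requires the synchronized-op dance
 * around the mailbox.
 */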
6575static int
6576sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
6577{
6578	struct adapter *sc = arg1;
6579	struct sbuf *sb;
6580	int rc, i;
6581
6582	MPASS(chip_id(sc) <= CHELSIO_T5);
6583
6584	rc = sysctl_wire_old_buffer(req, 0);
6585	if (rc != 0)
6586		return (rc);
6587
6588	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6589	if (sb == NULL)
6590		return (ENOMEM);
6591
6592	sbuf_printf(sb,
6593	    "Idx  Ethernet address     Mask     Vld Ports PF"
6594	    "  VF              Replication             P0 P1 P2 P3  ML");
6595	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6596		uint64_t tcamx, tcamy, mask;
6597		uint32_t cls_lo, cls_hi;
6598		uint8_t addr[ETHER_ADDR_LEN];
6599
6600		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
6601		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
6602		if (tcamx & tcamy)
6603			continue;
6604		tcamxy2valmask(tcamx, tcamy, addr, &mask);
6605		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6606		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6607		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
6608			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
6609			   addr[3], addr[4], addr[5], (uintmax_t)mask,
6610			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
6611			   G_PORTMAP(cls_hi), G_PF(cls_lo),
6612			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
6613
6614		if (cls_lo & F_REPLICATE) {
6615			struct fw_ldst_cmd ldst_cmd;
6616
6617			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6618			ldst_cmd.op_to_addrspace =
6619			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6620				F_FW_CMD_REQUEST | F_FW_CMD_READ |
6621				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6622			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6623			ldst_cmd.u.mps.rplc.fid_idx =
6624			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6625				V_FW_LDST_CMD_IDX(i));
6626
6627			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6628			    "t4mps");
6629			if (rc)
6630				break;
6631			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6632			    sizeof(ldst_cmd), &ldst_cmd);
6633			end_synchronized_op(sc, 0);
6634
6635			if (rc != 0) {
6636				sbuf_printf(sb, "%36d", rc);
6637				rc = 0;
6638			} else {
6639				sbuf_printf(sb, " %08x %08x %08x %08x",
6640				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6641				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6642				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6643				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6644			}
6645		} else
6646			sbuf_printf(sb, "%36s", "");
6647
6648		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
6649		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
6650		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
6651	}
6652
6653	if (rc)
6654		(void) sbuf_finish(sb);
6655	else
6656		rc = sbuf_finish(sb);
6657	sbuf_delete(sb);
6658
6659	return (rc);
6660}
6661
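/*
 * T6 version of the MPS TCAM dump.  The TCAM is read indirectly through
 * A_MPS_CLS_TCAM_DATA2_CTL (two passes per index, one each for the y and
 * x halves), and entries are decoded as either inner-header lookups with
 * a VNI or outer-header lookups with an optional IVLAN.
 */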
6662static int
6663sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
6664{
6665	struct adapter *sc = arg1;
6666	struct sbuf *sb;
6667	int rc, i;
6668
6669	MPASS(chip_id(sc) > CHELSIO_T5);
6670
6671	rc = sysctl_wire_old_buffer(req, 0);
6672	if (rc != 0)
6673		return (rc);
6674
6675	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6676	if (sb == NULL)
6677		return (ENOMEM);
6678
6679	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
6680	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
6681	    "                           Replication"
6682	    "                                    P0 P1 P2 P3  ML");
6683
6684	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6685		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
6686		uint16_t ivlan;
6687		uint64_t tcamx, tcamy, val, mask;
6688		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
6689		uint8_t addr[ETHER_ADDR_LEN];
6690
6691		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
6692		if (i < 256)
6693			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
6694		else
6695			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
6696		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6697		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6698		tcamy = G_DMACH(val) << 32;
6699		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6700		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6701		lookup_type = G_DATALKPTYPE(data2);
6702		port_num = G_DATAPORTNUM(data2);
6703		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6704			/* Inner header VNI */
6705			vniy = ((data2 & F_DATAVIDH2) << 23) |
6706				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6707			dip_hit = data2 & F_DATADIPHIT;
6708			vlan_vld = 0;
6709		} else {
6710			vniy = 0;
6711			dip_hit = 0;
6712			vlan_vld = data2 & F_DATAVIDH2;
6713			ivlan = G_VIDL(val);
6714		}
6715
6716		ctl |= V_CTLXYBITSEL(1);
6717		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6718		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6719		tcamx = G_DMACH(val) << 32;
6720		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6721		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6722		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6723			/* Inner header VNI mask */
6724			vnix = ((data2 & F_DATAVIDH2) << 23) |
6725			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6726		} else
6727			vnix = 0;
6728
6729		if (tcamx & tcamy)
6730			continue;
6731		tcamxy2valmask(tcamx, tcamy, addr, &mask);
6732
6733		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6734		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6735
6736		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6737			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6738			    "%012jx %06x %06x    -    -   %3c"
6739			    "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
6740			    addr[1], addr[2], addr[3], addr[4], addr[5],
6741			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
6742			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6743			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6744			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6745		} else {
6746			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6747			    "%012jx    -       -   ", i, addr[0], addr[1],
6748			    addr[2], addr[3], addr[4], addr[5],
6749			    (uintmax_t)mask);
6750
6751			if (vlan_vld)
6752				sbuf_printf(sb, "%4u   Y     ", ivlan);
6753			else
6754				sbuf_printf(sb, "  -    N     ");
6755
6756			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
6757			    lookup_type ? 'I' : 'O', port_num,
6758			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6759			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6760			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6761		}
6762
6764		if (cls_lo & F_T6_REPLICATE) {
6765			struct fw_ldst_cmd ldst_cmd;
6766
6767			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6768			ldst_cmd.op_to_addrspace =
6769			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6770				F_FW_CMD_REQUEST | F_FW_CMD_READ |
6771				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6772			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6773			ldst_cmd.u.mps.rplc.fid_idx =
6774			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6775				V_FW_LDST_CMD_IDX(i));
6776
6777			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6778			    "t6mps");
6779			if (rc)
6780				break;
6781			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6782			    sizeof(ldst_cmd), &ldst_cmd);
6783			end_synchronized_op(sc, 0);
6784
6785			if (rc != 0) {
6786				sbuf_printf(sb, "%72d", rc);
6787				rc = 0;
6788			} else {
6789				sbuf_printf(sb, " %08x %08x %08x %08x"
6790				    " %08x %08x %08x %08x",
6791				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
6792				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
6793				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
6794				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
6795				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6796				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6797				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6798				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6799			}
6800		} else
6801			sbuf_printf(sb, "%72s", "");
6802
6803		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
6804		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
6805		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
6806		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
6807	}
6808
6809	if (rc)
6810		(void) sbuf_finish(sb);
6811	else
6812		rc = sbuf_finish(sb);
6813	sbuf_delete(sb);
6814
6815	return (rc);
6816}
6817
6818static int
6819sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
6820{
6821	struct adapter *sc = arg1;
6822	struct sbuf *sb;
6823	int rc;
6824	uint16_t mtus[NMTUS];
6825
6826	rc = sysctl_wire_old_buffer(req, 0);
6827	if (rc != 0)
6828		return (rc);
6829
6830	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6831	if (sb == NULL)
6832		return (ENOMEM);
6833
6834	t4_read_mtu_tbl(sc, mtus, NULL);
6835
6836	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
6837	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
6838	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
6839	    mtus[14], mtus[15]);
6840
6841	rc = sbuf_finish(sb);
6842	sbuf_delete(sb);
6843
6844	return (rc);
6845}
6846
6847static int
6848sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
6849{
6850	struct adapter *sc = arg1;
6851	struct sbuf *sb;
6852	int rc, i;
6853	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
6854	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
6855	static const char *tx_stats[MAX_PM_NSTATS] = {
6856		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
6857		"Tx FIFO wait", NULL, "Tx latency"
6858	};
6859	static const char *rx_stats[MAX_PM_NSTATS] = {
6860		"Read:", "Write bypass:", "Write mem:", "Flush:",
6861		"Rx FIFO wait", NULL, "Rx latency"
6862	};
6863
6864	rc = sysctl_wire_old_buffer(req, 0);
6865	if (rc != 0)
6866		return (rc);
6867
6868	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6869	if (sb == NULL)
6870		return (ENOMEM);
6871
6872	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
6873	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
6874
6875	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
6876	for (i = 0; i < 4; i++) {
6877		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6878		    tx_cyc[i]);
6879	}
6880
6881	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
6882	for (i = 0; i < 4; i++) {
6883		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6884		    rx_cyc[i]);
6885	}
6886
6887	if (chip_id(sc) > CHELSIO_T5) {
6888		sbuf_printf(sb,
6889		    "\n              Total wait      Total occupancy");
6890		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6891		    tx_cyc[i]);
6892		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6893		    rx_cyc[i]);
6894
6895		i += 2;
6896		MPASS(i < nitems(tx_stats));
6897
6898		sbuf_printf(sb,
6899		    "\n                   Reads           Total wait");
6900		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6901		    tx_cyc[i]);
6902		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6903		    rx_cyc[i]);
6904	}
6905
6906	rc = sbuf_finish(sb);
6907	sbuf_delete(sb);
6908
6909	return (rc);
6910}
6911
6912static int
6913sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
6914{
6915	struct adapter *sc = arg1;
6916	struct sbuf *sb;
6917	int rc;
6918	struct tp_rdma_stats stats;
6919
6920	rc = sysctl_wire_old_buffer(req, 0);
6921	if (rc != 0)
6922		return (rc);
6923
6924	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6925	if (sb == NULL)
6926		return (ENOMEM);
6927
6928	mtx_lock(&sc->reg_lock);
6929	t4_tp_get_rdma_stats(sc, &stats);
6930	mtx_unlock(&sc->reg_lock);
6931
6932	sbuf_printf(sb, "NoRQEModDeferrals: %u\n", stats.rqe_dfr_mod);
6933	sbuf_printf(sb, "NoRQEPktDeferrals: %u", stats.rqe_dfr_pkt);
6934
6935	rc = sbuf_finish(sb);
6936	sbuf_delete(sb);
6937
6938	return (rc);
6939}
6940
6941static int
6942sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
6943{
6944	struct adapter *sc = arg1;
6945	struct sbuf *sb;
6946	int rc;
6947	struct tp_tcp_stats v4, v6;
6948
6949	rc = sysctl_wire_old_buffer(req, 0);
6950	if (rc != 0)
6951		return (rc);
6952
6953	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6954	if (sb == NULL)
6955		return (ENOMEM);
6956
6957	mtx_lock(&sc->reg_lock);
6958	t4_tp_get_tcp_stats(sc, &v4, &v6);
6959	mtx_unlock(&sc->reg_lock);
6960
6961	sbuf_printf(sb,
6962	    "                                IP                 IPv6\n");
6963	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
6964	    v4.tcp_out_rsts, v6.tcp_out_rsts);
6965	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
6966	    v4.tcp_in_segs, v6.tcp_in_segs);
6967	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
6968	    v4.tcp_out_segs, v6.tcp_out_segs);
6969	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
6970	    v4.tcp_retrans_segs, v6.tcp_retrans_segs);
6971
6972	rc = sbuf_finish(sb);
6973	sbuf_delete(sb);
6974
6975	return (rc);
6976}
6977
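/*
 * Summarize TID usage: the ATID, TID, STID, FTID, and ETID ranges, and
 * for TIDs the split between the directly indexed region and the hash
 * region when the LE hash table is enabled.
 */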
6978static int
6979sysctl_tids(SYSCTL_HANDLER_ARGS)
6980{
6981	struct adapter *sc = arg1;
6982	struct sbuf *sb;
6983	int rc;
6984	struct tid_info *t = &sc->tids;
6985
6986	rc = sysctl_wire_old_buffer(req, 0);
6987	if (rc != 0)
6988		return (rc);
6989
6990	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6991	if (sb == NULL)
6992		return (ENOMEM);
6993
6994	if (t->natids) {
6995		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
6996		    t->atids_in_use);
6997	}
6998
6999	if (t->ntids) {
7000		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
7001			uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
7002
7003			if (b) {
7004				sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1,
7005				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
7006				    t->ntids - 1);
7007			} else {
7008				sbuf_printf(sb, "TID range: %u-%u",
7009				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
7010				    t->ntids - 1);
7011			}
7012		} else
7013			sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
7014		sbuf_printf(sb, ", in use: %u\n",
7015		    atomic_load_acq_int(&t->tids_in_use));
7016	}
7017
7018	if (t->nstids) {
7019		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
7020		    t->stid_base + t->nstids - 1, t->stids_in_use);
7021	}
7022
7023	if (t->nftids) {
7024		sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
7025		    t->ftid_base + t->nftids - 1);
7026	}
7027
7028	if (t->netids) {
7029		sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
7030		    t->etid_base + t->netids - 1);
7031	}
7032
7033	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
7034	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
7035	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
7036
7037	rc = sbuf_finish(sb);
7038	sbuf_delete(sb);
7039
7040	return (rc);
7041}
7042
7043static int
7044sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
7045{
7046	struct adapter *sc = arg1;
7047	struct sbuf *sb;
7048	int rc;
7049	struct tp_err_stats stats;
7050
7051	rc = sysctl_wire_old_buffer(req, 0);
7052	if (rc != 0)
7053		return (rc);
7054
7055	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7056	if (sb == NULL)
7057		return (ENOMEM);
7058
7059	mtx_lock(&sc->reg_lock);
7060	t4_tp_get_err_stats(sc, &stats);
7061	mtx_unlock(&sc->reg_lock);
7062
7063	if (sc->chip_params->nchan > 2) {
7064		sbuf_printf(sb, "                 channel 0  channel 1"
7065		    "  channel 2  channel 3\n");
7066		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
7067		    stats.mac_in_errs[0], stats.mac_in_errs[1],
7068		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
7069		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
7070		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
7071		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
7072		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
7073		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
7074		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
7075		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
7076		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
7077		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
7078		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
7079		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
7080		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
7081		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
7082		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
7083		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
7084		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
7085		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
7086		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
7087		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
7088		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
7089		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
7090	} else {
7091		sbuf_printf(sb, "                 channel 0  channel 1\n");
7092		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
7093		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
7094		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
7095		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
7096		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
7097		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
7098		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
7099		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
7100		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
7101		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
7102		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
7103		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
7104		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
7105		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
7106		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
7107		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
7108	}
7109
7110	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
7111	    stats.ofld_no_neigh, stats.ofld_cong_defer);
7112
7113	rc = sbuf_finish(sb);
7114	sbuf_delete(sb);
7115
7116	return (rc);
7117}
7118
7119static int
7120sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
7121{
7122	struct adapter *sc = arg1;
7123	struct tp_params *tpp = &sc->params.tp;
7124	u_int mask;
7125	int rc;
7126
7127	mask = tpp->la_mask >> 16;
7128	rc = sysctl_handle_int(oidp, &mask, 0, req);
7129	if (rc != 0 || req->newptr == NULL)
7130		return (rc);
7131	if (mask > 0xffff)
7132		return (EINVAL);
7133	tpp->la_mask = mask << 16;
7134	t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
7135
7136	return (0);
7137}
7138
7139struct field_desc {
7140	const char *name;
7141	u_int start;
7142	u_int width;
7143};
7144
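/*
 * Print the fields of a 64-bit logic-analyzer sample as "name: value"
 * pairs driven by a field_desc table, wrapping the output at 79 columns.
 */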
7145static void
7146field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
7147{
7148	char buf[32];
7149	int line_size = 0;
7150
7151	while (f->name) {
7152		uint64_t mask = (1ULL << f->width) - 1;
7153		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
7154		    ((uintmax_t)v >> f->start) & mask);
7155
7156		if (line_size + len >= 79) {
7157			line_size = 8;
7158			sbuf_printf(sb, "\n        ");
7159		}
7160		sbuf_printf(sb, "%s ", buf);
7161		line_size += len + 1;
7162		f++;
7163	}
7164	sbuf_printf(sb, "\n");
7165}
7166
7167static const struct field_desc tp_la0[] = {
7168	{ "RcfOpCodeOut", 60, 4 },
7169	{ "State", 56, 4 },
7170	{ "WcfState", 52, 4 },
7171	{ "RcfOpcSrcOut", 50, 2 },
7172	{ "CRxError", 49, 1 },
7173	{ "ERxError", 48, 1 },
7174	{ "SanityFailed", 47, 1 },
7175	{ "SpuriousMsg", 46, 1 },
7176	{ "FlushInputMsg", 45, 1 },
7177	{ "FlushInputCpl", 44, 1 },
7178	{ "RssUpBit", 43, 1 },
7179	{ "RssFilterHit", 42, 1 },
7180	{ "Tid", 32, 10 },
7181	{ "InitTcb", 31, 1 },
7182	{ "LineNumber", 24, 7 },
7183	{ "Emsg", 23, 1 },
7184	{ "EdataOut", 22, 1 },
7185	{ "Cmsg", 21, 1 },
7186	{ "CdataOut", 20, 1 },
7187	{ "EreadPdu", 19, 1 },
7188	{ "CreadPdu", 18, 1 },
7189	{ "TunnelPkt", 17, 1 },
7190	{ "RcfPeerFin", 16, 1 },
7191	{ "RcfReasonOut", 12, 4 },
7192	{ "TxCchannel", 10, 2 },
7193	{ "RcfTxChannel", 8, 2 },
7194	{ "RxEchannel", 6, 2 },
7195	{ "RcfRxChannel", 5, 1 },
7196	{ "RcfDataOutSrdy", 4, 1 },
7197	{ "RxDvld", 3, 1 },
7198	{ "RxOoDvld", 2, 1 },
7199	{ "RxCongestion", 1, 1 },
7200	{ "TxCongestion", 0, 1 },
7201	{ NULL }
7202};
7203
7204static const struct field_desc tp_la1[] = {
7205	{ "CplCmdIn", 56, 8 },
7206	{ "CplCmdOut", 48, 8 },
7207	{ "ESynOut", 47, 1 },
7208	{ "EAckOut", 46, 1 },
7209	{ "EFinOut", 45, 1 },
7210	{ "ERstOut", 44, 1 },
7211	{ "SynIn", 43, 1 },
7212	{ "AckIn", 42, 1 },
7213	{ "FinIn", 41, 1 },
7214	{ "RstIn", 40, 1 },
7215	{ "DataIn", 39, 1 },
7216	{ "DataInVld", 38, 1 },
7217	{ "PadIn", 37, 1 },
7218	{ "RxBufEmpty", 36, 1 },
7219	{ "RxDdp", 35, 1 },
7220	{ "RxFbCongestion", 34, 1 },
7221	{ "TxFbCongestion", 33, 1 },
7222	{ "TxPktSumSrdy", 32, 1 },
7223	{ "RcfUlpType", 28, 4 },
7224	{ "Eread", 27, 1 },
7225	{ "Ebypass", 26, 1 },
7226	{ "Esave", 25, 1 },
7227	{ "Static0", 24, 1 },
7228	{ "Cread", 23, 1 },
7229	{ "Cbypass", 22, 1 },
7230	{ "Csave", 21, 1 },
7231	{ "CPktOut", 20, 1 },
7232	{ "RxPagePoolFull", 18, 2 },
7233	{ "RxLpbkPkt", 17, 1 },
7234	{ "TxLpbkPkt", 16, 1 },
7235	{ "RxVfValid", 15, 1 },
7236	{ "SynLearned", 14, 1 },
7237	{ "SetDelEntry", 13, 1 },
7238	{ "SetInvEntry", 12, 1 },
7239	{ "CpcmdDvld", 11, 1 },
7240	{ "CpcmdSave", 10, 1 },
7241	{ "RxPstructsFull", 8, 2 },
7242	{ "EpcmdDvld", 7, 1 },
7243	{ "EpcmdFlush", 6, 1 },
7244	{ "EpcmdTrimPrefix", 5, 1 },
7245	{ "EpcmdTrimPostfix", 4, 1 },
7246	{ "ERssIp4Pkt", 3, 1 },
7247	{ "ERssIp6Pkt", 2, 1 },
7248	{ "ERssTcpUdpPkt", 1, 1 },
7249	{ "ERssFceFipPkt", 0, 1 },
7250	{ NULL }
7251};
7252
7253static const struct field_desc tp_la2[] = {
7254	{ "CplCmdIn", 56, 8 },
7255	{ "MpsVfVld", 55, 1 },
7256	{ "MpsPf", 52, 3 },
7257	{ "MpsVf", 44, 8 },
7258	{ "SynIn", 43, 1 },
7259	{ "AckIn", 42, 1 },
7260	{ "FinIn", 41, 1 },
7261	{ "RstIn", 40, 1 },
7262	{ "DataIn", 39, 1 },
7263	{ "DataInVld", 38, 1 },
7264	{ "PadIn", 37, 1 },
7265	{ "RxBufEmpty", 36, 1 },
7266	{ "RxDdp", 35, 1 },
7267	{ "RxFbCongestion", 34, 1 },
7268	{ "TxFbCongestion", 33, 1 },
7269	{ "TxPktSumSrdy", 32, 1 },
7270	{ "RcfUlpType", 28, 4 },
7271	{ "Eread", 27, 1 },
7272	{ "Ebypass", 26, 1 },
7273	{ "Esave", 25, 1 },
7274	{ "Static0", 24, 1 },
7275	{ "Cread", 23, 1 },
7276	{ "Cbypass", 22, 1 },
7277	{ "Csave", 21, 1 },
7278	{ "CPktOut", 20, 1 },
7279	{ "RxPagePoolFull", 18, 2 },
7280	{ "RxLpbkPkt", 17, 1 },
7281	{ "TxLpbkPkt", 16, 1 },
7282	{ "RxVfValid", 15, 1 },
7283	{ "SynLearned", 14, 1 },
7284	{ "SetDelEntry", 13, 1 },
7285	{ "SetInvEntry", 12, 1 },
7286	{ "CpcmdDvld", 11, 1 },
7287	{ "CpcmdSave", 10, 1 },
7288	{ "RxPstructsFull", 8, 2 },
7289	{ "EpcmdDvld", 7, 1 },
7290	{ "EpcmdFlush", 6, 1 },
7291	{ "EpcmdTrimPrefix", 5, 1 },
7292	{ "EpcmdTrimPostfix", 4, 1 },
7293	{ "ERssIp4Pkt", 3, 1 },
7294	{ "ERssIp6Pkt", 2, 1 },
7295	{ "ERssTcpUdpPkt", 1, 1 },
7296	{ "ERssFceFipPkt", 0, 1 },
7297	{ NULL }
7298};
7299
7300static void
7301tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
7302{
7303
7304	field_desc_show(sb, *p, tp_la0);
7305}
7306
7307static void
7308tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
7309{
7310
7311	if (idx)
7312		sbuf_printf(sb, "\n");
7313	field_desc_show(sb, p[0], tp_la0);
7314	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
7315		field_desc_show(sb, p[1], tp_la0);
7316}
7317
7318static void
7319tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
7320{
7321
7322	if (idx)
7323		sbuf_printf(sb, "\n");
7324	field_desc_show(sb, p[0], tp_la0);
7325	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
7326		field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
7327}
7328
7329static int
7330sysctl_tp_la(SYSCTL_HANDLER_ARGS)
7331{
7332	struct adapter *sc = arg1;
7333	struct sbuf *sb;
7334	uint64_t *buf, *p;
7335	int rc;
7336	u_int i, inc;
7337	void (*show_func)(struct sbuf *, uint64_t *, int);
7338
7339	rc = sysctl_wire_old_buffer(req, 0);
7340	if (rc != 0)
7341		return (rc);
7342
7343	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7344	if (sb == NULL)
7345		return (ENOMEM);
7346
7347	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
7348
7349	t4_tp_read_la(sc, buf, NULL);
7350	p = buf;
7351
7352	switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
7353	case 2:
7354		inc = 2;
7355		show_func = tp_la_show2;
7356		break;
7357	case 3:
7358		inc = 2;
7359		show_func = tp_la_show3;
7360		break;
7361	default:
7362		inc = 1;
7363		show_func = tp_la_show;
7364	}
7365
7366	for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
7367		(*show_func)(sb, p, i);
7368
7369	rc = sbuf_finish(sb);
7370	sbuf_delete(sb);
7371	free(buf, M_CXGBE);
7372	return (rc);
7373}
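
/*
 * Decode note (derived from the code above): DBGLAMODE values 2 and 3 make
 * the TP LA capture two 64-bit words per entry, so sysctl_tp_la() steps by
 * inc = 2 and hands pairs to tp_la_show2/tp_la_show3.  In mode 3, bit 17 of
 * the first word selects whether the second word is decoded with tp_la2
 * (set) or tp_la1 (clear).  All other modes are single-word entries decoded
 * with tp_la0.
 */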
7374
7375static int
7376sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
7377{
7378	struct adapter *sc = arg1;
7379	struct sbuf *sb;
7380	int rc;
7381	u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
7382
7383	rc = sysctl_wire_old_buffer(req, 0);
7384	if (rc != 0)
7385		return (rc);
7386
7387	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7388	if (sb == NULL)
7389		return (ENOMEM);
7390
7391	t4_get_chan_txrate(sc, nrate, orate);
7392
7393	if (sc->chip_params->nchan > 2) {
7394		sbuf_printf(sb, "              channel 0   channel 1"
7395		    "   channel 2   channel 3\n");
7396		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
7397		    nrate[0], nrate[1], nrate[2], nrate[3]);
7398		sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
7399		    orate[0], orate[1], orate[2], orate[3]);
7400	} else {
7401		sbuf_printf(sb, "              channel 0   channel 1\n");
7402		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
7403		    nrate[0], nrate[1]);
7404		sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
7405		    orate[0], orate[1]);
7406	}
7407
7408	rc = sbuf_finish(sb);
7409	sbuf_delete(sb);
7410
7411	return (rc);
7412}
7413
7414static int
7415sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
7416{
7417	struct adapter *sc = arg1;
7418	struct sbuf *sb;
7419	uint32_t *buf, *p;
7420	int rc, i;
7421
7422	rc = sysctl_wire_old_buffer(req, 0);
7423	if (rc != 0)
7424		return (rc);
7425
7426	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7427	if (sb == NULL)
7428		return (ENOMEM);
7429
7430	buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
7431	    M_ZERO | M_WAITOK);
7432
7433	t4_ulprx_read_la(sc, buf);
7434	p = buf;
7435
7436	sbuf_printf(sb, "      Pcmd        Type   Message"
7437	    "                Data");
7438	for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
7439		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
7440		    p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
7441	}
7442
7443	rc = sbuf_finish(sb);
7444	sbuf_delete(sb);
7445	free(buf, M_CXGBE);
7446	return (rc);
7447}
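
/*
 * Note: each ULP RX LA entry above occupies 8 words.  The 64-bit Pcmd is
 * printed as p[1] then p[0], and the 128-bit Data field as p[7] down to
 * p[4], i.e. most significant word first, matching the column headers.
 */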
7448
7449static int
7450sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
7451{
7452	struct adapter *sc = arg1;
7453	struct sbuf *sb;
7454	int rc, v;
7455
7456	rc = sysctl_wire_old_buffer(req, 0);
7457	if (rc != 0)
7458		return (rc);
7459
7460	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7461	if (sb == NULL)
7462		return (ENOMEM);
7463
7464	v = t4_read_reg(sc, A_SGE_STAT_CFG);
7465	if (G_STATSOURCE_T5(v) == 7) {
7466		if (G_STATMODE(v) == 0) {
7467			sbuf_printf(sb, "total %d, incomplete %d",
7468			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
7469			    t4_read_reg(sc, A_SGE_STAT_MATCH));
7470		} else if (G_STATMODE(v) == 1) {
7471			sbuf_printf(sb, "total %d, data overflow %d",
7472			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
7473			    t4_read_reg(sc, A_SGE_STAT_MATCH));
7474		}
7475	}
7476	rc = sbuf_finish(sb);
7477	sbuf_delete(sb);
7478
7479	return (rc);
7480}
7481
7482static int
7483sysctl_tc_params(SYSCTL_HANDLER_ARGS)
7484{
7485	struct adapter *sc = arg1;
7486	struct tx_sched_class *tc;
7487	struct t4_sched_class_params p;
7488	struct sbuf *sb;
7489	int i, rc, port_id, flags, mbps, gbps;
7490
7491	rc = sysctl_wire_old_buffer(req, 0);
7492	if (rc != 0)
7493		return (rc);
7494
7495	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7496	if (sb == NULL)
7497		return (ENOMEM);
7498
7499	port_id = arg2 >> 16;
7500	MPASS(port_id < sc->params.nports);
7501	MPASS(sc->port[port_id] != NULL);
7502	i = arg2 & 0xffff;
7503	MPASS(i < sc->chip_params->nsched_cls);
7504	tc = &sc->port[port_id]->tc[i];
7505
7506	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7507	    "t4tc_p");
7508	if (rc)
7509		goto done;
7510	flags = tc->flags;
7511	p = tc->params;
7512	end_synchronized_op(sc, LOCK_HELD);
7513
7514	if ((flags & TX_SC_OK) == 0) {
7515		sbuf_printf(sb, "none");
7516		goto done;
7517	}
7518
7519	if (p.level == SCHED_CLASS_LEVEL_CL_WRR) {
7520		sbuf_printf(sb, "cl-wrr weight %u", p.weight);
7521		goto done;
7522	} else if (p.level == SCHED_CLASS_LEVEL_CL_RL)
7523		sbuf_printf(sb, "cl-rl");
7524	else if (p.level == SCHED_CLASS_LEVEL_CH_RL)
7525		sbuf_printf(sb, "ch-rl");
7526	else {
7527		rc = ENXIO;
7528		goto done;
7529	}
7530
7531	if (p.ratemode == SCHED_CLASS_RATEMODE_REL) {
7532		/* XXX: top speed or actual link speed? */
7533		gbps = port_top_speed(sc->port[port_id]);
7534		sbuf_printf(sb, " %u%% of %uGbps", p.maxrate, gbps);
7535	}
7536	else if (p.ratemode == SCHED_CLASS_RATEMODE_ABS) {
7537		switch (p.rateunit) {
7538		case SCHED_CLASS_RATEUNIT_BITS:
7539			mbps = p.maxrate / 1000;
7540			gbps = p.maxrate / 1000000;
7541			if (p.maxrate == gbps * 1000000)
7542				sbuf_printf(sb, " %uGbps", gbps);
7543			else if (p.maxrate == mbps * 1000)
7544				sbuf_printf(sb, " %uMbps", mbps);
7545			else
7546				sbuf_printf(sb, " %uKbps", p.maxrate);
7547			break;
7548		case SCHED_CLASS_RATEUNIT_PKTS:
7549			sbuf_printf(sb, " %upps", p.maxrate);
7550			break;
7551		default:
7552			rc = ENXIO;
7553			goto done;
7554		}
7555	}
7556
7557	switch (p.mode) {
7558	case SCHED_CLASS_MODE_CLASS:
7559		sbuf_printf(sb, " aggregate");
7560		break;
7561	case SCHED_CLASS_MODE_FLOW:
7562		sbuf_printf(sb, " per-flow");
7563		break;
7564	default:
7565		rc = ENXIO;
7566		goto done;
7567	}
7568
7569done:
7570	if (rc == 0)
7571		rc = sbuf_finish(sb);
7572	sbuf_delete(sb);
7573
7574	return (rc);
7575}
7576#endif
7577
7578#ifdef TCP_OFFLOAD
7579static void
7580unit_conv(char *buf, size_t len, u_int val, u_int factor)
7581{
7582	u_int rem = val % factor;
7583
7584	if (rem == 0)
7585		snprintf(buf, len, "%u", val / factor);
7586	else {
7587		while (rem % 10 == 0)
7588			rem /= 10;
7589		snprintf(buf, len, "%u.%u", val / factor, rem);
7590	}
7591}
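
/*
 * Worked examples (illustrative only): unit_conv(buf, len, 15625, 1000)
 * yields "15.625", and unit_conv(buf, len, 2500, 1000) trims the trailing
 * zeroes from the remainder (500 -> 5) to yield "2.5".  Note that leading
 * zeroes in the fraction are lost: 1024000 with factor 1000000 prints as
 * "1.24" rather than "1.024".
 */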
7592
7593static int
7594sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
7595{
7596	struct adapter *sc = arg1;
7597	char buf[16];
7598	u_int res, re;
7599	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7600
7601	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
7602	switch (arg2) {
7603	case 0:
7604		/* timer_tick */
7605		re = G_TIMERRESOLUTION(res);
7606		break;
7607	case 1:
7608		/* TCP timestamp tick */
7609		re = G_TIMESTAMPRESOLUTION(res);
7610		break;
7611	case 2:
7612		/* DACK tick */
7613		re = G_DELAYEDACKRESOLUTION(res);
7614		break;
7615	default:
7616		return (EDOOFUS);
7617	}
7618
7619	unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
7620
7621	return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
7622}
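
/*
 * Numeric sketch (hypothetical 200 MHz core clock, i.e. vpd.cclk == 200000
 * kHz): cclk_ps = 10^9 / 200000 = 5000 ps per core-clock cycle.  With a
 * timer resolution field of re = 8, one timer tick is (5000 << 8) ps =
 * 1280000 ps, which unit_conv() reports as "1.28" (microseconds).
 */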
7623
7624static int
7625sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
7626{
7627	struct adapter *sc = arg1;
7628	u_int res, dack_re, v;
7629	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7630
7631	res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
7632	dack_re = G_DELAYEDACKRESOLUTION(res);
7633	v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
7634
7635	return (sysctl_handle_int(oidp, &v, 0, req));
7636}
7637
7638static int
7639sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
7640{
7641	struct adapter *sc = arg1;
7642	int reg = arg2;
7643	u_int tre;
7644	u_long tp_tick_us, v;
7645	u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7646
7647	MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
7648	    reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX ||
7649	    reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX ||
7650	    reg == A_TP_FINWAIT2_TIMER);
7651
7652	tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
7653	tp_tick_us = (cclk_ps << tre) / 1000000;
7654
7655	if (reg == A_TP_INIT_SRTT)
7656		v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
7657	else
7658		v = tp_tick_us * t4_read_reg(sc, reg);
7659
7660	return (sysctl_handle_long(oidp, &v, 0, req));
7661}
7662#endif
7663
7664static uint32_t
7665fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf)
7666{
7667	uint32_t mode;
7668
7669	mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR |
7670	    T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT;
7671
7672	if (fconf & F_FRAGMENTATION)
7673		mode |= T4_FILTER_IP_FRAGMENT;
7674
7675	if (fconf & F_MPSHITTYPE)
7676		mode |= T4_FILTER_MPS_HIT_TYPE;
7677
7678	if (fconf & F_MACMATCH)
7679		mode |= T4_FILTER_MAC_IDX;
7680
7681	if (fconf & F_ETHERTYPE)
7682		mode |= T4_FILTER_ETH_TYPE;
7683
7684	if (fconf & F_PROTOCOL)
7685		mode |= T4_FILTER_IP_PROTO;
7686
7687	if (fconf & F_TOS)
7688		mode |= T4_FILTER_IP_TOS;
7689
7690	if (fconf & F_VLAN)
7691		mode |= T4_FILTER_VLAN;
7692
7693	if (fconf & F_VNIC_ID) {
7694		mode |= T4_FILTER_VNIC;
7695		if (iconf & F_VNIC)
7696			mode |= T4_FILTER_IC_VNIC;
7697	}
7698
7699	if (fconf & F_PORT)
7700		mode |= T4_FILTER_PORT;
7701
7702	if (fconf & F_FCOE)
7703		mode |= T4_FILTER_FCoE;
7704
7705	return (mode);
7706}
7707
7708static uint32_t
7709mode_to_fconf(uint32_t mode)
7710{
7711	uint32_t fconf = 0;
7712
7713	if (mode & T4_FILTER_IP_FRAGMENT)
7714		fconf |= F_FRAGMENTATION;
7715
7716	if (mode & T4_FILTER_MPS_HIT_TYPE)
7717		fconf |= F_MPSHITTYPE;
7718
7719	if (mode & T4_FILTER_MAC_IDX)
7720		fconf |= F_MACMATCH;
7721
7722	if (mode & T4_FILTER_ETH_TYPE)
7723		fconf |= F_ETHERTYPE;
7724
7725	if (mode & T4_FILTER_IP_PROTO)
7726		fconf |= F_PROTOCOL;
7727
7728	if (mode & T4_FILTER_IP_TOS)
7729		fconf |= F_TOS;
7730
7731	if (mode & T4_FILTER_VLAN)
7732		fconf |= F_VLAN;
7733
7734	if (mode & T4_FILTER_VNIC)
7735		fconf |= F_VNIC_ID;
7736
7737	if (mode & T4_FILTER_PORT)
7738		fconf |= F_PORT;
7739
7740	if (mode & T4_FILTER_FCoE)
7741		fconf |= F_FCOE;
7742
7743	return (fconf);
7744}
7745
7746static uint32_t
7747mode_to_iconf(uint32_t mode)
7748{
7749
7750	if (mode & T4_FILTER_IC_VNIC)
7751		return (F_VNIC);
7752	return (0);
7753}
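
/*
 * Round-trip sketch: fconf_iconf_to_mode() and the two helpers above are
 * inverses over the optional bits.  Illustrative case:
 *
 *	mode = fconf_iconf_to_mode(F_PORT | F_VLAN, 0);
 *	MPASS(mode_to_fconf(mode) == (F_PORT | F_VLAN));
 *
 * The fixed T4_FILTER_IPv4/IPv6/address/port bits are always present in the
 * mode but have no fconf equivalent, and T4_FILTER_IC_VNIC travels via
 * iconf (F_VNIC) rather than fconf.
 */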
7754
7755static int check_fspec_against_fconf_iconf(struct adapter *sc,
7756    struct t4_filter_specification *fs)
7757{
7758	struct tp_params *tpp = &sc->params.tp;
7759	uint32_t fconf = 0;
7760
7761	if (fs->val.frag || fs->mask.frag)
7762		fconf |= F_FRAGMENTATION;
7763
7764	if (fs->val.matchtype || fs->mask.matchtype)
7765		fconf |= F_MPSHITTYPE;
7766
7767	if (fs->val.macidx || fs->mask.macidx)
7768		fconf |= F_MACMATCH;
7769
7770	if (fs->val.ethtype || fs->mask.ethtype)
7771		fconf |= F_ETHERTYPE;
7772
7773	if (fs->val.proto || fs->mask.proto)
7774		fconf |= F_PROTOCOL;
7775
7776	if (fs->val.tos || fs->mask.tos)
7777		fconf |= F_TOS;
7778
7779	if (fs->val.vlan_vld || fs->mask.vlan_vld)
7780		fconf |= F_VLAN;
7781
7782	if (fs->val.ovlan_vld || fs->mask.ovlan_vld) {
7783		fconf |= F_VNIC_ID;
7784		if (tpp->ingress_config & F_VNIC)
7785			return (EINVAL);
7786	}
7787
7788	if (fs->val.pfvf_vld || fs->mask.pfvf_vld) {
7789		fconf |= F_VNIC_ID;
7790		if ((tpp->ingress_config & F_VNIC) == 0)
7791			return (EINVAL);
7792	}
7793
7794	if (fs->val.iport || fs->mask.iport)
7795		fconf |= F_PORT;
7796
7797	if (fs->val.fcoe || fs->mask.fcoe)
7798		fconf |= F_FCOE;
7799
7800	if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map)
7801		return (E2BIG);
7802
7803	return (0);
7804}
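
/*
 * The final test above is a bitmask subset check: (map | fconf) == map iff
 * every bit needed by the filter is already part of vlan_pri_map.
 * Illustrative case: with vlan_pri_map == (F_PORT | F_PROTOCOL), a filter
 * that matches on VLAN needs F_VLAN and fails with E2BIG because the
 * configured filter mode doesn't include it.
 */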
7805
7806static int
7807get_filter_mode(struct adapter *sc, uint32_t *mode)
7808{
7809	struct tp_params *tpp = &sc->params.tp;
7810
7811	/*
7812	 * We trust the cached values of the relevant TP registers.  This means
7813	 * things work reliably only if writes to those registers are always via
7814	 * t4_set_filter_mode.
7815	 */
7816	*mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config);
7817
7818	return (0);
7819}
7820
7821static int
7822set_filter_mode(struct adapter *sc, uint32_t mode)
7823{
7824	struct tp_params *tpp = &sc->params.tp;
7825	uint32_t fconf, iconf;
7826	int rc;
7827
7828	iconf = mode_to_iconf(mode);
7829	if ((iconf ^ tpp->ingress_config) & F_VNIC) {
7830		/*
7831		 * For now we just complain if A_TP_INGRESS_CONFIG is not
7832		 * already set to the correct value for the requested filter
7833		 * mode.  It's not clear if it's safe to write to this register
7834		 * on the fly.  (And we trust the cached value of the register).
7835		 */
7836		return (EBUSY);
7837	}
7838
7839	fconf = mode_to_fconf(mode);
7840
7841	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7842	    "t4setfm");
7843	if (rc)
7844		return (rc);
7845
7846	if (sc->tids.ftids_in_use > 0) {
7847		rc = EBUSY;
7848		goto done;
7849	}
7850
7851#ifdef TCP_OFFLOAD
7852	if (uld_active(sc, ULD_TOM)) {
7853		rc = EBUSY;
7854		goto done;
7855	}
7856#endif
7857
7858	rc = -t4_set_filter_mode(sc, fconf);
7859done:
7860	end_synchronized_op(sc, LOCK_HELD);
7861	return (rc);
7862}
7863
7864static inline uint64_t
7865get_filter_hits(struct adapter *sc, uint32_t fid)
7866{
7867	uint32_t tcb_addr;
7868
7869	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) +
7870	    (fid + sc->tids.ftid_base) * TCB_SIZE;
7871
7872	if (is_t4(sc)) {
7873		uint64_t hits;
7874
7875		read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8);
7876		return (be64toh(hits));
7877	} else {
7878		uint32_t hits;
7879
7880		read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4);
7881		return (be32toh(hits));
7882	}
7883}
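
/*
 * Offset note (as implied by the reads above): T4 keeps a 64-bit hit count
 * at byte offset 16 of the filter's TCB while T5 and later keep a 32-bit
 * count at offset 24.  Both are big-endian in adapter memory, hence the
 * be64toh()/be32toh() after the memory-window read.
 */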
7884
7885static int
7886get_filter(struct adapter *sc, struct t4_filter *t)
7887{
7888	int i, rc, nfilters = sc->tids.nftids;
7889	struct filter_entry *f;
7890
7891	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7892	    "t4getf");
7893	if (rc)
7894		return (rc);
7895
7896	if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL ||
7897	    t->idx >= nfilters) {
7898		t->idx = 0xffffffff;
7899		goto done;
7900	}
7901
7902	f = &sc->tids.ftid_tab[t->idx];
7903	for (i = t->idx; i < nfilters; i++, f++) {
7904		if (f->valid) {
7905			t->idx = i;
7906			t->l2tidx = f->l2t ? f->l2t->idx : 0;
7907			t->smtidx = f->smtidx;
7908			if (f->fs.hitcnts)
7909				t->hits = get_filter_hits(sc, t->idx);
7910			else
7911				t->hits = UINT64_MAX;
7912			t->fs = f->fs;
7913
7914			goto done;
7915		}
7916	}
7917
7918	t->idx = 0xffffffff;
7919done:
7920	end_synchronized_op(sc, LOCK_HELD);
7921	return (0);
7922}
7923
7924static int
7925set_filter(struct adapter *sc, struct t4_filter *t)
7926{
7927	unsigned int nfilters, nports;
7928	struct filter_entry *f;
7929	int i, rc;
7930
7931	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
7932	if (rc)
7933		return (rc);
7934
7935	nfilters = sc->tids.nftids;
7936	nports = sc->params.nports;
7937
7938	if (nfilters == 0) {
7939		rc = ENOTSUP;
7940		goto done;
7941	}
7942
7943	if (!(sc->flags & FULL_INIT_DONE)) {
7944		rc = EAGAIN;
7945		goto done;
7946	}
7947
7948	if (t->idx >= nfilters) {
7949		rc = EINVAL;
7950		goto done;
7951	}
7952
7953	/* Validate against the global filter mode and ingress config */
7954	rc = check_fspec_against_fconf_iconf(sc, &t->fs);
7955	if (rc != 0)
7956		goto done;
7957
7958	if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
7959		rc = EINVAL;
7960		goto done;
7961	}
7962
7963	if (t->fs.val.iport >= nports) {
7964		rc = EINVAL;
7965		goto done;
7966	}
7967
7968	/* Can't specify an iq if not steering to it */
7969	if (!t->fs.dirsteer && t->fs.iq) {
7970		rc = EINVAL;
7971		goto done;
7972	}
7973
7974	/* IPv6 filter idx must be 4 aligned */
7975	if (t->fs.type == 1 &&
7976	    ((t->idx & 0x3) || t->idx + 4 > nfilters)) {
7977		rc = EINVAL;
7978		goto done;
7979	}
7980
7981	if (sc->tids.ftid_tab == NULL) {
7982		KASSERT(sc->tids.ftids_in_use == 0,
7983		    ("%s: no memory allocated but filters_in_use > 0",
7984		    __func__));
7985
7986		sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
7987		    nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
7988		if (sc->tids.ftid_tab == NULL) {
7989			rc = ENOMEM;
7990			goto done;
7991		}
7992		mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
7993	}
7994
7995	for (i = 0; i < 4; i++) {
7996		f = &sc->tids.ftid_tab[t->idx + i];
7997
7998		if (f->pending || f->valid) {
7999			rc = EBUSY;
8000			goto done;
8001		}
8002		if (f->locked) {
8003			rc = EPERM;
8004			goto done;
8005		}
8006
8007		if (t->fs.type == 0)
8008			break;
8009	}
8010
8011	f = &sc->tids.ftid_tab[t->idx];
8012	f->fs = t->fs;
8013
8014	rc = set_filter_wr(sc, t->idx);
8015done:
8016	end_synchronized_op(sc, 0);
8017
8018	if (rc == 0) {
8019		mtx_lock(&sc->tids.ftid_lock);
8020		for (;;) {
8021			if (f->pending == 0) {
8022				rc = f->valid ? 0 : EIO;
8023				break;
8024			}
8025
8026			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
8027			    PCATCH, "t4setfw", 0)) {
8028				rc = EINPROGRESS;
8029				break;
8030			}
8031		}
8032		mtx_unlock(&sc->tids.ftid_lock);
8033	}
8034	return (rc);
8035}
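
/*
 * The wait loop above is the synchronous half of an asynchronous handshake:
 * set_filter_wr() marks the entry pending, and t4_filter_rpl() (below)
 * clears pending, sets valid on success, and wakeup()s ftid_tab.  A signal
 * while sleeping (mtx_sleep with PCATCH) abandons the wait with
 * EINPROGRESS; the firmware's reply is still processed whenever it arrives.
 */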
8036
8037static int
8038del_filter(struct adapter *sc, struct t4_filter *t)
8039{
8040	unsigned int nfilters;
8041	struct filter_entry *f;
8042	int rc;
8043
8044	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
8045	if (rc)
8046		return (rc);
8047
8048	nfilters = sc->tids.nftids;
8049
8050	if (nfilters == 0) {
8051		rc = ENOTSUP;
8052		goto done;
8053	}
8054
8055	if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
8056	    t->idx >= nfilters) {
8057		rc = EINVAL;
8058		goto done;
8059	}
8060
8061	if (!(sc->flags & FULL_INIT_DONE)) {
8062		rc = EAGAIN;
8063		goto done;
8064	}
8065
8066	f = &sc->tids.ftid_tab[t->idx];
8067
8068	if (f->pending) {
8069		rc = EBUSY;
8070		goto done;
8071	}
8072	if (f->locked) {
8073		rc = EPERM;
8074		goto done;
8075	}
8076
8077	if (f->valid) {
8078		t->fs = f->fs;	/* extra info for the caller */
8079		rc = del_filter_wr(sc, t->idx);
8080	}
8081
8082done:
8083	end_synchronized_op(sc, 0);
8084
8085	if (rc == 0) {
8086		mtx_lock(&sc->tids.ftid_lock);
8087		for (;;) {
8088			if (f->pending == 0) {
8089				rc = f->valid ? EIO : 0;
8090				break;
8091			}
8092
8093			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
8094			    PCATCH, "t4delfw", 0)) {
8095				rc = EINPROGRESS;
8096				break;
8097			}
8098		}
8099		mtx_unlock(&sc->tids.ftid_lock);
8100	}
8101
8102	return (rc);
8103}
8104
8105static void
8106clear_filter(struct filter_entry *f)
8107{
8108	if (f->l2t)
8109		t4_l2t_release(f->l2t);
8110
8111	bzero(f, sizeof (*f));
8112}
8113
8114static int
8115set_filter_wr(struct adapter *sc, int fidx)
8116{
8117	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
8118	struct fw_filter_wr *fwr;
8119	unsigned int ftid, vnic_vld, vnic_vld_mask;
8120	struct wrq_cookie cookie;
8121
8122	ASSERT_SYNCHRONIZED_OP(sc);
8123
8124	if (f->fs.newdmac || f->fs.newvlan) {
8125		/* This filter needs an L2T entry; allocate one. */
8126		f->l2t = t4_l2t_alloc_switching(sc->l2t);
8127		if (f->l2t == NULL)
8128			return (EAGAIN);
8129		if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport,
8130		    f->fs.dmac)) {
8131			t4_l2t_release(f->l2t);
8132			f->l2t = NULL;
8133			return (ENOMEM);
8134		}
8135	}
8136
8137	/* Already validated against fconf, iconf */
8138	MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0);
8139	MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0);
8140	if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld)
8141		vnic_vld = 1;
8142	else
8143		vnic_vld = 0;
8144	if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld)
8145		vnic_vld_mask = 1;
8146	else
8147		vnic_vld_mask = 0;
8148
8149	ftid = sc->tids.ftid_base + fidx;
8150
8151	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
8152	if (fwr == NULL)
8153		return (ENOMEM);
8154	bzero(fwr, sizeof(*fwr));
8155
8156	fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
8157	fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
8158	fwr->tid_to_iq =
8159	    htobe32(V_FW_FILTER_WR_TID(ftid) |
8160		V_FW_FILTER_WR_RQTYPE(f->fs.type) |
8161		V_FW_FILTER_WR_NOREPLY(0) |
8162		V_FW_FILTER_WR_IQ(f->fs.iq));
8163	fwr->del_filter_to_l2tix =
8164	    htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
8165		V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
8166		V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
8167		V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
8168		V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
8169		V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
8170		V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
8171		V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
8172		V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
8173		    f->fs.newvlan == VLAN_REWRITE) |
8174		V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
8175		    f->fs.newvlan == VLAN_REWRITE) |
8176		V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
8177		V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
8178		V_FW_FILTER_WR_PRIO(f->fs.prio) |
8179		V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
8180	fwr->ethtype = htobe16(f->fs.val.ethtype);
8181	fwr->ethtypem = htobe16(f->fs.mask.ethtype);
8182	fwr->frag_to_ovlan_vldm =
8183	    (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
8184		V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
8185		V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
8186		V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) |
8187		V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
8188		V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask));
8189	fwr->smac_sel = 0;
8190	fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
8191	    V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
8192	fwr->maci_to_matchtypem =
8193	    htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
8194		V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
8195		V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
8196		V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
8197		V_FW_FILTER_WR_PORT(f->fs.val.iport) |
8198		V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
8199		V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
8200		V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
8201	fwr->ptcl = f->fs.val.proto;
8202	fwr->ptclm = f->fs.mask.proto;
8203	fwr->ttyp = f->fs.val.tos;
8204	fwr->ttypm = f->fs.mask.tos;
8205	fwr->ivlan = htobe16(f->fs.val.vlan);
8206	fwr->ivlanm = htobe16(f->fs.mask.vlan);
8207	fwr->ovlan = htobe16(f->fs.val.vnic);
8208	fwr->ovlanm = htobe16(f->fs.mask.vnic);
8209	bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
8210	bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
8211	bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
8212	bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm));
8213	fwr->lp = htobe16(f->fs.val.dport);
8214	fwr->lpm = htobe16(f->fs.mask.dport);
8215	fwr->fp = htobe16(f->fs.val.sport);
8216	fwr->fpm = htobe16(f->fs.mask.sport);
8217	if (f->fs.newsmac)
8218		bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma));
8219
8220	f->pending = 1;
8221	sc->tids.ftids_in_use++;
8222
8223	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
8224	return (0);
8225}
8226
8227static int
8228del_filter_wr(struct adapter *sc, int fidx)
8229{
8230	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
8231	struct fw_filter_wr *fwr;
8232	unsigned int ftid;
8233	struct wrq_cookie cookie;
8234
8235	ftid = sc->tids.ftid_base + fidx;
8236
8237	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
8238	if (fwr == NULL)
8239		return (ENOMEM);
8240	bzero(fwr, sizeof (*fwr));
8241
8242	t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
8243
8244	f->pending = 1;
8245	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
8246	return (0);
8247}
8248
8249int
8250t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8251{
8252	struct adapter *sc = iq->adapter;
8253	const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
8254	unsigned int idx = GET_TID(rpl);
8255	unsigned int rc;
8256	struct filter_entry *f;
8257
8258	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
8259	    rss->opcode));
8260	MPASS(iq == &sc->sge.fwq);
8261	MPASS(is_ftid(sc, idx));
8262
8263	idx -= sc->tids.ftid_base;
8264	f = &sc->tids.ftid_tab[idx];
8265	rc = G_COOKIE(rpl->cookie);
8266
8267	mtx_lock(&sc->tids.ftid_lock);
8268	if (rc == FW_FILTER_WR_FLT_ADDED) {
8269		KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
8270		    __func__, idx));
8271		f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
8272		f->pending = 0;  /* asynchronous setup completed */
8273		f->valid = 1;
8274	} else {
8275		if (rc != FW_FILTER_WR_FLT_DELETED) {
8276			/* Add or delete failed, display an error */
8277			log(LOG_ERR,
8278			    "filter %u setup failed with error %u\n",
8279			    idx, rc);
8280		}
8281
8282		clear_filter(f);
8283		sc->tids.ftids_in_use--;
8284	}
8285	wakeup(&sc->tids.ftid_tab);
8286	mtx_unlock(&sc->tids.ftid_lock);
8287
8288	return (0);
8289}
8290
8291static int
8292set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8293{
8294
8295	MPASS(iq->set_tcb_rpl != NULL);
8296	return (iq->set_tcb_rpl(iq, rss, m));
8297}
8298
8299static int
8300l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8301{
8302
8303	MPASS(iq->l2t_write_rpl != NULL);
8304	return (iq->l2t_write_rpl(iq, rss, m));
8305}
8306
8307static int
8308get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
8309{
8310	int rc;
8311
8312	if (cntxt->cid > M_CTXTQID)
8313		return (EINVAL);
8314
8315	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
8316	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
8317		return (EINVAL);
8318
8319	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
8320	if (rc)
8321		return (rc);
8322
8323	if (sc->flags & FW_OK) {
8324		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
8325		    &cntxt->data[0]);
8326		if (rc == 0)
8327			goto done;
8328	}
8329
8330	/*
8331	 * Read via firmware failed or wasn't even attempted.  Read directly via
8332	 * the backdoor.
8333	 */
8334	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
8335done:
8336	end_synchronized_op(sc, 0);
8337	return (rc);
8338}
8339
8340static int
8341load_fw(struct adapter *sc, struct t4_data *fw)
8342{
8343	int rc;
8344	uint8_t *fw_data;
8345
8346	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
8347	if (rc)
8348		return (rc);
8349
8350	if (sc->flags & FULL_INIT_DONE) {
8351		rc = EBUSY;
8352		goto done;
8353	}
8354
8355	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
8356	if (fw_data == NULL) {
8357		rc = ENOMEM;
8358		goto done;
8359	}
8360
8361	rc = copyin(fw->data, fw_data, fw->len);
8362	if (rc == 0)
8363		rc = -t4_load_fw(sc, fw_data, fw->len);
8364
8365	free(fw_data, M_CXGBE);
8366done:
8367	end_synchronized_op(sc, 0);
8368	return (rc);
8369}
8370
8371#define MAX_READ_BUF_SIZE (128 * 1024)
8372static int
8373read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
8374{
8375	uint32_t addr, remaining, n;
8376	uint32_t *buf;
8377	int rc;
8378	uint8_t *dst;
8379
8380	rc = validate_mem_range(sc, mr->addr, mr->len);
8381	if (rc != 0)
8382		return (rc);
8383
8384	buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
8385	addr = mr->addr;
8386	remaining = mr->len;
8387	dst = (void *)mr->data;
8388
8389	while (remaining) {
8390		n = min(remaining, MAX_READ_BUF_SIZE);
8391		read_via_memwin(sc, 2, addr, buf, n);
8392
8393		rc = copyout(buf, dst, n);
8394		if (rc != 0)
8395			break;
8396
8397		dst += n;
8398		remaining -= n;
8399		addr += n;
8400	}
8401
8402	free(buf, M_CXGBE);
8403	return (rc);
8404}
8405#undef MAX_READ_BUF_SIZE
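
/*
 * read_card_mem() streams card memory to userspace through memory window 2
 * in MAX_READ_BUF_SIZE (128KB) chunks, so a single ioctl can cover a range
 * much larger than the bounce buffer.  Illustrative call (hypothetical
 * values): mr->addr = 0, mr->len = 1MB takes 8 read/copyout iterations.
 */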
8406
8407static int
8408read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
8409{
8410	int rc;
8411
8412	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
8413		return (EINVAL);
8414
8415	if (i2cd->len > sizeof(i2cd->data))
8416		return (EFBIG);
8417
8418	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
8419	if (rc)
8420		return (rc);
8421	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
8422	    i2cd->offset, i2cd->len, &i2cd->data[0]);
8423	end_synchronized_op(sc, 0);
8424
8425	return (rc);
8426}
8427
8428static int
8429in_range(int val, int lo, int hi)
8430{
8431
8432	return (val < 0 || (val <= hi && val >= lo));
8433}
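
/*
 * in_range() deliberately treats a negative value as "unset" and therefore
 * acceptable: in_range(-1, 0, 3) and in_range(2, 0, 3) are both true while
 * in_range(4, 0, 3) is false.  The scheduler-class code below relies on
 * this to let callers omit optional parameters.
 */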
8434
8435static int
8436set_sched_class_config(struct adapter *sc, int minmax)
8437{
8438	int rc;
8439
8440	if (minmax < 0)
8441		return (EINVAL);
8442
8443	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
8444	if (rc)
8445		return (rc);
8446	rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
8447	end_synchronized_op(sc, 0);
8448
8449	return (rc);
8450}
8451
8452static int
8453set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
8454    int sleep_ok)
8455{
8456	int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
8457	struct port_info *pi;
8458	struct tx_sched_class *tc;
8459
8460	if (p->level == SCHED_CLASS_LEVEL_CL_RL)
8461		fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
8462	else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
8463		fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
8464	else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
8465		fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
8466	else
8467		return (EINVAL);
8468
8469	if (p->mode == SCHED_CLASS_MODE_CLASS)
8470		fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
8471	else if (p->mode == SCHED_CLASS_MODE_FLOW)
8472		fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
8473	else
8474		return (EINVAL);
8475
8476	if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS)
8477		fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
8478	else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS)
8479		fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
8480	else
8481		return (EINVAL);
8482
8483	if (p->ratemode == SCHED_CLASS_RATEMODE_REL)
8484		fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
8485	else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS)
8486		fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
8487	else
8488		return (EINVAL);
8489
8490	/* Vet our parameters ... */
8491	if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
8492		return (ERANGE);
8493
8494	pi = sc->port[sc->chan_map[p->channel]];
8495	if (pi == NULL)
8496		return (ENXIO);
8497	MPASS(pi->tx_chan == p->channel);
8498	top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */
8499
8500	if (!in_range(p->cl, 0, sc->chip_params->nsched_cls - 1) ||
8501	    !in_range(p->minrate, 0, top_speed) ||
8502	    !in_range(p->maxrate, 0, top_speed) ||
8503	    !in_range(p->weight, 0, 100))
8504		return (ERANGE);
8505
8506	/*
8507	 * Translate any unset parameters into the firmware's
8508	 * nomenclature and/or fail the call if the parameters
8509	 * are required ...
8510	 */
8511	if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0)
8512		return (EINVAL);
8513
8514	if (p->minrate < 0)
8515		p->minrate = 0;
8516	if (p->maxrate < 0) {
8517		if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
8518		    p->level == SCHED_CLASS_LEVEL_CH_RL)
8519			return (EINVAL);
8520		else
8521			p->maxrate = 0;
8522	}
8523	if (p->weight < 0) {
8524		if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
8525			return (EINVAL);
8526		else
8527			p->weight = 0;
8528	}
8529	if (p->pktsize < 0) {
8530		if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
8531		    p->level == SCHED_CLASS_LEVEL_CH_RL)
8532			return (EINVAL);
8533		else
8534			p->pktsize = 0;
8535	}
8536
8537	rc = begin_synchronized_op(sc, NULL,
8538	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
8539	if (rc)
8540		return (rc);
8541	tc = &pi->tc[p->cl];
8542	tc->params = *p;
8543	rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode,
8544	    fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate,
8545	    p->weight, p->pktsize, sleep_ok);
8546	if (rc == 0)
8547		tc->flags |= TX_SC_OK;
8548	else {
8549		/*
8550		 * Unknown state at this point, see tc->params for what was
8551		 * attempted.
8552		 */
8553		tc->flags &= ~TX_SC_OK;
8554	}
8555	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);
8556
8557	return (rc);
8558}
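
/*
 * Units note: the rate parameters here are in Kbps.  port_top_speed()
 * returns Gbps, so a 10Gbps port gives top_speed = 10 * 1000000 = 10000000
 * Kbps, and p->minrate/p->maxrate must fall within [0, top_speed] (or be
 * negative for "unset", per in_range() above).
 */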
8559
8560int
8561t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
8562{
8563
8564	if (p->type != SCHED_CLASS_TYPE_PACKET)
8565		return (EINVAL);
8566
8567	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
8568		return (set_sched_class_config(sc, p->u.config.minmax));
8569
8570	if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
8571		return (set_sched_class_params(sc, &p->u.params, 1));
8572
8573	return (EINVAL);
8574}
8575
8576int
8577t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
8578{
8579	struct port_info *pi = NULL;
8580	struct vi_info *vi;
8581	struct sge_txq *txq;
8582	uint32_t fw_mnem, fw_queue, fw_class;
8583	int i, rc;
8584
8585	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
8586	if (rc)
8587		return (rc);
8588
8589	if (p->port >= sc->params.nports) {
8590		rc = EINVAL;
8591		goto done;
8592	}
8593
8594	/* XXX: Only supported for the main VI. */
8595	pi = sc->port[p->port];
8596	vi = &pi->vi[0];
8597	if (!(vi->flags & VI_INIT_DONE)) {
8598		/* tx queues not set up yet */
8599		rc = EAGAIN;
8600		goto done;
8601	}
8602
8603	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
8604	    !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) {
8605		rc = EINVAL;
8606		goto done;
8607	}
8608
8609	/*
8610	 * Create a template for the FW_PARAMS_CMD mnemonic and value (TX
8611	 * Scheduling Class in this case).
8612	 */
8613	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
8614	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
8615	fw_class = p->cl < 0 ? 0xffffffff : p->cl;
8616
8617	/*
8618	 * If p->queue is non-negative, then we're only changing the scheduling
8619	 * on a single specified TX queue.
8620	 */
8621	if (p->queue >= 0) {
8622		txq = &sc->sge.txq[vi->first_txq + p->queue];
8623		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8624		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8625		    &fw_class);
8626		goto done;
8627	}
8628
8629	/*
8630	 * Change the scheduling on all the TX queues for the
8631	 * interface.
8632	 */
8633	for_each_txq(vi, i, txq) {
8634		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8635		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8636		    &fw_class);
8637		if (rc)
8638			goto done;
8639	}
8640
8641	rc = 0;
8642done:
8643	end_synchronized_op(sc, 0);
8644	return (rc);
8645}
8646
8647int
8648t4_os_find_pci_capability(struct adapter *sc, int cap)
8649{
8650	int i;
8651
8652	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
8653}
8654
8655int
8656t4_os_pci_save_state(struct adapter *sc)
8657{
8658	device_t dev;
8659	struct pci_devinfo *dinfo;
8660
8661	dev = sc->dev;
8662	dinfo = device_get_ivars(dev);
8663
8664	pci_cfg_save(dev, dinfo, 0);
8665	return (0);
8666}
8667
8668int
8669t4_os_pci_restore_state(struct adapter *sc)
8670{
8671	device_t dev;
8672	struct pci_devinfo *dinfo;
8673
8674	dev = sc->dev;
8675	dinfo = device_get_ivars(dev);
8676
8677	pci_cfg_restore(dev, dinfo);
8678	return (0);
8679}
8680
8681void
8682t4_os_portmod_changed(const struct adapter *sc, int idx)
8683{
8684	struct port_info *pi = sc->port[idx];
8685	struct vi_info *vi;
8686	struct ifnet *ifp;
8687	int v;
8688	static const char *mod_str[] = {
8689		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
8690	};
8691
8692	for_each_vi(pi, v, vi) {
8693		build_medialist(pi, &vi->media);
8694	}
8695
8696	ifp = pi->vi[0].ifp;
8697	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
8698		if_printf(ifp, "transceiver unplugged.\n");
8699	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
8700		if_printf(ifp, "unknown transceiver inserted.\n");
8701	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
8702		if_printf(ifp, "unsupported transceiver inserted.\n");
8703	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
8704		if_printf(ifp, "%s transceiver inserted.\n",
8705		    mod_str[pi->mod_type]);
8706	} else {
8707		if_printf(ifp, "transceiver (type %d) inserted.\n",
8708		    pi->mod_type);
8709	}
8710}
8711
8712void
8713t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason)
8714{
8715	struct port_info *pi = sc->port[idx];
8716	struct vi_info *vi;
8717	struct ifnet *ifp;
8718	int v;
8719
8720	if (link_stat)
8721		pi->linkdnrc = -1;
8722	else {
8723		if (reason >= 0)
8724			pi->linkdnrc = reason;
8725	}
8726	for_each_vi(pi, v, vi) {
8727		ifp = vi->ifp;
8728		if (ifp == NULL)
8729			continue;
8730
8731		if (link_stat) {
8732			ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed);
8733			if_link_state_change(ifp, LINK_STATE_UP);
8734		} else {
8735			if_link_state_change(ifp, LINK_STATE_DOWN);
8736		}
8737	}
8738}
8739
8740void
8741t4_iterate(void (*func)(struct adapter *, void *), void *arg)
8742{
8743	struct adapter *sc;
8744
8745	sx_slock(&t4_list_lock);
8746	SLIST_FOREACH(sc, &t4_list, link) {
8747		/*
8748		 * func should not make any assumptions about what state sc is
8749		 * in - the only guarantee is that sc->sc_lock is a valid lock.
8750		 */
8751		func(sc, arg);
8752	}
8753	sx_sunlock(&t4_list_lock);
8754}
8755
8756static int
8757t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
8758    struct thread *td)
8759{
8760	int rc;
8761	struct adapter *sc = dev->si_drv1;
8762
8763	rc = priv_check(td, PRIV_DRIVER);
8764	if (rc != 0)
8765		return (rc);
8766
8767	switch (cmd) {
8768	case CHELSIO_T4_GETREG: {
8769		struct t4_reg *edata = (struct t4_reg *)data;
8770
8771		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
8772			return (EFAULT);
8773
8774		if (edata->size == 4)
8775			edata->val = t4_read_reg(sc, edata->addr);
8776		else if (edata->size == 8)
8777			edata->val = t4_read_reg64(sc, edata->addr);
8778		else
8779			return (EINVAL);
8780
8781		break;
8782	}
8783	case CHELSIO_T4_SETREG: {
8784		struct t4_reg *edata = (struct t4_reg *)data;
8785
8786		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
8787			return (EFAULT);
8788
8789		if (edata->size == 4) {
8790			if (edata->val & 0xffffffff00000000)
8791				return (EINVAL);
8792			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
8793		} else if (edata->size == 8)
8794			t4_write_reg64(sc, edata->addr, edata->val);
8795		else
8796			return (EINVAL);
8797		break;
8798	}
8799	case CHELSIO_T4_REGDUMP: {
8800		struct t4_regdump *regs = (struct t4_regdump *)data;
8801		int reglen = t4_get_regs_len(sc);
8802		uint8_t *buf;
8803
8804		if (regs->len < reglen) {
8805			regs->len = reglen; /* hint to the caller */
8806			return (ENOBUFS);
8807		}
8808
8809		regs->len = reglen;
8810		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
8811		get_regs(sc, regs, buf);
8812		rc = copyout(buf, regs->data, reglen);
8813		free(buf, M_CXGBE);
8814		break;
8815	}
8816	case CHELSIO_T4_GET_FILTER_MODE:
8817		rc = get_filter_mode(sc, (uint32_t *)data);
8818		break;
8819	case CHELSIO_T4_SET_FILTER_MODE:
8820		rc = set_filter_mode(sc, *(uint32_t *)data);
8821		break;
8822	case CHELSIO_T4_GET_FILTER:
8823		rc = get_filter(sc, (struct t4_filter *)data);
8824		break;
8825	case CHELSIO_T4_SET_FILTER:
8826		rc = set_filter(sc, (struct t4_filter *)data);
8827		break;
8828	case CHELSIO_T4_DEL_FILTER:
8829		rc = del_filter(sc, (struct t4_filter *)data);
8830		break;
8831	case CHELSIO_T4_GET_SGE_CONTEXT:
8832		rc = get_sge_context(sc, (struct t4_sge_context *)data);
8833		break;
8834	case CHELSIO_T4_LOAD_FW:
8835		rc = load_fw(sc, (struct t4_data *)data);
8836		break;
8837	case CHELSIO_T4_GET_MEM:
8838		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
8839		break;
8840	case CHELSIO_T4_GET_I2C:
8841		rc = read_i2c(sc, (struct t4_i2c_data *)data);
8842		break;
8843	case CHELSIO_T4_CLEAR_STATS: {
8844		int i, v;
8845		u_int port_id = *(uint32_t *)data;
8846		struct port_info *pi;
8847		struct vi_info *vi;
8848
8849		if (port_id >= sc->params.nports)
8850			return (EINVAL);
8851		pi = sc->port[port_id];
8852
8853		/* MAC stats */
8854		t4_clr_port_stats(sc, pi->tx_chan);
8855		pi->tx_parse_error = 0;
8856		mtx_lock(&sc->reg_lock);
8857		for_each_vi(pi, v, vi) {
8858			if (vi->flags & VI_INIT_DONE)
8859				t4_clr_vi_stats(sc, vi->viid);
8860		}
8861		mtx_unlock(&sc->reg_lock);
8862
8863		/*
8864		 * Since this command accepts a port, clear stats for
8865		 * all VIs on this port.
8866		 */
8867		for_each_vi(pi, v, vi) {
8868			if (vi->flags & VI_INIT_DONE) {
8869				struct sge_rxq *rxq;
8870				struct sge_txq *txq;
8871				struct sge_wrq *wrq;
8872
8873				for_each_rxq(vi, i, rxq) {
8874#if defined(INET) || defined(INET6)
8875					rxq->lro.lro_queued = 0;
8876					rxq->lro.lro_flushed = 0;
8877#endif
8878					rxq->rxcsum = 0;
8879					rxq->vlan_extraction = 0;
8880				}
8881
8882				for_each_txq(vi, i, txq) {
8883					txq->txcsum = 0;
8884					txq->tso_wrs = 0;
8885					txq->vlan_insertion = 0;
8886					txq->imm_wrs = 0;
8887					txq->sgl_wrs = 0;
8888					txq->txpkt_wrs = 0;
8889					txq->txpkts0_wrs = 0;
8890					txq->txpkts1_wrs = 0;
8891					txq->txpkts0_pkts = 0;
8892					txq->txpkts1_pkts = 0;
8893					mp_ring_reset_stats(txq->r);
8894				}
8895
8896#ifdef TCP_OFFLOAD
8897				/* nothing to clear for each ofld_rxq */
8898
8899				for_each_ofld_txq(vi, i, wrq) {
8900					wrq->tx_wrs_direct = 0;
8901					wrq->tx_wrs_copied = 0;
8902				}
8903#endif
8904
8905				if (IS_MAIN_VI(vi)) {
8906					wrq = &sc->sge.ctrlq[pi->port_id];
8907					wrq->tx_wrs_direct = 0;
8908					wrq->tx_wrs_copied = 0;
8909				}
8910			}
8911		}
8912		break;
8913	}
8914	case CHELSIO_T4_SCHED_CLASS:
8915		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
8916		break;
8917	case CHELSIO_T4_SCHED_QUEUE:
8918		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
8919		break;
8920	case CHELSIO_T4_GET_TRACER:
8921		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
8922		break;
8923	case CHELSIO_T4_SET_TRACER:
8924		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
8925		break;
8926	default:
8927		rc = ENOTTY;
8928	}
8929
8930	return (rc);
8931}
8932
8933void
8934t4_db_full(struct adapter *sc)
8935{
8936
8937	CXGBE_UNIMPLEMENTED(__func__);
8938}
8939
8940void
8941t4_db_dropped(struct adapter *sc)
8942{
8943
8944	CXGBE_UNIMPLEMENTED(__func__);
8945}
8946
8947#ifdef TCP_OFFLOAD
8948void
8949t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order)
8950{
8951
8952	t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask);
8953	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) |
8954		V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) |
8955		V_HPZ3(pgsz_order[3]));
8956}
8957
8958static int
8959toe_capability(struct vi_info *vi, int enable)
8960{
8961	int rc;
8962	struct port_info *pi = vi->pi;
8963	struct adapter *sc = pi->adapter;
8964
8965	ASSERT_SYNCHRONIZED_OP(sc);
8966
8967	if (!is_offload(sc))
8968		return (ENODEV);
8969
8970	if (enable) {
8971		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
8972			/* TOE is already enabled. */
8973			return (0);
8974		}
8975
8976		/*
8977		 * We need the port's queues around so that we're able to send
8978		 * and receive CPLs to/from the TOE even if the ifnet for this
8979		 * port has never been UP'd administratively.
8980		 */
8981		if (!(vi->flags & VI_INIT_DONE)) {
8982			rc = vi_full_init(vi);
8983			if (rc)
8984				return (rc);
8985		}
8986		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
8987			rc = vi_full_init(&pi->vi[0]);
8988			if (rc)
8989				return (rc);
8990		}
8991
8992		if (isset(&sc->offload_map, pi->port_id)) {
8993			/* TOE is enabled on another VI of this port. */
8994			pi->uld_vis++;
8995			return (0);
8996		}
8997
8998		if (!uld_active(sc, ULD_TOM)) {
8999			rc = t4_activate_uld(sc, ULD_TOM);
9000			if (rc == EAGAIN) {
9001				log(LOG_WARNING,
9002				    "You must kldload t4_tom.ko before trying "
9003				    "to enable TOE on a cxgbe interface.\n");
9004			}
9005			if (rc != 0)
9006				return (rc);
9007			KASSERT(sc->tom_softc != NULL,
9008			    ("%s: TOM activated but softc NULL", __func__));
9009			KASSERT(uld_active(sc, ULD_TOM),
9010			    ("%s: TOM activated but flag not set", __func__));
9011		}
9012
9013		/* Activate iWARP and iSCSI too, if the modules are loaded. */
9014		if (!uld_active(sc, ULD_IWARP))
9015			(void) t4_activate_uld(sc, ULD_IWARP);
9016		if (!uld_active(sc, ULD_ISCSI))
9017			(void) t4_activate_uld(sc, ULD_ISCSI);
9018
9019		pi->uld_vis++;
9020		setbit(&sc->offload_map, pi->port_id);
9021	} else {
9022		pi->uld_vis--;
9023
9024		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
9025			return (0);
9026
9027		KASSERT(uld_active(sc, ULD_TOM),
9028		    ("%s: TOM never initialized?", __func__));
9029		clrbit(&sc->offload_map, pi->port_id);
9030	}
9031
9032	return (0);
9033}
9034
9035/*
9036 * Add an upper layer driver to the global list.
9037 */
9038int
9039t4_register_uld(struct uld_info *ui)
9040{
9041	int rc = 0;
9042	struct uld_info *u;
9043
9044	sx_xlock(&t4_uld_list_lock);
9045	SLIST_FOREACH(u, &t4_uld_list, link) {
9046	    if (u->uld_id == ui->uld_id) {
9047		    rc = EEXIST;
9048		    goto done;
9049	    }
9050	}
9051
9052	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
9053	ui->refcount = 0;
9054done:
9055	sx_xunlock(&t4_uld_list_lock);
9056	return (rc);
9057}
9058
9059int
9060t4_unregister_uld(struct uld_info *ui)
9061{
9062	int rc = EINVAL;
9063	struct uld_info *u;
9064
9065	sx_xlock(&t4_uld_list_lock);
9066
9067	SLIST_FOREACH(u, &t4_uld_list, link) {
9068	    if (u == ui) {
9069		    if (ui->refcount > 0) {
9070			    rc = EBUSY;
9071			    goto done;
9072		    }
9073
9074		    SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
9075		    rc = 0;
9076		    goto done;
9077	    }
9078	}
9079done:
9080	sx_xunlock(&t4_uld_list_lock);
9081	return (rc);
9082}
9083
9084int
9085t4_activate_uld(struct adapter *sc, int id)
9086{
9087	int rc;
9088	struct uld_info *ui;
9089
9090	ASSERT_SYNCHRONIZED_OP(sc);
9091
9092	if (id < 0 || id > ULD_MAX)
9093		return (EINVAL);
9094	rc = EAGAIN;	/* kldload the module with this ULD and try again. */
9095
9096	sx_slock(&t4_uld_list_lock);
9097
9098	SLIST_FOREACH(ui, &t4_uld_list, link) {
9099		if (ui->uld_id == id) {
9100			if (!(sc->flags & FULL_INIT_DONE)) {
9101				rc = adapter_full_init(sc);
9102				if (rc != 0)
9103					break;
9104			}
9105
9106			rc = ui->activate(sc);
9107			if (rc == 0) {
9108				setbit(&sc->active_ulds, id);
9109				ui->refcount++;
9110			}
9111			break;
9112		}
9113	}
9114
9115	sx_sunlock(&t4_uld_list_lock);
9116
9117	return (rc);
9118}
9119
9120int
9121t4_deactivate_uld(struct adapter *sc, int id)
9122{
9123	int rc;
9124	struct uld_info *ui;
9125
9126	ASSERT_SYNCHRONIZED_OP(sc);
9127
9128	if (id < 0 || id > ULD_MAX)
9129		return (EINVAL);
9130	rc = ENXIO;
9131
9132	sx_slock(&t4_uld_list_lock);
9133
9134	SLIST_FOREACH(ui, &t4_uld_list, link) {
9135		if (ui->uld_id == id) {
9136			rc = ui->deactivate(sc);
9137			if (rc == 0) {
9138				clrbit(&sc->active_ulds, id);
9139				ui->refcount--;
9140			}
9141			break;
9142		}
9143	}
9144
9145	sx_sunlock(&t4_uld_list_lock);
9146
9147	return (rc);
9148}
9149
9150int
9151uld_active(struct adapter *sc, int uld_id)
9152{
9153
9154	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
9155
9156	return (isset(&sc->active_ulds, uld_id));
9157}
9158#endif
9159
9160/*
9161 * Come up with reasonable defaults for some of the tunables, provided they're
9162 * not set by the user (in which case we'll use the values as is).
9163 */
9164static void
9165tweak_tunables(void)
9166{
9167	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
9168
9169	if (t4_ntxq10g < 1) {
9170#ifdef RSS
9171		t4_ntxq10g = rss_getnumbuckets();
9172#else
9173		t4_ntxq10g = min(nc, NTXQ_10G);
9174#endif
9175	}
9176
9177	if (t4_ntxq1g < 1) {
9178#ifdef RSS
9179		/* XXX: way too many for 1GbE? */
9180		t4_ntxq1g = rss_getnumbuckets();
9181#else
9182		t4_ntxq1g = min(nc, NTXQ_1G);
9183#endif
9184	}
9185
9186	if (t4_ntxq_vi < 1)
9187		t4_ntxq_vi = min(nc, NTXQ_VI);
9188
9189	if (t4_nrxq10g < 1) {
9190#ifdef RSS
9191		t4_nrxq10g = rss_getnumbuckets();
9192#else
9193		t4_nrxq10g = min(nc, NRXQ_10G);
9194#endif
9195	}
9196
9197	if (t4_nrxq1g < 1) {
9198#ifdef RSS
9199		/* XXX: way too many for 1GbE? */
9200		t4_nrxq1g = rss_getnumbuckets();
9201#else
9202		t4_nrxq1g = min(nc, NRXQ_1G);
9203#endif
9204	}
9205
9206	if (t4_nrxq_vi < 1)
9207		t4_nrxq_vi = min(nc, NRXQ_VI);
9208
9209#ifdef TCP_OFFLOAD
9210	if (t4_nofldtxq10g < 1)
9211		t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
9212
9213	if (t4_nofldtxq1g < 1)
9214		t4_nofldtxq1g = min(nc, NOFLDTXQ_1G);
9215
9216	if (t4_nofldtxq_vi < 1)
9217		t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI);
9218
9219	if (t4_nofldrxq10g < 1)
9220		t4_nofldrxq10g = min(nc, NOFLDRXQ_10G);
9221
9222	if (t4_nofldrxq1g < 1)
9223		t4_nofldrxq1g = min(nc, NOFLDRXQ_1G);
9224
9225	if (t4_nofldrxq_vi < 1)
9226		t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI);
9227
9228	if (t4_toecaps_allowed == -1)
9229		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
9230
9231	if (t4_rdmacaps_allowed == -1) {
9232		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
9233		    FW_CAPS_CONFIG_RDMA_RDMAC;
9234	}
9235
9236	if (t4_iscsicaps_allowed == -1) {
9237		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
9238		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
9239		    FW_CAPS_CONFIG_ISCSI_T10DIF;
9240	}
9241#else
9242	if (t4_toecaps_allowed == -1)
9243		t4_toecaps_allowed = 0;
9244
9245	if (t4_rdmacaps_allowed == -1)
9246		t4_rdmacaps_allowed = 0;
9247
9248	if (t4_iscsicaps_allowed == -1)
9249		t4_iscsicaps_allowed = 0;
9250#endif
9251
9252#ifdef DEV_NETMAP
9253	if (t4_nnmtxq_vi < 1)
9254		t4_nnmtxq_vi = min(nc, NNMTXQ_VI);
9255
9256	if (t4_nnmrxq_vi < 1)
9257		t4_nnmrxq_vi = min(nc, NNMRXQ_VI);
9258#endif
9259
9260	if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS)
9261		t4_tmr_idx_10g = TMR_IDX_10G;
9262
9263	if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS)
9264		t4_pktc_idx_10g = PKTC_IDX_10G;
9265
9266	if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS)
9267		t4_tmr_idx_1g = TMR_IDX_1G;
9268
9269	if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS)
9270		t4_pktc_idx_1g = PKTC_IDX_1G;
9271
9272	if (t4_qsize_txq < 128)
9273		t4_qsize_txq = 128;
9274
9275	if (t4_qsize_rxq < 128)
9276		t4_qsize_rxq = 128;
9277	while (t4_qsize_rxq & 7)
9278		t4_qsize_rxq++;
9279
9280	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
9281}
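
/*
 * The rxq size loop above rounds t4_qsize_rxq up to the next multiple of 8
 * (e.g. 1021 -> 1024); it is equivalent to the closed form
 * (t4_qsize_rxq + 7) & ~7.
 */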
9282
9283#ifdef DDB
9284static void
9285t4_dump_tcb(struct adapter *sc, int tid)
9286{
9287	uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
9288
9289	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
9290	save = t4_read_reg(sc, reg);
9291	base = sc->memwin[2].mw_base;
9292
9293	/* Dump TCB for the tid */
9294	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
9295	tcb_addr += tid * TCB_SIZE;
9296
9297	if (is_t4(sc)) {
9298		pf = 0;
9299		win_pos = tcb_addr & ~0xf;	/* start must be 16B aligned */
9300	} else {
9301		pf = V_PFNUM(sc->pf);
9302		win_pos = tcb_addr & ~0x7f;	/* start must be 128B aligned */
9303	}
9304	t4_write_reg(sc, reg, win_pos | pf);
9305	t4_read_reg(sc, reg);
9306
9307	off = tcb_addr - win_pos;
9308	for (i = 0; i < 4; i++) {
9309		uint32_t buf[8];
9310		for (j = 0; j < 8; j++, off += 4)
9311			buf[j] = htonl(t4_read_reg(sc, base + off));
9312
9313		db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
9314		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
9315		    buf[7]);
9316	}
9317
9318	t4_write_reg(sc, reg, save);
9319	t4_read_reg(sc, reg);
9320}
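
/*
 * Note: t4_dump_tcb() repositions PCIe memory window 2 directly (window
 * starts must be 16B-aligned on T4 and 128B-aligned on T5+), reads the
 * 128-byte TCB as four rows of eight 32-bit words, and then restores the
 * saved window offset so regular memwin users are unaffected.
 */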
9321
9322static void
9323t4_dump_devlog(struct adapter *sc)
9324{
9325	struct devlog_params *dparams = &sc->params.devlog;
9326	struct fw_devlog_e e;
9327	int i, first, j, m, nentries, rc;
9328	uint64_t ftstamp = UINT64_MAX;
9329
9330	if (dparams->start == 0) {
9331		db_printf("devlog params not valid\n");
9332		return;
9333	}
9334
9335	nentries = dparams->size / sizeof(struct fw_devlog_e);
9336	m = fwmtype_to_hwmtype(dparams->memtype);
9337
9338	/* Find the first entry. */
9339	first = -1;
9340	for (i = 0; i < nentries && !db_pager_quit; i++) {
9341		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9342		    sizeof(e), (void *)&e);
9343		if (rc != 0)
9344			break;
9345
9346		if (e.timestamp == 0)
9347			break;
9348
9349		e.timestamp = be64toh(e.timestamp);
9350		if (e.timestamp < ftstamp) {
9351			ftstamp = e.timestamp;
9352			first = i;
9353		}
9354	}
9355
9356	if (first == -1)
9357		return;
9358
9359	i = first;
9360	do {
9361		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9362		    sizeof(e), (void *)&e);
9363		if (rc != 0)
9364			return;
9365
9366		if (e.timestamp == 0)
9367			return;
9368
9369		e.timestamp = be64toh(e.timestamp);
9370		e.seqno = be32toh(e.seqno);
9371		for (j = 0; j < 8; j++)
9372			e.params[j] = be32toh(e.params[j]);
9373
9374		db_printf("%10d  %15ju  %8s  %8s  ",
9375		    e.seqno, e.timestamp,
9376		    (e.level < nitems(devlog_level_strings) ?
9377			devlog_level_strings[e.level] : "UNKNOWN"),
9378		    (e.facility < nitems(devlog_facility_strings) ?
9379			devlog_facility_strings[e.facility] : "UNKNOWN"));
9380		db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
9381		    e.params[3], e.params[4], e.params[5], e.params[6],
9382		    e.params[7]);
9383
9384		if (++i == nentries)
9385			i = 0;
9386	} while (i != first && !db_pager_quit);
9387}
9388
9389static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
9390_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
9391
9392DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
9393{
9394	device_t dev;
9395	int t;
9396	bool valid;
9397
9398	valid = false;
9399	t = db_read_token();
9400	if (t == tIDENT) {
9401		dev = device_lookup_by_name(db_tok_string);
9402		valid = true;
9403	}
9404	db_skip_to_eol();
9405	if (!valid) {
9406		db_printf("usage: show t4 devlog <nexus>\n");
9407		return;
9408	}
9409
9410	if (dev == NULL) {
9411		db_printf("device not found\n");
9412		return;
9413	}
9414
9415	t4_dump_devlog(device_get_softc(dev));
9416}
9417
9418DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
9419{
9420	device_t dev;
9421	int radix, tid, t;
9422	bool valid;
9423
9424	valid = false;
9425	radix = db_radix;
9426	db_radix = 10;
9427	t = db_read_token();
9428	if (t == tIDENT) {
9429		dev = device_lookup_by_name(db_tok_string);
9430		t = db_read_token();
9431		if (t == tNUMBER) {
9432			tid = db_tok_number;
9433			valid = true;
9434		}
9435	}
9436	db_radix = radix;
9437	db_skip_to_eol();
9438	if (!valid) {
9439		db_printf("usage: show t4 tcb <nexus> <tid>\n");
9440		return;
9441	}
9442
9443	if (dev == NULL) {
9444		db_printf("device not found\n");
9445		return;
9446	}
9447	if (tid < 0) {
9448		db_printf("invalid tid\n");
9449		return;
9450	}
9451
9452	t4_dump_tcb(device_get_softc(dev), tid);
9453}
9454#endif
9455
9456static struct sx mlu;	/* mod load unload */
9457SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
9458
9459static int
9460mod_event(module_t mod, int cmd, void *arg)
9461{
9462	int rc = 0;
9463	static int loaded = 0;
9464
9465	switch (cmd) {
9466	case MOD_LOAD:
9467		sx_xlock(&mlu);
9468		if (loaded++ == 0) {
9469			t4_sge_modload();
9470			t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl);
9471			t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl);
9472			t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
9473			t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
9474			sx_init(&t4_list_lock, "T4/T5 adapters");
9475			SLIST_INIT(&t4_list);
9476#ifdef TCP_OFFLOAD
9477			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
9478			SLIST_INIT(&t4_uld_list);
9479#endif
9480			t4_tracer_modload();
9481			tweak_tunables();
9482		}
9483		sx_xunlock(&mlu);
9484		break;
9485
9486	case MOD_UNLOAD:
9487		sx_xlock(&mlu);
9488		if (--loaded == 0) {
9489			int tries;
9490
9491			sx_slock(&t4_list_lock);
9492			if (!SLIST_EMPTY(&t4_list)) {
9493				rc = EBUSY;
9494				sx_sunlock(&t4_list_lock);
9495				goto done_unload;
9496			}
9497#ifdef TCP_OFFLOAD
9498			sx_slock(&t4_uld_list_lock);
9499			if (!SLIST_EMPTY(&t4_uld_list)) {
9500				rc = EBUSY;
9501				sx_sunlock(&t4_uld_list_lock);
9502				sx_sunlock(&t4_list_lock);
9503				goto done_unload;
9504			}
9505#endif
9506			tries = 0;
9507			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
9508				uprintf("%ju clusters with custom free routine "
9509				    "still in use.\n", t4_sge_extfree_refs());
9510				pause("t4unload", 2 * hz);
9511			}
9512#ifdef TCP_OFFLOAD
9513			sx_sunlock(&t4_uld_list_lock);
9514#endif
9515			sx_sunlock(&t4_list_lock);
9516
9517			if (t4_sge_extfree_refs() == 0) {
9518				t4_tracer_modunload();
9519#ifdef TCP_OFFLOAD
9520				sx_destroy(&t4_uld_list_lock);
9521#endif
9522				sx_destroy(&t4_list_lock);
9523				t4_sge_modunload();
9524				loaded = 0;
9525			} else {
9526				rc = EBUSY;
9527				loaded++;	/* undo earlier decrement */
9528			}
9529		}
9530done_unload:
9531		sx_xunlock(&mlu);
9532		break;
9533	}
9534
9535	return (rc);
9536}
9537
9538static devclass_t t4_devclass, t5_devclass;
9539static devclass_t cxgbe_devclass, cxl_devclass;
9540static devclass_t vcxgbe_devclass, vcxl_devclass;
9541
9542DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
9543MODULE_VERSION(t4nex, 1);
9544MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
9545#ifdef DEV_NETMAP
9546MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
9547#endif /* DEV_NETMAP */
9548
9550DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
9551MODULE_VERSION(t5nex, 1);
9552MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
9553#ifdef DEV_NETMAP
9554MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
9555#endif /* DEV_NETMAP */
9556
9557DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
9558MODULE_VERSION(cxgbe, 1);
9559
9560DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
9561MODULE_VERSION(cxl, 1);
9562
9563DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
9564MODULE_VERSION(vcxgbe, 1);
9565
9566DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
9567MODULE_VERSION(vcxl, 1);
9568