t4_main.c revision 308304
/*-
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/cxgbe/t4_main.c 308304 2016-11-04 18:45:06Z jhb $");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#if defined(__i386__) || defined(__amd64__)
#include <vm/vm.h>
#include <vm/pmap.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static device_method_t t4_methods[] = {
	DEVMETHOD(device_probe,		t4_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD_END
};
static driver_t t4_driver = {
	"t4nex",
	t4_methods,
	sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
static device_method_t cxgbe_methods[] = {
	DEVMETHOD(device_probe,		cxgbe_probe),
	DEVMETHOD(device_attach,	cxgbe_attach),
	DEVMETHOD(device_detach,	cxgbe_detach),
	{ 0, 0 }
};
static driver_t cxgbe_driver = {
	"cxgbe",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
	DEVMETHOD(device_probe,		vcxgbe_probe),
	DEVMETHOD(device_attach,	vcxgbe_attach),
	DEVMETHOD(device_detach,	vcxgbe_detach),
	{ 0, 0 }
};
static driver_t vcxgbe_driver = {
	"vcxgbe",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;
static d_open_t t4_open;
static d_close_t t4_close;

static struct cdevsw t4_cdevsw = {
       .d_version = D_VERSION,
       .d_flags = 0,
       .d_open = t4_open,
       .d_close = t4_close,
       .d_ioctl = t4_ioctl,
       .d_name = "t4nex",
};

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
	DEVMETHOD(device_probe,		t5_probe),
	DEVMETHOD(device_attach,	t4_attach),
	DEVMETHOD(device_detach,	t4_detach),

	DEVMETHOD_END
};
static driver_t t5_driver = {
	"t5nex",
	t5_methods,
	sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
	"cxl",
	cxgbe_methods,
	sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
	"vcxl",
	vcxgbe_methods,
	sizeof(struct vi_info)
};

static struct cdevsw t5_cdevsw = {
       .d_version = D_VERSION,
       .d_flags = 0,
       .d_open = t4_open,
       .d_close = t4_close,
       .d_ioctl = t4_ioctl,
       .d_name = "t5nex",
};

/* ifnet + media interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);
static int cxgbe_media_change(struct ifnet *);
static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif
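
/*
 * Example (sketch only, not driver code): a hypothetical routine that needed
 * all three locks would take them in the documented order and drop them in
 * reverse:
 *
 *	sx_slock(&t4_list_lock);
 *	ADAPTER_LOCK(sc);
 *	sx_slock(&t4_uld_list_lock);
 *	...
 *	sx_sunlock(&t4_uld_list_lock);
 *	ADAPTER_UNLOCK(sc);
 *	sx_sunlock(&t4_list_lock);
 */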

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -1 as an indication to tweak_tunables() that it should
 * provide a reasonable default when the driver is loaded.
 *
 * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
 * T5 are under hw.cxl.
 */
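
/*
 * All of these are loader-time kenv settings; e.g. in /boot/loader.conf:
 *
 *	hw.cxgbe.ntxq10g="8"
 *	hw.cxgbe.config_file="uwire"
 */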

/*
 * Number of queues for tx and rx, 10G and 1G, NIC and offload.
 */
#define NTXQ_10G 16
static int t4_ntxq10g = -1;
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);

#define NRXQ_10G 8
static int t4_nrxq10g = -1;
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);

#define NTXQ_1G 4
static int t4_ntxq1g = -1;
TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);

#define NRXQ_1G 2
static int t4_nrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);

#define NTXQ_VI 1
static int t4_ntxq_vi = -1;
TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi);

#define NRXQ_VI 1
static int t4_nrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi);

static int t4_rsrv_noflowq = 0;
TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);

#ifdef TCP_OFFLOAD
#define NOFLDTXQ_10G 8
static int t4_nofldtxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);

#define NOFLDRXQ_10G 2
static int t4_nofldrxq10g = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);

#define NOFLDTXQ_1G 2
static int t4_nofldtxq1g = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);

#define NOFLDRXQ_1G 1
static int t4_nofldrxq1g = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi);

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi);
#endif

#ifdef DEV_NETMAP
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi);

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -1;
TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi);
#endif

/*
 * Holdoff parameters for 10G and 1G ports.
 */
#define TMR_IDX_10G 1
static int t4_tmr_idx_10g = TMR_IDX_10G;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);

#define PKTC_IDX_10G (-1)
static int t4_pktc_idx_10g = PKTC_IDX_10G;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);

#define TMR_IDX_1G 1
static int t4_tmr_idx_1g = TMR_IDX_1G;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);

#define PKTC_IDX_1G (-1)
static int t4_pktc_idx_1g = PKTC_IDX_1G;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
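
/*
 * A pktc_idx of -1 leaves the packet-count threshold disabled, so interrupt
 * holdoff is then governed by the timer index alone.
 */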

/*
 * Size (# of entries) of each tx and rx queue.
 */
static unsigned int t4_qsize_txq = TX_EQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);

static unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
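
/*
 * Example: hw.cxgbe.interrupt_types="4" restricts the driver to MSI-X, "2"
 * to MSI, and "1" to INTx.  The default of 7 allows all three and the driver
 * picks the best type it can actually allocate at attach time.
 */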

/*
 * Configuration file.
 */
#define DEFAULT_CF	"default"
#define FLASH_CF	"flash"
#define UWIRE_CF	"uwire"
#define FPGA_CF		"fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));

/*
 * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 */
static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
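
/*
 * Example: the default of 3 enables PAUSE in both directions, 1 is rx_pause
 * only, 2 is tx_pause only, and 0 disables PAUSE entirely.
 */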

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).
 */
static unsigned int t4_fw_install = 1;
TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);
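
/*
 * Example: hw.cxgbe.fw_install="0" prohibits any firmware flash during
 * attach, "1" (the default) allows one when needed, and "2" encourages one
 * even if the firmware already on the card would do.
 */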

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);

static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);

static int t4_switchcaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);

static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);

static int t4_toecaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);

static int t4_rdmacaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);

static int t4_tlscaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed);

static int t4_iscsicaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);

static int t4_fcoecaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);

static int t5_write_combine = 0;
TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);

static int t4_num_vis = 1;
TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);

/* Functions used by extra VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
	FW_VI_FUNC_OFLD,
	FW_VI_FUNC_IWARP,
	FW_VI_FUNC_OPENISCSI,
	FW_VI_FUNC_OPENFCOE,
	FW_VI_FUNC_FOISCSI,
	FW_VI_FUNC_FOFCOE,
};
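
/*
 * Example: hw.cxgbe.num_vis="2" gives each port one extra VI; vcxgbe_attach()
 * below maps the VI at index i to vi_mac_funcs[i] to derive a unique MAC
 * address, and t4_attach() caps num_vis at nitems(vi_mac_funcs).
 */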

struct intrs_and_queues {
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */
	uint16_t intr_flags_1g;	/* Interrupt flags for each 1G port */
	uint16_t ntxq10g;	/* # of NIC txq's for each 10G port */
	uint16_t nrxq10g;	/* # of NIC rxq's for each 10G port */
	uint16_t ntxq1g;	/* # of NIC txq's for each 1G port */
	uint16_t nrxq1g;	/* # of NIC rxq's for each 1G port */
	uint16_t rsrv_noflowq;	/* Flag whether to reserve queue 0 */
	uint16_t nofldtxq10g;	/* # of TOE txq's for each 10G port */
	uint16_t nofldrxq10g;	/* # of TOE rxq's for each 10G port */
	uint16_t nofldtxq1g;	/* # of TOE txq's for each 1G port */
	uint16_t nofldrxq1g;	/* # of TOE rxq's for each 1G port */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};

struct filter_entry {
        uint32_t valid:1;	/* filter allocated and valid */
        uint32_t locked:1;	/* filter is administratively locked */
        uint32_t pending:1;	/* filter action is pending firmware reply */
	uint32_t smtidx:8;	/* Source MAC Table index for smac */
	struct l2t_entry *l2t;	/* Layer Two Table entry for dmac */

        struct t4_filter_specification fs;
};

static int map_bars_0_and_4(struct adapter *);
static int map_bar_2(struct adapter *);
static void setup_memwin(struct adapter *);
static int validate_mem_range(struct adapter *, uint32_t, int);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
    uint32_t *);
static void memwin_info(struct adapter *, int, uint32_t *, uint32_t *);
static uint32_t position_memwin(struct adapter *, int, uint32_t);
static int cfg_itype_and_nqueues(struct adapter *, int, int, int,
    struct intrs_and_queues *);
static int prep_firmware(struct adapter *);
static int partition_resources(struct adapter *, const struct firmware *,
    const char *);
static int get_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static void build_medialist(struct port_info *, struct ifmedia *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static int setup_intr_handlers(struct adapter *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct adapter *, struct vi_info *);
static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
static void cxgbe_tick(void *);
static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
    struct mbuf *);
static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
static int fw_msg_not_handled(struct adapter *, const __be64 *);
static void t4_sysctls(struct adapter *);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
#ifdef SBUF_DRAIN
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
#endif
static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t);
static uint32_t mode_to_fconf(uint32_t);
static uint32_t mode_to_iconf(uint32_t);
static int check_fspec_against_fconf_iconf(struct adapter *,
    struct t4_filter_specification *);
static int get_filter_mode(struct adapter *, uint32_t *);
static int set_filter_mode(struct adapter *, uint32_t);
static inline uint64_t get_filter_hits(struct adapter *, uint32_t);
static int get_filter(struct adapter *, struct t4_filter *);
static int set_filter(struct adapter *, struct t4_filter *);
static int del_filter(struct adapter *, struct t4_filter *);
static void clear_filter(struct filter_entry *);
static int set_filter_wr(struct adapter *, int);
static int del_filter_wr(struct adapter *, int);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
static int set_sched_class(struct adapter *, struct t4_sched_params *);
static int set_sched_queue(struct adapter *, struct t4_sched_queue *);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, int);
#endif
static int mod_event(module_t, int, void *);

struct {
	uint16_t device;
	char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 x 1G */
	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
#ifdef notyet
	{0x5404,  "Chelsio T520-BCH"},
	{0x5405,  "Chelsio T540-BCH"},
	{0x5406,  "Chelsio T540-CH"},
	{0x5408,  "Chelsio T520-CX"},
	{0x540b,  "Chelsio B520-SR"},
	{0x540c,  "Chelsio B504-BT"},
	{0x540f,  "Chelsio Amsterdam"},
	{0x5413,  "Chelsio T580-CHR"},
#endif
};

#ifdef TCP_OFFLOAD
/*
 * service_iq() has an iq and needs the fl.  Offset of fl from the iq should be
 * exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif

/* No easy way to include t4_msg.h before adapter.h so we check this way */
CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);

CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xa000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t4_pciids); i++) {
		if (d == t4_pciids[i].device) {
			device_set_desc(dev, t4_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static int
t5_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);
	uint8_t f = pci_get_function(dev);

	if (v != PCI_VENDOR_ID_CHELSIO)
		return (ENXIO);

	/* Attach only to PF0 of the FPGA */
	if (d == 0xb000 && f != 0)
		return (ENXIO);

	for (i = 0; i < nitems(t5_pciids); i++) {
		if (d == t5_pciids[i].device) {
			device_set_desc(dev, t5_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
	device_t root_port;
	uint32_t v;

	/*
	 * The T5 chips do not properly echo the No Snoop and Relaxed
	 * Ordering attributes when replying to a TLP from a Root
	 * Port.  As a workaround, find the parent Root Port and
	 * disable No Snoop and Relaxed Ordering.  Note that this
	 * affects all devices under this root port.
	 */
	root_port = pci_find_pcie_root_port(dev);
	if (root_port == NULL) {
		device_printf(dev, "Unable to find parent root port\n");
		return;
	}

	v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
	if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
	    0)
		device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
		    device_get_nameunit(root_port));
}

static int
t4_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
	struct intrs_and_queues iaq;
	struct sge *s;
	uint8_t *buf;
#ifdef TCP_OFFLOAD
	int ofld_rqidx, ofld_tqidx;
#endif
#ifdef DEV_NETMAP
	int nm_rqidx, nm_tqidx;
#endif
	int num_vis;

	sc = device_get_softc(dev);
	sc->dev = dev;
	TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags);

	if ((pci_get_device(dev) & 0xff00) == 0x5400)
		t5_attribute_workaround(dev);
	pci_enable_busmaster(dev);
	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
		uint32_t v;

		pci_set_max_read_req(dev, 4096);
		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
		v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
		pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);

		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
	}

	sc->traceq = -1;
	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
	    device_get_nameunit(dev));
	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);

	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	sx_xlock(&t4_list_lock);
	SLIST_INSERT_HEAD(&t4_list, sc, link);
	sx_xunlock(&t4_list_lock);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->regwin_lock, "register and memory window", 0, MTX_DEF);

	rc = map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * This is the real PF# to which we're attaching.  Works from within PCI
	 * passthrough environments too, where pci_get_function() could return a
	 * different PF# depending on the passthrough configuration.  We need to
	 * use the real PF# in all our communication with the firmware.
	 */
	sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI));
	sc->mbox = sc->pf;

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
	sc->an_handler = an_not_handled;
	for (i = 0; i < nitems(sc->cpl_handler); i++)
		sc->cpl_handler[i] = cpl_not_handled;
	for (i = 0; i < nitems(sc->fw_msg_handler); i++)
		sc->fw_msg_handler[i] = fw_msg_not_handled;
	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
	t4_register_cpl_handler(sc, CPL_TRACE_PKT, t4_trace_pkt);
	t4_register_cpl_handler(sc, CPL_T5_TRACE_PKT, t5_trace_pkt);
	t4_init_sge_cpl_handlers(sc);

	/* Prepare the adapter for operation. */
	buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
	rc = -t4_prep_adapter(sc, buf);
	free(buf, M_CXGBE);
	if (rc != 0) {
		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
		goto done;
	}

	/*
	 * Do this really early, with the memory windows set up even before the
	 * character device.  The userland tool's register i/o and mem read
	 * will work even in "recovery mode".
	 */
	setup_memwin(sc);
	sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw,
	    device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s",
	    device_get_nameunit(dev));
	if (sc->cdev == NULL)
		device_printf(dev, "failed to create nexus char device.\n");
	else
		sc->cdev->si_drv1 = sc;

	/* Go no further if recovery mode has been requested. */
	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
		device_printf(dev, "recovery mode.\n");
		goto done;
	}
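
	/*
	 * (Recovery mode is requested with the loader tunable hw.cxgbe.sos;
	 * with it set, only the register and memory access paths through the
	 * nexus char device remain usable.)
	 */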

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/* Prepare the firmware for operation */
	rc = prep_firmware(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * Number of VIs to create per-port.  The first VI is the "main" regular
	 * VI for the port.  The rest are additional virtual interfaces on the
	 * same physical port.  Note that the main VI does not have native
	 * netmap support but the extra VIs do.
	 *
	 * Limit the number of VIs per port to the number of available
	 * MAC addresses per port.
	 */
	if (t4_num_vis >= 1)
		num_vis = t4_num_vis;
	else
		num_vis = 1;
	if (num_vis > nitems(vi_mac_funcs)) {
		num_vis = nitems(vi_mac_funcs);
		device_printf(dev, "Number of VIs limited to %d\n", num_vis);
	}

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.  We also figure
	 * out whether a port is 10G or 1G and use that information when
	 * calculating how many interrupts to attempt to allocate.
	 */
	n10g = n1g = 0;
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		/*
		 * XXX: vi[0] is special so we can't delay this allocation until
		 * pi->nvi's final value is known.
		 */
		pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX);
		pi->link_cfg.requested_fc |= t4_pause_settings;
		pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX);
		pi->link_cfg.fc |= t4_pause_settings;

		rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
		if (rc != 0) {
			device_printf(dev, "port %d l1cfg failed: %d\n", i, rc);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		if (is_10G_port(pi) || is_40G_port(pi)) {
			n10g++;
		} else {
			n1g++;
		}

		pi->linkdnrc = -1;

		pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1);
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */
	if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0)
		num_vis = 1;

	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
	if (num_vis > 1) {
		s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi;
		s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi;
	}
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
		s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
		if (num_vis > 1) {
			s->nofldrxq += (n10g + n1g) * (num_vis - 1) *
			    iaq.nofldrxq_vi;
			s->nofldtxq += (n10g + n1g) * (num_vis - 1) *
			    iaq.nofldtxq_vi;
		}
		s->neq += s->nofldtxq + s->nofldrxq;
		s->niq += s->nofldrxq;

		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
		    M_CXGBE, M_ZERO | M_WAITOK);
		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
		    M_CXGBE, M_ZERO | M_WAITOK);
	}
#endif
#ifdef DEV_NETMAP
	if (num_vis > 1) {
		s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi;
		s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi;
	}
	s->neq += s->nnmtxq + s->nnmrxq;
	s->niq += s->nnmrxq;

	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
	    M_CXGBE, M_ZERO | M_WAITOK);
	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
	    M_CXGBE, M_ZERO | M_WAITOK);
#endif

	s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	t4_init_l2t(sc, M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
#ifdef TCP_OFFLOAD
	ofld_rqidx = ofld_tqidx = 0;
#endif
#ifdef DEV_NETMAP
	nm_rqidx = nm_tqidx = 0;
#endif
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		pi->nvi = num_vis;
		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			if (is_10G_port(pi) || is_40G_port(pi)) {
				vi->tmr_idx = t4_tmr_idx_10g;
				vi->pktc_idx = t4_pktc_idx_10g;
				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi;
				vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi;
			} else {
				vi->tmr_idx = t4_tmr_idx_1g;
				vi->pktc_idx = t4_pktc_idx_1g;
				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi;
				vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi;
			}
			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			if (j == 0 && vi->ntxq > 1)
				vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
			else
				vi->rsrv_noflowq = 0;

#ifdef TCP_OFFLOAD
			vi->first_ofld_rxq = ofld_rqidx;
			vi->first_ofld_txq = ofld_tqidx;
			if (is_10G_port(pi) || is_40G_port(pi)) {
				vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ;
				vi->nofldrxq = j == 0 ? iaq.nofldrxq10g :
				    iaq.nofldrxq_vi;
				vi->nofldtxq = j == 0 ? iaq.nofldtxq10g :
				    iaq.nofldtxq_vi;
			} else {
				vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ;
				vi->nofldrxq = j == 0 ? iaq.nofldrxq1g :
				    iaq.nofldrxq_vi;
				vi->nofldtxq = j == 0 ? iaq.nofldtxq1g :
				    iaq.nofldtxq_vi;
			}
			ofld_rqidx += vi->nofldrxq;
			ofld_tqidx += vi->nofldtxq;
#endif
#ifdef DEV_NETMAP
			if (j > 0) {
				vi->first_nm_rxq = nm_rqidx;
				vi->first_nm_txq = nm_tqidx;
				vi->nnmrxq = iaq.nnmrxq_vi;
				vi->nnmtxq = iaq.nnmtxq_vi;
				nm_rqidx += vi->nnmrxq;
				nm_tqidx += vi->nnmtxq;
			}
#endif
		}
	}

	rc = setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	rc = bus_generic_attach(dev);
	if (rc != 0) {
		device_printf(dev,
		    "failed to attach all child ports: %d\n", rc);
		goto done;
	}

	device_printf(dev,
	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);

	t4_set_desc(sc);

done:
	if (rc != 0 && sc->cdev) {
		/* cdev was created and so cxgbetool works; recover that way. */
		device_printf(dev,
		    "error during attach, adapter is now in recovery mode.\n");
		rc = 0;
	}

	if (rc != 0)
		t4_detach(dev);
	else
		t4_sysctls(sc);

	return (rc);
}

/*
 * Idempotent
 */
static int
t4_detach(device_t dev)
{
	struct adapter *sc;
	struct port_info *pi;
	int i, rc;

	sc = device_get_softc(dev);

	if (sc->flags & FULL_INIT_DONE)
		t4_intr_disable(sc);

	if (sc->cdev) {
		destroy_dev(sc->cdev);
		sc->cdev = NULL;
	}

	rc = bus_generic_detach(dev);
	if (rc) {
		device_printf(dev,
		    "failed to detach child devices: %d\n", rc);
		return (rc);
	}

	for (i = 0; i < sc->intr_count; i++)
		t4_free_irq(sc, &sc->irq[i]);

	for (i = 0; i < MAX_NPORTS; i++) {
		pi = sc->port[i];
		if (pi) {
			t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
			if (pi->dev)
				device_delete_child(dev, pi->dev);

			mtx_destroy(&pi->pi_lock);
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
		}
	}

	if (sc->flags & FULL_INIT_DONE)
		adapter_full_uninit(sc);

	if (sc->flags & FW_OK)
		t4_fw_bye(sc, sc->mbox);

	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
		pci_release_msi(dev);

	if (sc->regs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
		    sc->regs_res);

	if (sc->udbs_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
		    sc->udbs_res);

	if (sc->msix_res)
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
		    sc->msix_res);

	if (sc->l2t)
		t4_free_l2t(sc->l2t);

#ifdef TCP_OFFLOAD
	free(sc->sge.ofld_rxq, M_CXGBE);
	free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
	free(sc->sge.nm_rxq, M_CXGBE);
	free(sc->sge.nm_txq, M_CXGBE);
#endif
	free(sc->irq, M_CXGBE);
	free(sc->sge.rxq, M_CXGBE);
	free(sc->sge.txq, M_CXGBE);
	free(sc->sge.ctrlq, M_CXGBE);
	free(sc->sge.iqmap, M_CXGBE);
	free(sc->sge.eqmap, M_CXGBE);
	free(sc->tids.ftid_tab, M_CXGBE);
	t4_destroy_dma_tag(sc);
	if (mtx_initialized(&sc->sc_lock)) {
		sx_xlock(&t4_list_lock);
		SLIST_REMOVE(&t4_list, sc, adapter, link);
		sx_xunlock(&t4_list_lock);
		mtx_destroy(&sc->sc_lock);
	}

	callout_drain(&sc->sfl_callout);
	if (mtx_initialized(&sc->tids.ftid_lock))
		mtx_destroy(&sc->tids.ftid_lock);
	if (mtx_initialized(&sc->sfl_lock))
		mtx_destroy(&sc->sfl_lock);
	if (mtx_initialized(&sc->ifp_lock))
		mtx_destroy(&sc->ifp_lock);
	if (mtx_initialized(&sc->regwin_lock))
		mtx_destroy(&sc->regwin_lock);

	bzero(sc, sizeof(*sc));

	return (0);
}

static int
cxgbe_probe(device_t dev)
{
	char buf[128];
	struct port_info *pi = device_get_softc(dev);

	snprintf(buf, sizeof(buf), "port %d", pi->port_id);
	device_set_desc_copy(dev, buf);

	return (BUS_PROBE_DEFAULT);
}

#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS)
#define T4_CAP_ENABLE (T4_CAP)

static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
	struct ifnet *ifp;
	struct sbuf *sb;

	vi->xact_addr_filt = -1;
	callout_init(&vi->tick, 1);

	/* Allocate an ifnet and set it up */
	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "Cannot allocate ifnet\n");
		return (ENOMEM);
	}
	vi->ifp = ifp;
	ifp->if_softc = vi;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

	ifp->if_init = cxgbe_init;
	ifp->if_ioctl = cxgbe_ioctl;
	ifp->if_transmit = cxgbe_transmit;
	ifp->if_qflush = cxgbe_qflush;

	ifp->if_capabilities = T4_CAP;
#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0)
		ifp->if_capabilities |= IFCAP_TOE;
#endif
	ifp->if_capenable = T4_CAP_ENABLE;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
	ifp->if_hw_tsomaxsegsize = 65536;

	/* Initialize ifmedia for this VI */
	ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
	    cxgbe_media_status);
	build_medialist(vi->pi, &vi->media);

	vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
	    EVENTHANDLER_PRI_ANY);

	ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
	if (vi->nnmrxq != 0)
		cxgbe_nm_attach(vi);
#endif
	sb = sbuf_new_auto();
	sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#ifdef TCP_OFFLOAD
	if (ifp->if_capabilities & IFCAP_TOE)
		sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
		    vi->nofldtxq, vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
		    vi->nnmtxq, vi->nnmrxq);
#endif
	sbuf_finish(sb);
	device_printf(dev, "%s\n", sbuf_data(sb));
	sbuf_delete(sb);

	vi_sysctls(vi);

	return (0);
}

static int
cxgbe_attach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct vi_info *vi;
	int i, rc;

	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);

	rc = cxgbe_vi_attach(dev, &pi->vi[0]);
	if (rc)
		return (rc);

	for_each_vi(pi, i, vi) {
		if (i == 0)
			continue;
		vi->dev = device_add_child(dev, is_t4(pi->adapter) ?
		    "vcxgbe" : "vcxl", -1);
		if (vi->dev == NULL) {
			device_printf(dev, "failed to add VI %d\n", i);
			continue;
		}
		device_set_softc(vi->dev, vi);
	}

	cxgbe_sysctls(pi);

	bus_generic_attach(dev);

	return (0);
}

static void
cxgbe_vi_detach(struct vi_info *vi)
{
	struct ifnet *ifp = vi->ifp;

	ether_ifdetach(ifp);

	if (vi->vlan_c)
		EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);

	/* Let detach proceed even if these fail. */
#ifdef DEV_NETMAP
	if (ifp->if_capabilities & IFCAP_NETMAP)
		cxgbe_nm_detach(vi);
#endif
	cxgbe_uninit_synchronized(vi);
	callout_drain(&vi->tick);
	vi_full_uninit(vi);

	ifmedia_removeall(&vi->media);
	if_free(vi->ifp);
	vi->ifp = NULL;
}

static int
cxgbe_detach(device_t dev)
{
	struct port_info *pi = device_get_softc(dev);
	struct adapter *sc = pi->adapter;
	int rc;

	/* Detach the extra VIs first. */
	rc = bus_generic_detach(dev);
	if (rc)
		return (rc);
	device_delete_children(dev);

	doom_vi(sc, &pi->vi[0]);

	if (pi->flags & HAS_TRACEQ) {
		sc->traceq = -1;	/* cloner should not create ifnet */
		t4_tracer_port_detach(sc);
	}

	cxgbe_vi_detach(&pi->vi[0]);
	callout_drain(&pi->tick);

	end_synchronized_op(sc, 0);

	return (0);
}

static void
cxgbe_init(void *arg)
{
	struct vi_info *vi = arg;
	struct adapter *sc = vi->pi->adapter;

	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
		return;
	cxgbe_init_synchronized(vi);
	end_synchronized_op(sc, 0);
}

static int
cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
{
	int rc = 0, mtu, flags, can_sleep;
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->pi->adapter;
	struct ifreq *ifr = (struct ifreq *)data;
	uint32_t mask;

	switch (cmd) {
	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
			return (EINVAL);

		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
		if (rc)
			return (rc);
		ifp->if_mtu = mtu;
		if (vi->flags & VI_INIT_DONE) {
			t4_update_fl_bufsize(ifp);
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_MTU);
		}
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFFLAGS:
		can_sleep = 0;
redo_sifflags:
		rc = begin_synchronized_op(sc, vi,
		    can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
		if (rc)
			return (rc);

		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				flags = vi->if_flags;
				if ((ifp->if_flags ^ flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					if (can_sleep == 1) {
						end_synchronized_op(sc, 0);
						can_sleep = 0;
						goto redo_sifflags;
					}
					rc = update_mac_settings(ifp,
					    XGMAC_PROMISC | XGMAC_ALLMULTI);
				}
			} else {
				if (can_sleep == 0) {
					end_synchronized_op(sc, LOCK_HELD);
					can_sleep = 1;
					goto redo_sifflags;
				}
				rc = cxgbe_init_synchronized(vi);
			}
			vi->if_flags = ifp->if_flags;
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			if (can_sleep == 0) {
				end_synchronized_op(sc, LOCK_HELD);
				can_sleep = 1;
				goto redo_sifflags;
			}
			rc = cxgbe_uninit_synchronized(vi);
		}
		end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
		rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
		if (rc)
			return (rc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
		end_synchronized_op(sc, LOCK_HELD);
		break;

	case SIOCSIFCAP:
		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
		if (rc)
			return (rc);

		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;

		/*
		 * Note that we leave CSUM_TSO alone (it is always set).  The
		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
		 * sending a TSO request our way, so it's sufficient to toggle
		 * IFCAP_TSOx only.
		 */
		if (mask & IFCAP_TSO4) {
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
		}
		if (mask & IFCAP_TSO6) {
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				rc = EAGAIN;
				goto fail;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
		}
		if (mask & IFCAP_LRO) {
#if defined(INET) || defined(INET6)
			int i;
			struct sge_rxq *rxq;

			ifp->if_capenable ^= IFCAP_LRO;
			for_each_rxq(vi, i, rxq) {
				if (ifp->if_capenable & IFCAP_LRO)
					rxq->iq.flags |= IQ_LRO_ENABLED;
				else
					rxq->iq.flags &= ~IQ_LRO_ENABLED;
			}
#endif
		}
#ifdef TCP_OFFLOAD
		if (mask & IFCAP_TOE) {
			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;

			rc = toe_capability(vi, enable);
			if (rc != 0)
				goto fail;

			ifp->if_capenable ^= mask;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				rc = update_mac_settings(ifp, XGMAC_VLANEX);
		}
		if (mask & IFCAP_VLAN_MTU) {
			ifp->if_capenable ^= IFCAP_VLAN_MTU;

			/* Need to find out how to disable auto-mtu-inflation */
		}
		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWCSUM)
			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;

#ifdef VLAN_CAPABILITIES
		VLAN_CAPABILITIES(ifp);
#endif
fail:
		end_synchronized_op(sc, 0);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		ifmedia_ioctl(ifp, ifr, &vi->media, cmd);
		break;

	case SIOCGI2C: {
		struct ifi2creq i2c;

		rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (rc != 0)
			break;
		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
			rc = EPERM;
			break;
		}
		if (i2c.len > sizeof(i2c.data)) {
			rc = EINVAL;
			break;
		}
		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
		if (rc)
			return (rc);
		rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr,
		    i2c.offset, i2c.len, &i2c.data[0]);
		end_synchronized_op(sc, 0);
		if (rc == 0)
			rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}

	default:
		rc = ether_ioctl(ifp, cmd, data);
	}

	return (rc);
}

static int
cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct sge_txq *txq;
	void *items[1];
	int rc;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */

	if (__predict_false(pi->link_cfg.link_ok == 0)) {
		m_freem(m);
		return (ENETDOWN);
	}

	rc = parse_pkt(&m);
	if (__predict_false(rc != 0)) {
		MPASS(m == NULL);			/* was freed already */
		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
		return (rc);
	}

	/* Select a txq. */
	txq = &sc->sge.txq[vi->first_txq];
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
		    vi->rsrv_noflowq);
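	/*
	 * E.g. with ntxq = 4 and rsrv_noflowq = 1, hashed traffic is spread
	 * over txq 1-3 while txq 0 stays reserved for mbufs without a flowid.
	 */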
1615
1616	items[0] = m;
1617	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
1618	if (__predict_false(rc != 0))
1619		m_freem(m);
1620
1621	return (rc);
1622}
1623
1624static void
1625cxgbe_qflush(struct ifnet *ifp)
1626{
1627	struct vi_info *vi = ifp->if_softc;
1628	struct sge_txq *txq;
1629	int i;
1630
1631	/* queues do not exist if !VI_INIT_DONE. */
1632	if (vi->flags & VI_INIT_DONE) {
1633		for_each_txq(vi, i, txq) {
1634			TXQ_LOCK(txq);
1635			txq->eq.flags &= ~EQ_ENABLED;
1636			TXQ_UNLOCK(txq);
1637			while (!mp_ring_is_idle(txq->r)) {
1638				mp_ring_check_drainage(txq->r, 0);
1639				pause("qflush", 1);
1640			}
1641		}
1642	}
1643	if_qflush(ifp);
1644}
1645
1646static int
1647cxgbe_media_change(struct ifnet *ifp)
1648{
1649	struct vi_info *vi = ifp->if_softc;
1650
1651	device_printf(vi->dev, "%s unimplemented.\n", __func__);
1652
1653	return (EOPNOTSUPP);
1654}
1655
1656static void
1657cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1658{
1659	struct vi_info *vi = ifp->if_softc;
1660	struct port_info *pi = vi->pi;
1661	struct ifmedia_entry *cur;
1662	int speed = pi->link_cfg.speed;
1663
1664	cur = vi->media.ifm_cur;
1665
1666	ifmr->ifm_status = IFM_AVALID;
1667	if (!pi->link_cfg.link_ok)
1668		return;
1669
1670	ifmr->ifm_status |= IFM_ACTIVE;
1671
1672	/* active and current will differ iff current media is autoselect. */
1673	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
1674		return;
1675
1676	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
1677	if (speed == 10000)
1678		ifmr->ifm_active |= IFM_10G_T;
1679	else if (speed == 1000)
1680		ifmr->ifm_active |= IFM_1000_T;
1681	else if (speed == 100)
1682		ifmr->ifm_active |= IFM_100_TX;
1683	else if (speed == 10)
1684		ifmr->ifm_active |= IFM_10_T;
1685	else
1686		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
1687			    speed));
1688}
1689
1690static int
1691vcxgbe_probe(device_t dev)
1692{
1693	char buf[128];
1694	struct vi_info *vi = device_get_softc(dev);
1695
1696	snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
1697	    vi - vi->pi->vi);
1698	device_set_desc_copy(dev, buf);
1699
1700	return (BUS_PROBE_DEFAULT);
1701}
1702
1703static int
1704vcxgbe_attach(device_t dev)
1705{
1706	struct vi_info *vi;
1707	struct port_info *pi;
1708	struct adapter *sc;
1709	int func, index, rc;
1710	u32 param, val;
1711
1712	vi = device_get_softc(dev);
1713	pi = vi->pi;
1714	sc = pi->adapter;
1715
1716	index = vi - pi->vi;
1717	KASSERT(index < nitems(vi_mac_funcs),
1718	    ("%s: VI %s doesn't have a MAC func", __func__,
1719	    device_get_nameunit(dev)));
1720	func = vi_mac_funcs[index];
1721	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
1722	    vi->hw_addr, &vi->rss_size, func, 0);
1723	if (rc < 0) {
1724		device_printf(dev, "Failed to allocate virtual interface "
1725		    "for port %d: %d\n", pi->port_id, -rc);
1726		return (-rc);
1727	}
1728	vi->viid = rc;
1729
1730	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
1731	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
1732	    V_FW_PARAMS_PARAM_YZ(vi->viid);
1733	rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
1734	if (rc)
1735		vi->rss_base = 0xffff;
1736	else {
1737		/* MPASS((val >> 16) == rss_size); */
1738		vi->rss_base = val & 0xffff;
1739	}
1740
1741	rc = cxgbe_vi_attach(dev, vi);
1742	if (rc) {
1743		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
1744		return (rc);
1745	}
1746	return (0);
1747}
1748
1749static int
1750vcxgbe_detach(device_t dev)
1751{
1752	struct vi_info *vi;
1753	struct adapter *sc;
1754
1755	vi = device_get_softc(dev);
1756	sc = vi->pi->adapter;
1757
1758	doom_vi(sc, vi);
1759
1760	cxgbe_vi_detach(vi);
1761	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
1762
1763	end_synchronized_op(sc, 0);
1764
1765	return (0);
1766}
1767
1768void
1769t4_fatal_err(struct adapter *sc)
1770{
1771	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
1772	t4_intr_disable(sc);
1773	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
1774	    device_get_nameunit(sc->dev));
1775}
1776
1777static int
1778map_bars_0_and_4(struct adapter *sc)
1779{
1780	sc->regs_rid = PCIR_BAR(0);
1781	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1782	    &sc->regs_rid, RF_ACTIVE);
1783	if (sc->regs_res == NULL) {
1784		device_printf(sc->dev, "cannot map registers.\n");
1785		return (ENXIO);
1786	}
1787	sc->bt = rman_get_bustag(sc->regs_res);
1788	sc->bh = rman_get_bushandle(sc->regs_res);
1789	sc->mmio_len = rman_get_size(sc->regs_res);
1790	setbit(&sc->doorbells, DOORBELL_KDB);
1791
1792	sc->msix_rid = PCIR_BAR(4);
1793	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1794	    &sc->msix_rid, RF_ACTIVE);
1795	if (sc->msix_res == NULL) {
1796		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
1797		return (ENXIO);
1798	}
1799
1800	return (0);
1801}
1802
1803static int
1804map_bar_2(struct adapter *sc)
1805{
1806
1807	/*
1808	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
1809	 * to map it if RDMA is disabled.
1810	 */
1811	if (is_t4(sc) && sc->rdmacaps == 0)
1812		return (0);
1813
1814	sc->udbs_rid = PCIR_BAR(2);
1815	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
1816	    &sc->udbs_rid, RF_ACTIVE);
1817	if (sc->udbs_res == NULL) {
1818		device_printf(sc->dev, "cannot map doorbell BAR.\n");
1819		return (ENXIO);
1820	}
1821	sc->udbs_base = rman_get_virtual(sc->udbs_res);
1822
1823	if (is_t5(sc)) {
1824		setbit(&sc->doorbells, DOORBELL_UDB);
1825#if defined(__i386__) || defined(__amd64__)
1826		if (t5_write_combine) {
1827			int rc;
1828
1829			/*
1830			 * Enable write combining on BAR2.  This is the
1831			 * userspace doorbell BAR and is split into 128B
1832			 * (UDBS_SEG_SIZE) doorbell regions, each associated
1833			 * with an egress queue.  The first 64B has the doorbell
1834			 * and the second 64B can be used to submit a tx work
1835			 * request with an implicit doorbell.
1836			 */
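			/*
			 * Illustration (editorial, derived from the comment
			 * above): with 128B segments, the doorbell region for
			 * egress queue qid starts at
			 * sc->udbs_base + qid * UDBS_SEG_SIZE, and the WC
			 * work-request copy area is the upper 64B of that
			 * segment.
			 */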
1837
1838			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
1839			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
1840			if (rc == 0) {
1841				clrbit(&sc->doorbells, DOORBELL_UDB);
1842				setbit(&sc->doorbells, DOORBELL_WCWR);
1843				setbit(&sc->doorbells, DOORBELL_UDBWC);
1844			} else {
1845				device_printf(sc->dev,
1846				    "couldn't enable write combining: %d\n",
1847				    rc);
1848			}
1849
1850			t4_write_reg(sc, A_SGE_STAT_CFG,
1851			    V_STATSOURCE_T5(7) | V_STATMODE(0));
1852		}
1853#endif
1854	}
1855
1856	return (0);
1857}
1858
1859static const struct memwin t4_memwin[] = {
1860	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
1861	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
1862	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
1863};
1864
1865static const struct memwin t5_memwin[] = {
1866	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
1867	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
1868	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
1869};
1870
1871static void
1872setup_memwin(struct adapter *sc)
1873{
1874	const struct memwin *mw;
1875	int i, n;
1876	uint32_t bar0;
1877
1878	if (is_t4(sc)) {
1879		/*
1880		 * Read low 32b of bar0 indirectly via the hardware backdoor
1881		 * mechanism.  Works from within PCI passthrough environments
1882		 * too, where rman_get_start() can return a different value.  We
1883		 * need to program the T4 memory window decoders with the actual
1884		 * addresses that will be coming across the PCIe link.
1885		 */
1886		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
1887		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
1888
1889		mw = &t4_memwin[0];
1890		n = nitems(t4_memwin);
1891	} else {
1892		/* T5 uses the relative offset inside the PCIe BAR */
1893		bar0 = 0;
1894
1895		mw = &t5_memwin[0];
1896		n = nitems(t5_memwin);
1897	}
1898
1899	for (i = 0; i < n; i++, mw++) {
1900		t4_write_reg(sc,
1901		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
1902		    (mw->base + bar0) | V_BIR(0) |
1903		    V_WINDOW(ilog2(mw->aperture) - 10));
1904	}
1905
1906	/* flush */
1907	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
1908}
1909
1910/*
1911 * Verify that the memory range specified by the addr/len pair is valid and lies
1912 * entirely within a single region (EDCx or MCx).
1913 */
1914static int
1915validate_mem_range(struct adapter *sc, uint32_t addr, int len)
1916{
1917	uint32_t em, addr_len, maddr, mlen;
1918
1919	/* Memory can only be accessed in naturally aligned 4 byte units */
1920	if (addr & 3 || len & 3 || len == 0)
1921		return (EINVAL);
1922
1923	/* Enabled memories */
1924	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
1925	if (em & F_EDRAM0_ENABLE) {
1926		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
1927		maddr = G_EDRAM0_BASE(addr_len) << 20;
1928		mlen = G_EDRAM0_SIZE(addr_len) << 20;
1929		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
1930		    addr + len <= maddr + mlen)
1931			return (0);
1932	}
1933	if (em & F_EDRAM1_ENABLE) {
1934		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
1935		maddr = G_EDRAM1_BASE(addr_len) << 20;
1936		mlen = G_EDRAM1_SIZE(addr_len) << 20;
1937		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
1938		    addr + len <= maddr + mlen)
1939			return (0);
1940	}
1941	if (em & F_EXT_MEM_ENABLE) {
1942		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
1943		maddr = G_EXT_MEM_BASE(addr_len) << 20;
1944		mlen = G_EXT_MEM_SIZE(addr_len) << 20;
1945		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
1946		    addr + len <= maddr + mlen)
1947			return (0);
1948	}
1949	if (!is_t4(sc) && em & F_EXT_MEM1_ENABLE) {
1950		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
1951		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
1952		mlen = G_EXT_MEM1_SIZE(addr_len) << 20;
1953		if (mlen > 0 && addr >= maddr && addr < maddr + mlen &&
1954		    addr + len <= maddr + mlen)
1955			return (0);
1956	}
1957
1958	return (EFAULT);
1959}
1960
1961static int
1962fwmtype_to_hwmtype(int mtype)
1963{
1964
1965	switch (mtype) {
1966	case FW_MEMTYPE_EDC0:
1967		return (MEM_EDC0);
1968	case FW_MEMTYPE_EDC1:
1969		return (MEM_EDC1);
1970	case FW_MEMTYPE_EXTMEM:
1971		return (MEM_MC0);
1972	case FW_MEMTYPE_EXTMEM1:
1973		return (MEM_MC1);
1974	default:
1975		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
1976	}
1977}
1978
1979/*
1980 * Verify that the memory range specified by the memtype/offset/len pair is
1981 * valid and lies entirely within the memtype specified.  The global address of
1982 * the start of the range is returned in addr.
1983 */
1984static int
1985validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
1986    uint32_t *addr)
1987{
1988	uint32_t em, addr_len, maddr, mlen;
1989
1990	/* Memory can only be accessed in naturally aligned 4 byte units */
1991	if (off & 3 || len & 3 || len == 0)
1992		return (EINVAL);
1993
1994	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
1995	switch (fwmtype_to_hwmtype(mtype)) {
1996	case MEM_EDC0:
1997		if (!(em & F_EDRAM0_ENABLE))
1998			return (EINVAL);
1999		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2000		maddr = G_EDRAM0_BASE(addr_len) << 20;
2001		mlen = G_EDRAM0_SIZE(addr_len) << 20;
2002		break;
2003	case MEM_EDC1:
2004		if (!(em & F_EDRAM1_ENABLE))
2005			return (EINVAL);
2006		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2007		maddr = G_EDRAM1_BASE(addr_len) << 20;
2008		mlen = G_EDRAM1_SIZE(addr_len) << 20;
2009		break;
2010	case MEM_MC:
2011		if (!(em & F_EXT_MEM_ENABLE))
2012			return (EINVAL);
2013		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2014		maddr = G_EXT_MEM_BASE(addr_len) << 20;
2015		mlen = G_EXT_MEM_SIZE(addr_len) << 20;
2016		break;
2017	case MEM_MC1:
2018		if (is_t4(sc) || !(em & F_EXT_MEM1_ENABLE))
2019			return (EINVAL);
2020		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2021		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
2022		mlen = G_EXT_MEM1_SIZE(addr_len) << 20;
2023		break;
2024	default:
2025		return (EINVAL);
2026	}
2027
2028	if (mlen > 0 && off < mlen && off + len <= mlen) {
2029		*addr = maddr + off;	/* global address */
2030		return (0);
2031	}
2032
2033	return (EFAULT);
2034}
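
/*
 * e.g. (editorial): validate_mt_off_len(sc, FW_MEMTYPE_EDC0, 0x1000, 64,
 * &addr) succeeds only if EDC0 is enabled and at least 0x1040 bytes long,
 * and then returns EDC0's base address + 0x1000 in addr.
 */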
2035
2036static void
2037memwin_info(struct adapter *sc, int win, uint32_t *base, uint32_t *aperture)
2038{
2039	const struct memwin *mw;
2040
2041	if (is_t4(sc)) {
2042		KASSERT(win >= 0 && win < nitems(t4_memwin),
2043		    ("%s: incorrect memwin# (%d)", __func__, win));
2044		mw = &t4_memwin[win];
2045	} else {
2046		KASSERT(win >= 0 && win < nitems(t5_memwin),
2047		    ("%s: incorrect memwin# (%d)", __func__, win));
2048		mw = &t5_memwin[win];
2049	}
2050
2051	if (base != NULL)
2052		*base = mw->base;
2053	if (aperture != NULL)
2054		*aperture = mw->aperture;
2055}
2056
2057/*
2058 * Positions the memory window such that it can be used to access the specified
2059 * address in the chip's address space.  The return value is the offset of addr
2060 * from the start of the window.
2061 */
2062static uint32_t
2063position_memwin(struct adapter *sc, int n, uint32_t addr)
2064{
2065	uint32_t start, pf;
2066	uint32_t reg;
2067
2068	KASSERT(n >= 0 && n <= 3,
2069	    ("%s: invalid window %d.", __func__, n));
2070	KASSERT((addr & 3) == 0,
2071	    ("%s: addr (0x%x) is not at a 4B boundary.", __func__, addr));
2072
2073	if (is_t4(sc)) {
2074		pf = 0;
2075		start = addr & ~0xf;	/* start must be 16B aligned */
2076	} else {
2077		pf = V_PFNUM(sc->pf);
2078		start = addr & ~0x7f;	/* start must be 128B aligned */
2079	}
2080	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, n);
2081
2082	t4_write_reg(sc, reg, start | pf);
2083	t4_read_reg(sc, reg);
2084
2085	return (addr - start);
2086}
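
/*
 * Editorial sketch, not called anywhere in this driver: read one 32-bit
 * word at an arbitrary chip address through memory window 2 using the two
 * helpers above.  Assumes addr is 4B aligned, has already been checked
 * with validate_mem_range(), and that window 2 is not in use by anyone
 * else.
 */
static __unused uint32_t
read_one_word_via_memwin2(struct adapter *sc, uint32_t addr)
{
	uint32_t mw_base, off;

	memwin_info(sc, 2, &mw_base, NULL);
	off = position_memwin(sc, 2, addr);

	return (t4_read_reg(sc, mw_base + off));
}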
2087
2088static int
2089cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis,
2090    struct intrs_and_queues *iaq)
2091{
2092	int rc, itype, navail, nrxq10g, nrxq1g, n;
2093	int nofldrxq10g = 0, nofldrxq1g = 0;
2094
2095	bzero(iaq, sizeof(*iaq));
2096
2097	iaq->ntxq10g = t4_ntxq10g;
2098	iaq->ntxq1g = t4_ntxq1g;
2099	iaq->ntxq_vi = t4_ntxq_vi;
2100	iaq->nrxq10g = nrxq10g = t4_nrxq10g;
2101	iaq->nrxq1g = nrxq1g = t4_nrxq1g;
2102	iaq->nrxq_vi = t4_nrxq_vi;
2103	iaq->rsrv_noflowq = t4_rsrv_noflowq;
2104#ifdef TCP_OFFLOAD
2105	if (is_offload(sc)) {
2106		iaq->nofldtxq10g = t4_nofldtxq10g;
2107		iaq->nofldtxq1g = t4_nofldtxq1g;
2108		iaq->nofldtxq_vi = t4_nofldtxq_vi;
2109		iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
2110		iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
2111		iaq->nofldrxq_vi = t4_nofldrxq_vi;
2112	}
2113#endif
2114#ifdef DEV_NETMAP
2115	iaq->nnmtxq_vi = t4_nnmtxq_vi;
2116	iaq->nnmrxq_vi = t4_nnmrxq_vi;
2117#endif
2118
2119	for (itype = INTR_MSIX; itype; itype >>= 1) {
2120
2121		if ((itype & t4_intr_types) == 0)
2122			continue;	/* not allowed */
2123
2124		if (itype == INTR_MSIX)
2125			navail = pci_msix_count(sc->dev);
2126		else if (itype == INTR_MSI)
2127			navail = pci_msi_count(sc->dev);
2128		else
2129			navail = 1;
2130restart:
2131		if (navail == 0)
2132			continue;
2133
2134		iaq->intr_type = itype;
2135		iaq->intr_flags_10g = 0;
2136		iaq->intr_flags_1g = 0;
2137
2138		/*
2139		 * Best option: an interrupt vector for errors, one for the
2140		 * firmware event queue, and one for every rxq (NIC and TOE) of
2141		 * every VI.  The VIs that support netmap use the same
2142		 * interrupts for the NIC rx queues and the netmap rx queues
2143		 * because only one set of queues is active at a time.
2144		 */
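		/*
		 * Worked example (editorial): two 10G ports with nrxq10g = 8,
		 * nofldrxq10g = 2, and num_vis = 1 need T4_EXTRA_INTR (2: the
		 * error interrupt and the firmware event queue) plus
		 * 2 * (8 + 2), i.e. 22 vectors, at this point.
		 */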
2145		iaq->nirq = T4_EXTRA_INTR;
2146		iaq->nirq += n10g * (nrxq10g + nofldrxq10g);
2147		iaq->nirq += n1g * (nrxq1g + nofldrxq1g);
2148		iaq->nirq += (n10g + n1g) * (num_vis - 1) *
2149		    max(iaq->nrxq_vi, iaq->nnmrxq_vi);	/* See comment above. */
2150		iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi;
2151		if (iaq->nirq <= navail &&
2152		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
2153			iaq->intr_flags_10g = INTR_ALL;
2154			iaq->intr_flags_1g = INTR_ALL;
2155			goto allocate;
2156		}
2157
2158		/* Disable the VIs (and netmap) if there aren't enough intrs */
2159		if (num_vis > 1) {
2160			device_printf(sc->dev, "virtual interfaces disabled "
2161			    "because num_vis=%u with current settings "
2162			    "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, "
2163			    "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, "
2164			    "nnmrxq_vi=%u) would need %u interrupts but "
2165			    "only %u are available.\n", num_vis, nrxq10g,
2166			    nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi,
2167			    iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq,
2168			    navail);
2169			num_vis = 1;
2170			iaq->ntxq_vi = iaq->nrxq_vi = 0;
2171			iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
2172			iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
2173			goto restart;
2174		}
2175
2176		/*
2177		 * Second best option: a vector for errors, one for the firmware
2178		 * event queue, and vectors for either all the NIC rx queues or
2179		 * all the TOE rx queues.  The queues that don't get vectors
2180		 * will forward their interrupts to those that do.
2181		 */
2182		iaq->nirq = T4_EXTRA_INTR;
2183		if (nrxq10g >= nofldrxq10g) {
2184			iaq->intr_flags_10g = INTR_RXQ;
2185			iaq->nirq += n10g * nrxq10g;
2186		} else {
2187			iaq->intr_flags_10g = INTR_OFLD_RXQ;
2188			iaq->nirq += n10g * nofldrxq10g;
2189		}
2190		if (nrxq1g >= nofldrxq1g) {
2191			iaq->intr_flags_1g = INTR_RXQ;
2192			iaq->nirq += n1g * nrxq1g;
2193		} else {
2194			iaq->intr_flags_1g = INTR_OFLD_RXQ;
2195			iaq->nirq += n1g * nofldrxq1g;
2196		}
2197		if (iaq->nirq <= navail &&
2198		    (itype != INTR_MSI || powerof2(iaq->nirq)))
2199			goto allocate;
2200
2201		/*
2202		 * Next best option: an interrupt vector for errors, one for the
2203		 * firmware event queue, and at least one per main-VI.  At this
2204		 * point we know we'll have to downsize nrxq and/or nofldrxq to
2205		 * fit what's available to us.
2206		 */
2207		iaq->nirq = T4_EXTRA_INTR;
2208		iaq->nirq += n10g + n1g;
2209		if (iaq->nirq <= navail) {
2210			int leftover = navail - iaq->nirq;
2211
2212			if (n10g > 0) {
2213				int target = max(nrxq10g, nofldrxq10g);
2214
2215				iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ?
2216				    INTR_RXQ : INTR_OFLD_RXQ;
2217
2218				n = 1;
2219				while (n < target && leftover >= n10g) {
2220					leftover -= n10g;
2221					iaq->nirq += n10g;
2222					n++;
2223				}
2224				iaq->nrxq10g = min(n, nrxq10g);
2225#ifdef TCP_OFFLOAD
2226				iaq->nofldrxq10g = min(n, nofldrxq10g);
2227#endif
2228			}
2229
2230			if (n1g > 0) {
2231				int target = max(nrxq1g, nofldrxq1g);
2232
2233				iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ?
2234				    INTR_RXQ : INTR_OFLD_RXQ;
2235
2236				n = 1;
2237				while (n < target && leftover >= n1g) {
2238					leftover -= n1g;
2239					iaq->nirq += n1g;
2240					n++;
2241				}
2242				iaq->nrxq1g = min(n, nrxq1g);
2243#ifdef TCP_OFFLOAD
2244				iaq->nofldrxq1g = min(n, nofldrxq1g);
2245#endif
2246			}
2247
2248			if (itype != INTR_MSI || powerof2(iaq->nirq))
2249				goto allocate;
2250		}
2251
2252		/*
2253		 * Least desirable option: one interrupt vector for everything.
2254		 */
2255		iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
2256		iaq->intr_flags_10g = iaq->intr_flags_1g = 0;
2257#ifdef TCP_OFFLOAD
2258		if (is_offload(sc))
2259			iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
2260#endif
2261allocate:
2262		navail = iaq->nirq;
2263		rc = 0;
2264		if (itype == INTR_MSIX)
2265			rc = pci_alloc_msix(sc->dev, &navail);
2266		else if (itype == INTR_MSI)
2267			rc = pci_alloc_msi(sc->dev, &navail);
2268
2269		if (rc == 0) {
2270			if (navail == iaq->nirq)
2271				return (0);
2272
2273			/*
2274			 * Didn't get the number requested.  Use whatever number
2275			 * the kernel is willing to allocate (it's in navail).
2276			 */
2277			device_printf(sc->dev, "fewer vectors than requested, "
2278			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
2279			    itype, iaq->nirq, navail);
2280			pci_release_msi(sc->dev);
2281			goto restart;
2282		}
2283
2284		device_printf(sc->dev,
2285		    "failed to allocate vectors: %d, type=%d, req=%d, rcvd=%d\n",
2286		    rc, itype, iaq->nirq, navail);
2287	}
2288
2289	device_printf(sc->dev,
2290	    "failed to find a usable interrupt type.  "
2291	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
2292	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
2293
2294	return (ENXIO);
2295}
2296
2297#define FW_VERSION(chip) ( \
2298    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
2299    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
2300    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
2301    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
2302#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
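
/*
 * e.g. FW_VERSION(T4) packs T4FW_VERSION_{MAJOR,MINOR,MICRO,BUILD} into a
 * single 32-bit word, and FW_INTFVER(T4, NIC) expands to
 * T4FW_HDR_INTFVER_NIC.
 */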
2303
2304struct fw_info {
2305	uint8_t chip;
2306	char *kld_name;
2307	char *fw_mod_name;
2308	struct fw_hdr fw_hdr;	/* XXX: waste of space, need a sparse struct */
2309} fw_info[] = {
2310	{
2311		.chip = CHELSIO_T4,
2312		.kld_name = "t4fw_cfg",
2313		.fw_mod_name = "t4fw",
2314		.fw_hdr = {
2315			.chip = FW_HDR_CHIP_T4,
2316			.fw_ver = htobe32_const(FW_VERSION(T4)),
2317			.intfver_nic = FW_INTFVER(T4, NIC),
2318			.intfver_vnic = FW_INTFVER(T4, VNIC),
2319			.intfver_ofld = FW_INTFVER(T4, OFLD),
2320			.intfver_ri = FW_INTFVER(T4, RI),
2321			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
2322			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
2323			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
2324			.intfver_fcoe = FW_INTFVER(T4, FCOE),
2325		},
2326	}, {
2327		.chip = CHELSIO_T5,
2328		.kld_name = "t5fw_cfg",
2329		.fw_mod_name = "t5fw",
2330		.fw_hdr = {
2331			.chip = FW_HDR_CHIP_T5,
2332			.fw_ver = htobe32_const(FW_VERSION(T5)),
2333			.intfver_nic = FW_INTFVER(T5, NIC),
2334			.intfver_vnic = FW_INTFVER(T5, VNIC),
2335			.intfver_ofld = FW_INTFVER(T5, OFLD),
2336			.intfver_ri = FW_INTFVER(T5, RI),
2337			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
2338			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
2339			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
2340			.intfver_fcoe = FW_INTFVER(T5, FCOE),
2341		},
2342	}
2343};
2344
2345static struct fw_info *
2346find_fw_info(int chip)
2347{
2348	int i;
2349
2350	for (i = 0; i < nitems(fw_info); i++) {
2351		if (fw_info[i].chip == chip)
2352			return (&fw_info[i]);
2353	}
2354	return (NULL);
2355}
2356
2357/*
2358 * Is the given firmware API compatible with the one the driver was compiled
2359 * with?
2360 */
2361static int
2362fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
2363{
2364
2365	/* short circuit if it's the exact same firmware version */
2366	if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
2367		return (1);
2368
2369	/*
2370	 * XXX: Is this too conservative?  Perhaps I should limit this to the
2371	 * features that are supported in the driver.
2372	 */
2373#define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
2374	if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
2375	    SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
2376	    SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
2377		return (1);
2378#undef SAME_INTF
2379
2380	return (0);
2381}
2382
2383/*
2384 * The firmware in the KLD is usable, but should it be installed?  This routine
2385 * explains itself in detail if it indicates the KLD firmware should be
2386 * installed.
2387 */
2388static int
2389should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
2390{
2391	const char *reason;
2392
2393	if (!card_fw_usable) {
2394		reason = "incompatible or unusable";
2395		goto install;
2396	}
2397
2398	if (k > c) {
2399		reason = "older than the version bundled with this driver";
2400		goto install;
2401	}
2402
2403	if (t4_fw_install == 2 && k != c) {
2404		reason = "different than the version bundled with this driver";
2405		goto install;
2406	}
2407
2408	return (0);
2409
2410install:
2411	if (t4_fw_install == 0) {
2412		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2413		    "but the driver is prohibited from installing a different "
2414		    "firmware on the card.\n",
2415		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2416		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
2417
2418		return (0);
2419	}
2420
2421	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2422	    "installing firmware %u.%u.%u.%u on card.\n",
2423	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2424	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
2425	    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2426	    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2427
2428	return (1);
2429}
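
/*
 * e.g. (editorial): with t4_fw_install = 0 this routine never recommends
 * an install, with 2 it recommends one whenever the KLD and card versions
 * differ at all, and otherwise it recommends one only when the card's
 * firmware is older than the KLD's or unusable.
 */
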
2430/*
2431 * Establish contact with the firmware and determine if we are the master driver
2432 * or not, and whether we are responsible for chip initialization.
2433 */
2434static int
2435prep_firmware(struct adapter *sc)
2436{
2437	const struct firmware *fw = NULL, *default_cfg;
2438	int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
2439	enum dev_state state;
2440	struct fw_info *fw_info;
2441	struct fw_hdr *card_fw;		/* fw on the card */
2442	const struct fw_hdr *kld_fw;	/* fw in the KLD */
2443	const struct fw_hdr *drv_fw;	/* fw header the driver was compiled
2444					   against */
2445
2446	/* Contact firmware. */
2447	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
2448	if (rc < 0 || state == DEV_STATE_ERR) {
2449		rc = -rc;
2450		device_printf(sc->dev,
2451		    "failed to connect to the firmware: %d, %d.\n", rc, state);
2452		return (rc);
2453	}
2454	pf = rc;
2455	if (pf == sc->mbox)
2456		sc->flags |= MASTER_PF;
2457	else if (state == DEV_STATE_UNINIT) {
2458		/*
2459		 * We didn't get to be the master so we definitely won't be
2460		 * configuring the chip.  It's a bug if someone else hasn't
2461		 * configured it already.
2462		 */
2463		device_printf(sc->dev, "couldn't be master(%d), "
2464		    "device not already initialized either(%d).\n", rc, state);
2465		return (EDOOFUS);
2466	}
2467
2468	/* This is the firmware whose headers the driver was compiled against */
2469	fw_info = find_fw_info(chip_id(sc));
2470	if (fw_info == NULL) {
2471		device_printf(sc->dev,
2472		    "unable to look up firmware information for chip %d.\n",
2473		    chip_id(sc));
2474		return (EINVAL);
2475	}
2476	drv_fw = &fw_info->fw_hdr;
2477
2478	/*
2479	 * The firmware KLD contains many modules.  The KLD name is also the
2480	 * name of the module that contains the default config file.
2481	 */
2482	default_cfg = firmware_get(fw_info->kld_name);
2483
2484	/* Read the header of the firmware on the card */
2485	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
2486	rc = -t4_read_flash(sc, FLASH_FW_START,
2487	    sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1);
2488	if (rc == 0)
2489		card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw);
2490	else {
2491		device_printf(sc->dev,
2492		    "Unable to read card's firmware header: %d\n", rc);
2493		card_fw_usable = 0;
2494	}
2495
2496	/* This is the firmware in the KLD */
2497	fw = firmware_get(fw_info->fw_mod_name);
2498	if (fw != NULL) {
2499		kld_fw = (const void *)fw->data;
2500		kld_fw_usable = fw_compatible(drv_fw, kld_fw);
2501	} else {
2502		kld_fw = NULL;
2503		kld_fw_usable = 0;
2504	}
2505
2506	if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
2507	    (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
2508		/*
2509		 * Common case: the firmware on the card is an exact match and
2510		 * the KLD is an exact match too, or the KLD is
2511		 * absent/incompatible.  Note that t4_fw_install = 2 is ignored
2512		 * here -- use cxgbetool loadfw if you want to reinstall the
2513		 * same firmware as the one on the card.
2514		 */
2515	} else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
2516	    should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
2517	    be32toh(card_fw->fw_ver))) {
2518
2519		rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
2520		if (rc != 0) {
2521			device_printf(sc->dev,
2522			    "failed to install firmware: %d\n", rc);
2523			goto done;
2524		}
2525
2526		/* Installed successfully, update the cached header too. */
2527		memcpy(card_fw, kld_fw, sizeof(*card_fw));
2528		card_fw_usable = 1;
2529		need_fw_reset = 0;	/* already reset as part of load_fw */
2530	}
2531
2532	if (!card_fw_usable) {
2533		uint32_t d, c, k;
2534
2535		d = ntohl(drv_fw->fw_ver);
2536		c = ntohl(card_fw->fw_ver);
2537		k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
2538
2539		device_printf(sc->dev, "Cannot find a usable firmware: "
2540		    "fw_install %d, chip state %d, "
2541		    "driver compiled with %d.%d.%d.%d, "
2542		    "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
2543		    t4_fw_install, state,
2544		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
2545		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
2546		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2547		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
2548		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2549		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2550		rc = EINVAL;
2551		goto done;
2552	}
2553
2554	/* We're using whatever's on the card and it's known to be good. */
2555	sc->params.fw_vers = ntohl(card_fw->fw_ver);
2556	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
2557	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
2558	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
2559	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
2560	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
2561	t4_get_tp_version(sc, &sc->params.tp_vers);
2562
2563	/* Reset device */
2564	if (need_fw_reset &&
2565	    (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
2566		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
2567		if (rc != ETIMEDOUT && rc != EIO)
2568			t4_fw_bye(sc, sc->mbox);
2569		goto done;
2570	}
2571	sc->flags |= FW_OK;
2572
2573	rc = get_params__pre_init(sc);
2574	if (rc != 0)
2575		goto done; /* error message displayed already */
2576
2577	/* Partition adapter resources as specified in the config file. */
2578	if (state == DEV_STATE_UNINIT) {
2579
2580		KASSERT(sc->flags & MASTER_PF,
2581		    ("%s: trying to change chip settings when not master.",
2582		    __func__));
2583
2584		rc = partition_resources(sc, default_cfg, fw_info->kld_name);
2585		if (rc != 0)
2586			goto done;	/* error message displayed already */
2587
2588		t4_tweak_chip_settings(sc);
2589
2590		/* get basic stuff going */
2591		rc = -t4_fw_initialize(sc, sc->mbox);
2592		if (rc != 0) {
2593			device_printf(sc->dev, "fw init failed: %d.\n", rc);
2594			goto done;
2595		}
2596	} else {
2597		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
2598		sc->cfcsum = 0;
2599	}
2600
2601done:
2602	free(card_fw, M_CXGBE);
2603	if (fw != NULL)
2604		firmware_put(fw, FIRMWARE_UNLOAD);
2605	if (default_cfg != NULL)
2606		firmware_put(default_cfg, FIRMWARE_UNLOAD);
2607
2608	return (rc);
2609}
2610
2611#define FW_PARAM_DEV(param) \
2612	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
2613	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
2614#define FW_PARAM_PFVF(param) \
2615	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
2616	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
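
/*
 * Usage sketch (see get_params__pre_init below for a real caller):
 *
 *	param = FW_PARAM_DEV(PORTVEC);
 *	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
 */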
2617
2618/*
2619 * Partition chip resources for use between various PFs, VFs, etc.
2620 */
2621static int
2622partition_resources(struct adapter *sc, const struct firmware *default_cfg,
2623    const char *name_prefix)
2624{
2625	const struct firmware *cfg = NULL;
2626	int rc = 0;
2627	struct fw_caps_config_cmd caps;
2628	uint32_t mtype, moff, finicsum, cfcsum;
2629
2630	/*
2631	 * Figure out what configuration file to use.  Pick the default config
2632	 * file for the card if the user hasn't specified one explicitly.
2633	 */
2634	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
2635	if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
2636		/* Card specific overrides go here. */
2637		if (pci_get_device(sc->dev) == 0x440a)
2638			snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
2639		if (is_fpga(sc))
2640			snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
2641	}
2642
2643	/*
2644	 * We need to load another module if the profile is anything except
2645	 * "default" or "flash".
2646	 */
2647	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
2648	    strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
2649		char s[32];
2650
2651		snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
2652		cfg = firmware_get(s);
2653		if (cfg == NULL) {
2654			if (default_cfg != NULL) {
2655				device_printf(sc->dev,
2656				    "unable to load module \"%s\" for "
2657				    "configuration profile \"%s\", will use "
2658				    "the default config file instead.\n",
2659				    s, sc->cfg_file);
2660				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
2661				    "%s", DEFAULT_CF);
2662			} else {
2663				device_printf(sc->dev,
2664				    "unable to load module \"%s\" for "
2665				    "configuration profile \"%s\", will use "
2666				    "the config file on the card's flash "
2667				    "instead.\n", s, sc->cfg_file);
2668				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
2669				    "%s", FLASH_CF);
2670			}
2671		}
2672	}
2673
2674	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
2675	    default_cfg == NULL) {
2676		device_printf(sc->dev,
2677		    "default config file not available, will use the config "
2678		    "file on the card's flash instead.\n");
2679		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
2680	}
2681
2682	if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
2683		u_int cflen, i, n;
2684		const uint32_t *cfdata;
2685		uint32_t param, val, addr, off, mw_base, mw_aperture;
2686
2687		KASSERT(cfg != NULL || default_cfg != NULL,
2688		    ("%s: no config to upload", __func__));
2689
2690		/*
2691		 * Ask the firmware where it wants us to upload the config file.
2692		 */
2693		param = FW_PARAM_DEV(CF);
2694		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2695		if (rc != 0) {
2696			/* No support for config file?  Shouldn't happen. */
2697			device_printf(sc->dev,
2698			    "failed to query config file location: %d.\n", rc);
2699			goto done;
2700		}
2701		mtype = G_FW_PARAMS_PARAM_Y(val);
2702		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
2703
2704		/*
2705		 * XXX: sheer laziness.  We deliberately added 4 bytes of
2706		 * useless stuffing/comments at the end of the config file so
2707		 * it's ok to simply throw away the last remaining bytes when
2708		 * the config file is not an exact multiple of 4.  This also
2709		 * helps with the validate_mt_off_len check.
2710		 */
2711		if (cfg != NULL) {
2712			cflen = cfg->datasize & ~3;
2713			cfdata = cfg->data;
2714		} else {
2715			cflen = default_cfg->datasize & ~3;
2716			cfdata = default_cfg->data;
2717		}
2718
2719		if (cflen > FLASH_CFG_MAX_SIZE) {
2720			device_printf(sc->dev,
2721			    "config file too long (%d, max allowed is %d).  "
2722			    "Will try to use the config on the card, if any.\n",
2723			    cflen, FLASH_CFG_MAX_SIZE);
2724			goto use_config_on_flash;
2725		}
2726
2727		rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
2728		if (rc != 0) {
2729			device_printf(sc->dev,
2730			    "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
2731			    "Will try to use the config on the card, if any.\n",
2732			    __func__, mtype, moff, cflen, rc);
2733			goto use_config_on_flash;
2734		}
2735
2736		memwin_info(sc, 2, &mw_base, &mw_aperture);
2737		while (cflen) {
2738			off = position_memwin(sc, 2, addr);
2739			n = min(cflen, mw_aperture - off);
2740			for (i = 0; i < n; i += 4)
2741				t4_write_reg(sc, mw_base + off + i, *cfdata++);
2742			cflen -= n;
2743			addr += n;
2744		}
2745	} else {
2746use_config_on_flash:
2747		mtype = FW_MEMTYPE_FLASH;
2748		moff = t4_flash_cfg_addr(sc);
2749	}
2750
2751	bzero(&caps, sizeof(caps));
2752	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2753	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
2754	caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
2755	    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
2756	    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
2757	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
2758	if (rc != 0) {
2759		device_printf(sc->dev,
2760		    "failed to pre-process config file: %d "
2761		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
2762		goto done;
2763	}
2764
2765	finicsum = be32toh(caps.finicsum);
2766	cfcsum = be32toh(caps.cfcsum);
2767	if (finicsum != cfcsum) {
2768		device_printf(sc->dev,
2769		    "WARNING: config file checksum mismatch: %08x %08x\n",
2770		    finicsum, cfcsum);
2771	}
2772	sc->cfcsum = cfcsum;
2773
2774#define LIMIT_CAPS(x) do { \
2775	caps.x &= htobe16(t4_##x##_allowed); \
2776} while (0)
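
/*
 * e.g. LIMIT_CAPS(toecaps) expands to
 *	caps.toecaps &= htobe16(t4_toecaps_allowed);
 */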
2777
2778	/*
2779	 * Let the firmware know what features will (not) be used so it can tune
2780	 * things accordingly.
2781	 */
2782	LIMIT_CAPS(nbmcaps);
2783	LIMIT_CAPS(linkcaps);
2784	LIMIT_CAPS(switchcaps);
2785	LIMIT_CAPS(niccaps);
2786	LIMIT_CAPS(toecaps);
2787	LIMIT_CAPS(rdmacaps);
2788	LIMIT_CAPS(tlscaps);
2789	LIMIT_CAPS(iscsicaps);
2790	LIMIT_CAPS(fcoecaps);
2791#undef LIMIT_CAPS
2792
2793	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2794	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
2795	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
2796	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
2797	if (rc != 0) {
2798		device_printf(sc->dev,
2799		    "failed to process config file: %d.\n", rc);
2800	}
2801done:
2802	if (cfg != NULL)
2803		firmware_put(cfg, FIRMWARE_UNLOAD);
2804	return (rc);
2805}
2806
2807/*
2808 * Retrieve parameters that are needed (or nice to have) very early.
2809 */
2810static int
2811get_params__pre_init(struct adapter *sc)
2812{
2813	int rc;
2814	uint32_t param[2], val[2];
2815	struct fw_devlog_cmd cmd;
2816	struct devlog_params *dlog = &sc->params.devlog;
2817
2818	param[0] = FW_PARAM_DEV(PORTVEC);
2819	param[1] = FW_PARAM_DEV(CCLK);
2820	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
2821	if (rc != 0) {
2822		device_printf(sc->dev,
2823		    "failed to query parameters (pre_init): %d.\n", rc);
2824		return (rc);
2825	}
2826
2827	sc->params.portvec = val[0];
2828	sc->params.nports = bitcount32(val[0]);
2829	sc->params.vpd.cclk = val[1];
2830
2831	/* Read device log parameters. */
2832	bzero(&cmd, sizeof(cmd));
2833	cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) |
2834	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
2835	cmd.retval_len16 = htobe32(FW_LEN16(cmd));
2836	rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd);
2837	if (rc != 0) {
2838		device_printf(sc->dev,
2839		    "failed to get devlog parameters: %d.\n", rc);
2840		bzero(dlog, sizeof (*dlog));
2841		rc = 0;	/* devlog isn't critical for device operation */
2842	} else {
2843		val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog);
2844		dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]);
2845		dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4;
2846		dlog->size = be32toh(cmd.memsize_devlog);
2847	}
2848
2849	return (rc);
2850}
2851
2852/*
2853 * Retrieve various parameters that are of interest to the driver.  The device
2854 * has been initialized by the firmware at this point.
2855 */
2856static int
2857get_params__post_init(struct adapter *sc)
2858{
2859	int rc;
2860	uint32_t param[7], val[7];
2861	struct fw_caps_config_cmd caps;
2862
2863	param[0] = FW_PARAM_PFVF(IQFLINT_START);
2864	param[1] = FW_PARAM_PFVF(EQ_START);
2865	param[2] = FW_PARAM_PFVF(FILTER_START);
2866	param[3] = FW_PARAM_PFVF(FILTER_END);
2867	param[4] = FW_PARAM_PFVF(L2T_START);
2868	param[5] = FW_PARAM_PFVF(L2T_END);
2869	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
2870	if (rc != 0) {
2871		device_printf(sc->dev,
2872		    "failed to query parameters (post_init): %d.\n", rc);
2873		return (rc);
2874	}
2875
2876	sc->sge.iq_start = val[0];
2877	sc->sge.eq_start = val[1];
2878	sc->tids.ftid_base = val[2];
2879	sc->tids.nftids = val[3] - val[2] + 1;
2880	sc->params.ftid_min = val[2];
2881	sc->params.ftid_max = val[3];
2882	sc->vres.l2t.start = val[4];
2883	sc->vres.l2t.size = val[5] - val[4] + 1;
2884	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
2885	    ("%s: L2 table size (%u) larger than expected (%u)",
2886	    __func__, sc->vres.l2t.size, L2T_SIZE));
2887
2888	/* get capabilities */
2889	bzero(&caps, sizeof(caps));
2890	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2891	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
2892	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
2893	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
2894	if (rc != 0) {
2895		device_printf(sc->dev,
2896		    "failed to get card capabilities: %d.\n", rc);
2897		return (rc);
2898	}
2899
2900#define READ_CAPS(x) do { \
2901	sc->x = be16toh(caps.x); \
2902} while (0)
2903	READ_CAPS(nbmcaps);
2904	READ_CAPS(linkcaps);
2905	READ_CAPS(switchcaps);
2906	READ_CAPS(niccaps);
2907	READ_CAPS(toecaps);
2908	READ_CAPS(rdmacaps);
2909	READ_CAPS(tlscaps);
2910	READ_CAPS(iscsicaps);
2911	READ_CAPS(fcoecaps);
2912
2913	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
2914		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
2915		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
2916		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
2917		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
2918		if (rc != 0) {
2919			device_printf(sc->dev,
2920			    "failed to query NIC parameters: %d.\n", rc);
2921			return (rc);
2922		}
2923		sc->tids.etid_base = val[0];
2924		sc->params.etid_min = val[0];
2925		sc->tids.netids = val[1] - val[0] + 1;
2926		sc->params.netids = sc->tids.netids;
2927		sc->params.eo_wr_cred = val[2];
2928		sc->params.ethoffload = 1;
2929	}
2930
2931	if (sc->toecaps) {
2932		/* query offload-related parameters */
2933		param[0] = FW_PARAM_DEV(NTID);
2934		param[1] = FW_PARAM_PFVF(SERVER_START);
2935		param[2] = FW_PARAM_PFVF(SERVER_END);
2936		param[3] = FW_PARAM_PFVF(TDDP_START);
2937		param[4] = FW_PARAM_PFVF(TDDP_END);
2938		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
2939		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
2940		if (rc != 0) {
2941			device_printf(sc->dev,
2942			    "failed to query TOE parameters: %d.\n", rc);
2943			return (rc);
2944		}
2945		sc->tids.ntids = val[0];
2946		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
2947		sc->tids.stid_base = val[1];
2948		sc->tids.nstids = val[2] - val[1] + 1;
2949		sc->vres.ddp.start = val[3];
2950		sc->vres.ddp.size = val[4] - val[3] + 1;
2951		sc->params.ofldq_wr_cred = val[5];
2952		sc->params.offload = 1;
2953	}
2954	if (sc->rdmacaps) {
2955		param[0] = FW_PARAM_PFVF(STAG_START);
2956		param[1] = FW_PARAM_PFVF(STAG_END);
2957		param[2] = FW_PARAM_PFVF(RQ_START);
2958		param[3] = FW_PARAM_PFVF(RQ_END);
2959		param[4] = FW_PARAM_PFVF(PBL_START);
2960		param[5] = FW_PARAM_PFVF(PBL_END);
2961		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
2962		if (rc != 0) {
2963			device_printf(sc->dev,
2964			    "failed to query RDMA parameters(1): %d.\n", rc);
2965			return (rc);
2966		}
2967		sc->vres.stag.start = val[0];
2968		sc->vres.stag.size = val[1] - val[0] + 1;
2969		sc->vres.rq.start = val[2];
2970		sc->vres.rq.size = val[3] - val[2] + 1;
2971		sc->vres.pbl.start = val[4];
2972		sc->vres.pbl.size = val[5] - val[4] + 1;
2973
2974		param[0] = FW_PARAM_PFVF(SQRQ_START);
2975		param[1] = FW_PARAM_PFVF(SQRQ_END);
2976		param[2] = FW_PARAM_PFVF(CQ_START);
2977		param[3] = FW_PARAM_PFVF(CQ_END);
2978		param[4] = FW_PARAM_PFVF(OCQ_START);
2979		param[5] = FW_PARAM_PFVF(OCQ_END);
2980		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
2981		if (rc != 0) {
2982			device_printf(sc->dev,
2983			    "failed to query RDMA parameters(2): %d.\n", rc);
2984			return (rc);
2985		}
2986		sc->vres.qp.start = val[0];
2987		sc->vres.qp.size = val[1] - val[0] + 1;
2988		sc->vres.cq.start = val[2];
2989		sc->vres.cq.size = val[3] - val[2] + 1;
2990		sc->vres.ocq.start = val[4];
2991		sc->vres.ocq.size = val[5] - val[4] + 1;
2992	}
2993	if (sc->iscsicaps) {
2994		param[0] = FW_PARAM_PFVF(ISCSI_START);
2995		param[1] = FW_PARAM_PFVF(ISCSI_END);
2996		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
2997		if (rc != 0) {
2998			device_printf(sc->dev,
2999			    "failed to query iSCSI parameters: %d.\n", rc);
3000			return (rc);
3001		}
3002		sc->vres.iscsi.start = val[0];
3003		sc->vres.iscsi.size = val[1] - val[0] + 1;
3004	}
3005
3006	/*
3007	 * We've got the params we wanted to query via the firmware.  Now grab
3008	 * some others directly from the chip.
3009	 */
3010	rc = t4_read_chip_settings(sc);
3011
3012	return (rc);
3013}
3014
3015static int
3016set_params__post_init(struct adapter *sc)
3017{
3018	uint32_t param, val;
3019
3020	/* ask for encapsulated CPLs */
3021	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
3022	val = 1;
3023	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3024
3025	return (0);
3026}
3027
3028#undef FW_PARAM_PFVF
3029#undef FW_PARAM_DEV
3030
3031static void
3032t4_set_desc(struct adapter *sc)
3033{
3034	char buf[128];
3035	struct adapter_params *p = &sc->params;
3036
3037	snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, "
3038	    "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "",
3039	    chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec);
3040
3041	device_set_desc_copy(sc->dev, buf);
3042}
3043
3044static void
3045build_medialist(struct port_info *pi, struct ifmedia *media)
3046{
3047	int m;
3048
3049	PORT_LOCK(pi);
3050
3051	ifmedia_removeall(media);
3052
3053	m = IFM_ETHER | IFM_FDX;
3054
3055	switch (pi->port_type) {
3056	case FW_PORT_TYPE_BT_XFI:
3057	case FW_PORT_TYPE_BT_XAUI:
3058		ifmedia_add(media, m | IFM_10G_T, 0, NULL);
3059		/* fall through */
3060
3061	case FW_PORT_TYPE_BT_SGMII:
3062		ifmedia_add(media, m | IFM_1000_T, 0, NULL);
3063		ifmedia_add(media, m | IFM_100_TX, 0, NULL);
3064		ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3065		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3066		break;
3067
3068	case FW_PORT_TYPE_CX4:
3069		ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
3070		ifmedia_set(media, m | IFM_10G_CX4);
3071		break;
3072
3073	case FW_PORT_TYPE_QSFP_10G:
3074	case FW_PORT_TYPE_SFP:
3075	case FW_PORT_TYPE_FIBER_XFI:
3076	case FW_PORT_TYPE_FIBER_XAUI:
3077		switch (pi->mod_type) {
3078
3079		case FW_PORT_MOD_TYPE_LR:
3080			ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
3081			ifmedia_set(media, m | IFM_10G_LR);
3082			break;
3083
3084		case FW_PORT_MOD_TYPE_SR:
3085			ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
3086			ifmedia_set(media, m | IFM_10G_SR);
3087			break;
3088
3089		case FW_PORT_MOD_TYPE_LRM:
3090			ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
3091			ifmedia_set(media, m | IFM_10G_LRM);
3092			break;
3093
3094		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3095		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3096			ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
3097			ifmedia_set(media, m | IFM_10G_TWINAX);
3098			break;
3099
3100		case FW_PORT_MOD_TYPE_NONE:
3101			m &= ~IFM_FDX;
3102			ifmedia_add(media, m | IFM_NONE, 0, NULL);
3103			ifmedia_set(media, m | IFM_NONE);
3104			break;
3105
3106		case FW_PORT_MOD_TYPE_NA:
3107		case FW_PORT_MOD_TYPE_ER:
3108		default:
3109			device_printf(pi->dev,
3110			    "unknown port_type (%d), mod_type (%d)\n",
3111			    pi->port_type, pi->mod_type);
3112			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3113			ifmedia_set(media, m | IFM_UNKNOWN);
3114			break;
3115		}
3116		break;
3117
3118	case FW_PORT_TYPE_QSFP:
3119		switch (pi->mod_type) {
3120
3121		case FW_PORT_MOD_TYPE_LR:
3122			ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
3123			ifmedia_set(media, m | IFM_40G_LR4);
3124			break;
3125
3126		case FW_PORT_MOD_TYPE_SR:
3127			ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
3128			ifmedia_set(media, m | IFM_40G_SR4);
3129			break;
3130
3131		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3132		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3133			ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
3134			ifmedia_set(media, m | IFM_40G_CR4);
3135			break;
3136
3137		case FW_PORT_MOD_TYPE_NONE:
3138			m &= ~IFM_FDX;
3139			ifmedia_add(media, m | IFM_NONE, 0, NULL);
3140			ifmedia_set(media, m | IFM_NONE);
3141			break;
3142
3143		default:
3144			device_printf(pi->dev,
3145			    "unknown port_type (%d), mod_type (%d)\n",
3146			    pi->port_type, pi->mod_type);
3147			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3148			ifmedia_set(media, m | IFM_UNKNOWN);
3149			break;
3150		}
3151		break;
3152
3153	default:
3154		device_printf(pi->dev,
3155		    "unknown port_type (%d), mod_type (%d)\n", pi->port_type,
3156		    pi->mod_type);
3157		ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3158		ifmedia_set(media, m | IFM_UNKNOWN);
3159		break;
3160	}
3161
3162	PORT_UNLOCK(pi);
3163}
3164
3165#define FW_MAC_EXACT_CHUNK	7
3166
3167/*
3168 * Program the port's XGMAC based on parameters in ifnet.  The caller also
3169 * indicates which parameters should be programmed (the rest are left alone).
3170 */
3171int
3172update_mac_settings(struct ifnet *ifp, int flags)
3173{
3174	int rc = 0;
3175	struct vi_info *vi = ifp->if_softc;
3176	struct port_info *pi = vi->pi;
3177	struct adapter *sc = pi->adapter;
3178	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
3179
3180	ASSERT_SYNCHRONIZED_OP(sc);
3181	KASSERT(flags, ("%s: not told what to update.", __func__));
3182
3183	if (flags & XGMAC_MTU)
3184		mtu = ifp->if_mtu;
3185
3186	if (flags & XGMAC_PROMISC)
3187		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
3188
3189	if (flags & XGMAC_ALLMULTI)
3190		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
3191
3192	if (flags & XGMAC_VLANEX)
3193		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
3194
3195	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
3196		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
3197		    allmulti, 1, vlanex, false);
3198		if (rc) {
3199			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
3200			    rc);
3201			return (rc);
3202		}
3203	}
3204
3205	if (flags & XGMAC_UCADDR) {
3206		uint8_t ucaddr[ETHER_ADDR_LEN];
3207
3208		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
3209		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
3210		    ucaddr, true, true);
3211		if (rc < 0) {
3212			rc = -rc;
3213			if_printf(ifp, "change_mac failed: %d\n", rc);
3214			return (rc);
3215		} else {
3216			vi->xact_addr_filt = rc;
3217			rc = 0;
3218		}
3219	}
3220
3221	if (flags & XGMAC_MCADDRS) {
3222		const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
3223		int del = 1;
3224		uint64_t hash = 0;
3225		struct ifmultiaddr *ifma;
3226		int i = 0, j;
3227
3228		if_maddr_rlock(ifp);
3229		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3230			if (ifma->ifma_addr->sa_family != AF_LINK)
3231				continue;
3232			mcaddr[i] =
3233			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
3234			MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
3235			i++;
3236
3237			if (i == FW_MAC_EXACT_CHUNK) {
3238				rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
3239				    del, i, mcaddr, NULL, &hash, 0);
3240				if (rc < 0) {
3241					rc = -rc;
3242					for (j = 0; j < i; j++) {
3243						if_printf(ifp,
3244						    "failed to add mc address"
3245						    " %02x:%02x:%02x:"
3246						    "%02x:%02x:%02x rc=%d\n",
3247						    mcaddr[j][0], mcaddr[j][1],
3248						    mcaddr[j][2], mcaddr[j][3],
3249						    mcaddr[j][4], mcaddr[j][5],
3250						    rc);
3251					}
3252					goto mcfail;
3253				}
3254				del = 0;
3255				i = 0;
3256			}
3257		}
3258		if (i > 0) {
3259			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
3260			    mcaddr, NULL, &hash, 0);
3261			if (rc < 0) {
3262				rc = -rc;
3263				for (j = 0; j < i; j++) {
3264					if_printf(ifp,
3265					    "failed to add mc address"
3266					    " %02x:%02x:%02x:"
3267					    "%02x:%02x:%02x rc=%d\n",
3268					    mcaddr[j][0], mcaddr[j][1],
3269					    mcaddr[j][2], mcaddr[j][3],
3270					    mcaddr[j][4], mcaddr[j][5],
3271					    rc);
3272				}
3273				goto mcfail;
3274			}
3275		}
3276
3277		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
3278		if (rc != 0)
3279			if_printf(ifp, "failed to set mc address hash: %d", rc);
3280mcfail:
3281		if_maddr_runlock(ifp);
3282	}
3283
3284	return (rc);
3285}
3286
3287/*
3288 * {begin|end}_synchronized_op must be called from the same thread.
3289 */
3290int
3291begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
3292    char *wmesg)
3293{
3294	int rc, pri;
3295
3296#ifdef WITNESS
3297	/* the caller thinks it's ok to sleep, but is it really? */
3298	if (flags & SLEEP_OK)
3299		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3300		    "begin_synchronized_op");
3301#endif
3302
3303	if (flags & INTR_OK)
3304		pri = PCATCH;
3305	else
3306		pri = 0;
3307
3308	ADAPTER_LOCK(sc);
3309	for (;;) {
3310
3311		if (vi && IS_DOOMED(vi)) {
3312			rc = ENXIO;
3313			goto done;
3314		}
3315
3316		if (!IS_BUSY(sc)) {
3317			rc = 0;
3318			break;
3319		}
3320
3321		if (!(flags & SLEEP_OK)) {
3322			rc = EBUSY;
3323			goto done;
3324		}
3325
3326		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
3327			rc = EINTR;
3328			goto done;
3329		}
3330	}
3331
3332	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
3333	SET_BUSY(sc);
3334#ifdef INVARIANTS
3335	sc->last_op = wmesg;
3336	sc->last_op_thr = curthread;
3337	sc->last_op_flags = flags;
3338#endif
3339
3340done:
3341	if (!(flags & HOLD_LOCK) || rc)
3342		ADAPTER_UNLOCK(sc);
3343
3344	return (rc);
3345}
3346
3347/*
3348 * Tell if_ioctl and if_init that the VI is going away.  This is a
3349 * special variant of begin_synchronized_op and must be paired with a
3350 * call to end_synchronized_op.
3351 */
3352void
3353doom_vi(struct adapter *sc, struct vi_info *vi)
3354{
3355
3356	ADAPTER_LOCK(sc);
3357	SET_DOOMED(vi);
3358	wakeup(&sc->flags);
3359	while (IS_BUSY(sc))
3360		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
3361	SET_BUSY(sc);
3362#ifdef INVARIANTS
3363	sc->last_op = "t4detach";
3364	sc->last_op_thr = curthread;
3365	sc->last_op_flags = 0;
3366#endif
3367	ADAPTER_UNLOCK(sc);
3368}
3369
3370/*
3371 * {begin|end}_synchronized_op must be called from the same thread.
3372 */
3373void
3374end_synchronized_op(struct adapter *sc, int flags)
3375{
3376
3377	if (flags & LOCK_HELD)
3378		ADAPTER_LOCK_ASSERT_OWNED(sc);
3379	else
3380		ADAPTER_LOCK(sc);
3381
3382	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
3383	CLR_BUSY(sc);
3384	wakeup(&sc->flags);
3385	ADAPTER_UNLOCK(sc);
3386}
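
/*
 * Typical call pattern for the two functions above (editorial sketch; the
 * wmesg string is arbitrary):
 *
 *	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4sync");
 *	if (rc)
 *		return (rc);
 *	...operate on the adapter/VI...
 *	end_synchronized_op(sc, 0);
 */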
3387
3388static int
3389cxgbe_init_synchronized(struct vi_info *vi)
3390{
3391	struct port_info *pi = vi->pi;
3392	struct adapter *sc = pi->adapter;
3393	struct ifnet *ifp = vi->ifp;
3394	int rc = 0, i;
3395	struct sge_txq *txq;
3396
3397	ASSERT_SYNCHRONIZED_OP(sc);
3398
3399	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3400		return (0);	/* already running */
3401
3402	if (!(sc->flags & FULL_INIT_DONE) &&
3403	    ((rc = adapter_full_init(sc)) != 0))
3404		return (rc);	/* error message displayed already */
3405
3406	if (!(vi->flags & VI_INIT_DONE) &&
3407	    ((rc = vi_full_init(vi)) != 0))
3408		return (rc); /* error message displayed already */
3409
3410	rc = update_mac_settings(ifp, XGMAC_ALL);
3411	if (rc)
3412		goto done;	/* error message displayed already */
3413
3414	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
3415	if (rc != 0) {
3416		if_printf(ifp, "enable_vi failed: %d\n", rc);
3417		goto done;
3418	}
3419
3420	/*
3421	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
3422	 * if this changes.
3423	 */
3424
3425	for_each_txq(vi, i, txq) {
3426		TXQ_LOCK(txq);
3427		txq->eq.flags |= EQ_ENABLED;
3428		TXQ_UNLOCK(txq);
3429	}
3430
3431	/*
3432	 * The first iq of the first port to come up is used for tracing.
3433	 */
3434	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
3435		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
3436		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
3437		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
3438		    V_QUEUENUMBER(sc->traceq));
3439		pi->flags |= HAS_TRACEQ;
3440	}
3441
3442	/* all ok */
3443	PORT_LOCK(pi);
3444	ifp->if_drv_flags |= IFF_DRV_RUNNING;
3445	pi->up_vis++;
3446
3447	if (pi->nvi > 1)
3448		callout_reset(&vi->tick, hz, vi_tick, vi);
3449	else
3450		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
3451	PORT_UNLOCK(pi);
3452done:
3453	if (rc != 0)
3454		cxgbe_uninit_synchronized(vi);
3455
3456	return (rc);
3457}
3458
3459/*
3460 * Idempotent.
3461 */
3462static int
3463cxgbe_uninit_synchronized(struct vi_info *vi)
3464{
3465	struct port_info *pi = vi->pi;
3466	struct adapter *sc = pi->adapter;
3467	struct ifnet *ifp = vi->ifp;
3468	int rc, i;
3469	struct sge_txq *txq;
3470
3471	ASSERT_SYNCHRONIZED_OP(sc);
3472
3473	if (!(vi->flags & VI_INIT_DONE)) {
3474		KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING),
3475		    ("uninited VI is running"));
3476		return (0);
3477	}
3478
3479	/*
3480	 * Disable the VI so that all its data in either direction is discarded
3481	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
3482	 * tick) intact as the TP can deliver negative advice or data that it's
3483	 * holding in its RAM (for an offloaded connection) even after the VI is
3484	 * disabled.
3485	 */
3486	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
3487	if (rc) {
3488		if_printf(ifp, "disable_vi failed: %d\n", rc);
3489		return (rc);
3490	}
3491
3492	for_each_txq(vi, i, txq) {
3493		TXQ_LOCK(txq);
3494		txq->eq.flags &= ~EQ_ENABLED;
3495		TXQ_UNLOCK(txq);
3496	}
3497
3498	PORT_LOCK(pi);
3499	if (pi->nvi == 1)
3500		callout_stop(&pi->tick);
3501	else
3502		callout_stop(&vi->tick);
3503	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3504		PORT_UNLOCK(pi);
3505		return (0);
3506	}
3507	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3508	pi->up_vis--;
3509	if (pi->up_vis > 0) {
3510		PORT_UNLOCK(pi);
3511		return (0);
3512	}
3513	PORT_UNLOCK(pi);
3514
3515	pi->link_cfg.link_ok = 0;
3516	pi->link_cfg.speed = 0;
3517	pi->linkdnrc = -1;
3518	t4_os_link_changed(sc, pi->port_id, 0, -1);
3519
3520	return (0);
3521}
3522
3523/*
3524 * It is ok for this function to fail midway and return right away.  t4_detach
3525 * will walk the entire sc->irq list and clean up whatever is valid.
3526 */
3527static int
3528setup_intr_handlers(struct adapter *sc)
3529{
3530	int rc, rid, p, q, v;
3531	char s[8];
3532	struct irq *irq;
3533	struct port_info *pi;
3534	struct vi_info *vi;
3535	struct sge *sge = &sc->sge;
3536	struct sge_rxq *rxq;
3537#ifdef TCP_OFFLOAD
3538	struct sge_ofld_rxq *ofld_rxq;
3539#endif
3540#ifdef DEV_NETMAP
3541	struct sge_nm_rxq *nm_rxq;
3542#endif
3543
3544	/*
3545	 * Set up the interrupts.
3546	 */
3547	irq = &sc->irq[0];
3548	rid = sc->intr_type == INTR_INTX ? 0 : 1;
3549	if (sc->intr_count == 1)
3550		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
3551
3552	/* Multiple interrupts. */
3553	KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
3554	    ("%s: too few intr.", __func__));
3555
3556	/* The first one is always error intr */
3557	rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
3558	if (rc != 0)
3559		return (rc);
3560	irq++;
3561	rid++;
3562
3563	/* The second one is always the firmware event queue */
3564	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
3565	if (rc != 0)
3566		return (rc);
3567	irq++;
3568	rid++;
3569
3570	for_each_port(sc, p) {
3571		pi = sc->port[p];
3572		for_each_vi(pi, v, vi) {
3573			vi->first_intr = rid - 1;
3574
3575			if (vi->nnmrxq > 0) {
3576				int n = max(vi->nrxq, vi->nnmrxq);
3577
3578				MPASS(vi->flags & INTR_RXQ);
3579
3580				rxq = &sge->rxq[vi->first_rxq];
3581#ifdef DEV_NETMAP
3582				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
3583#endif
3584				for (q = 0; q < n; q++) {
3585					snprintf(s, sizeof(s), "%x%c%x", p,
3586					    'a' + v, q);
3587					if (q < vi->nrxq)
3588						irq->rxq = rxq++;
3589#ifdef DEV_NETMAP
3590					if (q < vi->nnmrxq)
3591						irq->nm_rxq = nm_rxq++;
3592#endif
3593					rc = t4_alloc_irq(sc, irq, rid,
3594					    t4_vi_intr, irq, s);
3595					if (rc != 0)
3596						return (rc);
3597					irq++;
3598					rid++;
3599					vi->nintr++;
3600				}
3601			} else if (vi->flags & INTR_RXQ) {
3602				for_each_rxq(vi, q, rxq) {
3603					snprintf(s, sizeof(s), "%x%c%x", p,
3604					    'a' + v, q);
3605					rc = t4_alloc_irq(sc, irq, rid,
3606					    t4_intr, rxq, s);
3607					if (rc != 0)
3608						return (rc);
3609					irq++;
3610					rid++;
3611					vi->nintr++;
3612				}
3613			}
3614#ifdef TCP_OFFLOAD
3615			if (vi->flags & INTR_OFLD_RXQ) {
3616				for_each_ofld_rxq(vi, q, ofld_rxq) {
3617					snprintf(s, sizeof(s), "%x%c%x", p,
3618					    'A' + v, q);
3619					rc = t4_alloc_irq(sc, irq, rid,
3620					    t4_intr, ofld_rxq, s);
3621					if (rc != 0)
3622						return (rc);
3623					irq++;
3624					rid++;
3625					vi->nintr++;
3626				}
3627			}
3628#endif
3629		}
3630	}
3631	MPASS(irq == &sc->irq[sc->intr_count]);
3632
3633	return (0);
3634}
3635
3636int
3637adapter_full_init(struct adapter *sc)
3638{
3639	int rc, i;
3640
3641	ASSERT_SYNCHRONIZED_OP(sc);
3642	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
3643	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
3644	    ("%s: FULL_INIT_DONE already", __func__));
3645
3646	/*
3647	 * Set up the queues that belong to the adapter (not any particular port).
3648	 */
3649	rc = t4_setup_adapter_queues(sc);
3650	if (rc != 0)
3651		goto done;
3652
3653	for (i = 0; i < nitems(sc->tq); i++) {
3654		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
3655		    taskqueue_thread_enqueue, &sc->tq[i]);
3656		if (sc->tq[i] == NULL) {
3657			device_printf(sc->dev,
3658			    "failed to allocate task queue %d\n", i);
3659			rc = ENOMEM;
3660			goto done;
3661		}
3662		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
3663		    device_get_nameunit(sc->dev), i);
3664	}
3665
3666	t4_intr_enable(sc);
3667	sc->flags |= FULL_INIT_DONE;
3668done:
3669	if (rc != 0)
3670		adapter_full_uninit(sc);
3671
3672	return (rc);
3673}
3674
3675int
3676adapter_full_uninit(struct adapter *sc)
3677{
3678	int i;
3679
3680	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
3681
3682	t4_teardown_adapter_queues(sc);
3683
3684	for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
3685		taskqueue_free(sc->tq[i]);
3686		sc->tq[i] = NULL;
3687	}
3688
3689	sc->flags &= ~FULL_INIT_DONE;
3690
3691	return (0);
3692}
3693
3694#ifdef RSS
3695#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
3696    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
3697    RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
3698    RSS_HASHTYPE_RSS_UDP_IPV6)
3699
3700/* Translates kernel hash types to hardware. */
3701static int
3702hashconfig_to_hashen(int hashconfig)
3703{
3704	int hashen = 0;
3705
3706	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
3707		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
3708	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
3709		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
3710	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
3711		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
3712		    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
3713	}
3714	if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
3715		hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
3716		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
3717	}
3718	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
3719		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
3720	if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
3721		hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
3722
3723	return (hashen);
3724}
3725
3726/* Translates hardware hash types to kernel. */
3727static int
3728hashen_to_hashconfig(int hashen)
3729{
3730	int hashconfig = 0;
3731
3732	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
3733		/*
3734		 * If UDP hashing was enabled it must have been enabled for
3735		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
3736		 * enabling any 4-tuple hash would be a nonsensical configuration.
3737		 */
3738		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
3739		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
3740
3741		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
3742			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
3743		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
3744			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
3745	}
3746	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
3747		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
3748	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
3749		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
3750	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
3751		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
3752	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
3753		hashconfig |= RSS_HASHTYPE_RSS_IPV6;
3754
3755	return (hashconfig);
3756}
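/*
 * Worked example: hashconfig_to_hashen(RSS_HASHTYPE_RSS_UDP_IPV4) yields
 * UDPEN | IP4FOURTUPEN, but hashen_to_hashconfig() of that value returns
 * RSS_HASHTYPE_RSS_UDP_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV4; the hardware
 * cannot hash UDP 4-tuples without also hashing TCP 4-tuples.  This
 * asymmetry is exactly the "extra" case handled in vi_full_init().
 */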
3757#endif
3758
3759int
3760vi_full_init(struct vi_info *vi)
3761{
3762	struct adapter *sc = vi->pi->adapter;
3763	struct ifnet *ifp = vi->ifp;
3764	uint16_t *rss;
3765	struct sge_rxq *rxq;
3766	int rc, i, j, hashen;
3767#ifdef RSS
3768	int nbuckets = rss_getnumbuckets();
3769	int hashconfig = rss_gethashconfig();
3770	int extra;
3771	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
3772	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
3773#endif
3774
3775	ASSERT_SYNCHRONIZED_OP(sc);
3776	KASSERT((vi->flags & VI_INIT_DONE) == 0,
3777	    ("%s: VI_INIT_DONE already", __func__));
3778
3779	sysctl_ctx_init(&vi->ctx);
3780	vi->flags |= VI_SYSCTL_CTX;
3781
3782	/*
3783	 * Allocate tx/rx/fl queues for this VI.
3784	 */
3785	rc = t4_setup_vi_queues(vi);
3786	if (rc != 0)
3787		goto done;	/* error message displayed already */
3788
3789	/*
3790	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
3791	 */
3792	if (vi->nrxq > vi->rss_size) {
3793		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
3794		    "some queues will never receive traffic.\n", vi->nrxq,
3795		    vi->rss_size);
3796	} else if (vi->rss_size % vi->nrxq) {
3797		if_printf(ifp, "nrxq (%d) does not divide the hw RSS table "
3798		    "size (%d) evenly; expect uneven traffic distribution.\n",
3799		    vi->nrxq, vi->rss_size);
3800	}
3801#ifdef RSS
3802	MPASS(RSS_KEYSIZE == 40);
3803	if (vi->nrxq != nbuckets) {
3804		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d); "
3805		    "performance will be impacted.\n", vi->nrxq, nbuckets);
3806	}
3807
3808	rss_getkey((void *)&raw_rss_key[0]);
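	/*
	 * Reverse the order of the key's 32-bit words and convert each
	 * word to big-endian; this is the layout the chip expects when
	 * the key is programmed with t4_write_rss_key().
	 */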
3809	for (i = 0; i < nitems(rss_key); i++) {
3810		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
3811	}
3812	t4_write_rss_key(sc, &rss_key[0], -1);
3813#endif
3814	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
3815	for (i = 0; i < vi->rss_size;) {
3816#ifdef RSS
3817		j = rss_get_indirection_to_bucket(i);
3818		j %= vi->nrxq;
3819		rxq = &sc->sge.rxq[vi->first_rxq + j];
3820		rss[i++] = rxq->iq.abs_id;
3821#else
3822		for_each_rxq(vi, j, rxq) {
3823			rss[i++] = rxq->iq.abs_id;
3824			if (i == vi->rss_size)
3825				break;
3826		}
3827#endif
3828	}
3829
3830	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
3831	    vi->rss_size);
3832	if (rc != 0) {
3833		if_printf(ifp, "rss_config failed: %d\n", rc);
3834		goto done;
3835	}
3836
3837#ifdef RSS
3838	hashen = hashconfig_to_hashen(hashconfig);
3839
3840	/*
3841	 * We may have had to enable some hashes even though the global config
3842	 * wants them disabled.  This is a potential problem that must be
3843	 * reported to the user.
3844	 */
3845	extra = hashen_to_hashconfig(hashen) ^ hashconfig;
3846
3847	/*
3848	 * If we consider only the supported hash types, then the enabled hashes
3849	 * are a superset of the requested hashes.  In other words, there cannot
3850	 * be any supported hash that was requested but not enabled, but there
3851	 * can be hashes that were not requested but had to be enabled.
3852	 */
3853	extra &= SUPPORTED_RSS_HASHTYPES;
3854	MPASS((extra & hashconfig) == 0);
3855
3856	if (extra) {
3857		if_printf(ifp,
3858		    "global RSS config (0x%x) cannot be accommodated.\n",
3859		    hashconfig);
3860	}
3861	if (extra & RSS_HASHTYPE_RSS_IPV4)
3862		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
3863	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
3864		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
3865	if (extra & RSS_HASHTYPE_RSS_IPV6)
3866		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
3867	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
3868		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
3869	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
3870		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
3871	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
3872		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
3873#else
3874	hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
3875	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
3876	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
3877	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
3878#endif
3879	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]);
3880	if (rc != 0) {
3881		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
3882		goto done;
3883	}
3884
3885	vi->rss = rss;
3886	vi->flags |= VI_INIT_DONE;
3887done:
3888	if (rc != 0)
3889		vi_full_uninit(vi);
3890
3891	return (rc);
3892}
3893
3894/*
3895 * Idempotent.
3896 */
3897int
3898vi_full_uninit(struct vi_info *vi)
3899{
3900	struct port_info *pi = vi->pi;
3901	struct adapter *sc = pi->adapter;
3902	int i;
3903	struct sge_rxq *rxq;
3904	struct sge_txq *txq;
3905#ifdef TCP_OFFLOAD
3906	struct sge_ofld_rxq *ofld_rxq;
3907	struct sge_wrq *ofld_txq;
3908#endif
3909
3910	if (vi->flags & VI_INIT_DONE) {
3911
3912		/* Need to quiesce queues.  */
3913
3914		/* XXX: Only for the first VI? */
3915		if (IS_MAIN_VI(vi))
3916			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
3917
3918		for_each_txq(vi, i, txq) {
3919			quiesce_txq(sc, txq);
3920		}
3921
3922#ifdef TCP_OFFLOAD
3923		for_each_ofld_txq(vi, i, ofld_txq) {
3924			quiesce_wrq(sc, ofld_txq);
3925		}
3926#endif
3927
3928		for_each_rxq(vi, i, rxq) {
3929			quiesce_iq(sc, &rxq->iq);
3930			quiesce_fl(sc, &rxq->fl);
3931		}
3932
3933#ifdef TCP_OFFLOAD
3934		for_each_ofld_rxq(vi, i, ofld_rxq) {
3935			quiesce_iq(sc, &ofld_rxq->iq);
3936			quiesce_fl(sc, &ofld_rxq->fl);
3937		}
3938#endif
3939		free(vi->rss, M_CXGBE);
3940		free(vi->nm_rss, M_CXGBE);
3941	}
3942
3943	t4_teardown_vi_queues(vi);
3944	vi->flags &= ~VI_INIT_DONE;
3945
3946	return (0);
3947}
3948
3949static void
3950quiesce_txq(struct adapter *sc, struct sge_txq *txq)
3951{
3952	struct sge_eq *eq = &txq->eq;
3953	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
3954
3955	(void) sc;	/* unused */
3956
3957#ifdef INVARIANTS
3958	TXQ_LOCK(txq);
3959	MPASS((eq->flags & EQ_ENABLED) == 0);
3960	TXQ_UNLOCK(txq);
3961#endif
3962
3963	/* Wait for the mp_ring to empty. */
3964	while (!mp_ring_is_idle(txq->r)) {
3965		mp_ring_check_drainage(txq->r, 0);
3966		pause("rquiesce", 1);
3967	}
3968
3969	/* Then wait for the hardware to finish. */
3970	while (spg->cidx != htobe16(eq->pidx))
3971		pause("equiesce", 1);
3972
3973	/* Finally, wait for the driver to reclaim all descriptors. */
3974	while (eq->cidx != eq->pidx)
3975		pause("dquiesce", 1);
3976}
3977
3978static void
3979quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
3980{
3981
3982	/* XXXTX */
3983}
3984
3985static void
3986quiesce_iq(struct adapter *sc, struct sge_iq *iq)
3987{
3988	(void) sc;	/* unused */
3989
3990	/* Synchronize with the interrupt handler */
3991	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
3992		pause("iqfree", 1);
3993}
3994
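/*
 * Doom the freelist so that the starvation-relief machinery leaves it
 * alone, stop the callout that services starving freelists, and verify
 * that the freelist is no longer flagged as starving.
 */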
3995static void
3996quiesce_fl(struct adapter *sc, struct sge_fl *fl)
3997{
3998	mtx_lock(&sc->sfl_lock);
3999	FL_LOCK(fl);
4000	fl->flags |= FL_DOOMED;
4001	FL_UNLOCK(fl);
4002	callout_stop(&sc->sfl_callout);
4003	mtx_unlock(&sc->sfl_lock);
4004
4005	KASSERT((fl->flags & FL_STARVING) == 0,
4006	    ("%s: still starving", __func__));
4007}
4008
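/*
 * Allocate an active, shareable IRQ resource and install the handler on
 * it (no filter, so it runs in an ithread).  The name, when provided,
 * is attached to the vector and shows up in "vmstat -i" output.
 */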
4009static int
4010t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
4011    driver_intr_t *handler, void *arg, char *name)
4012{
4013	int rc;
4014
4015	irq->rid = rid;
4016	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
4017	    RF_SHAREABLE | RF_ACTIVE);
4018	if (irq->res == NULL) {
4019		device_printf(sc->dev,
4020		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
4021		return (ENOMEM);
4022	}
4023
4024	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
4025	    NULL, handler, arg, &irq->tag);
4026	if (rc != 0) {
4027		device_printf(sc->dev,
4028		    "failed to setup interrupt for rid %d, name %s: %d\n",
4029		    rid, name, rc);
4030	} else if (name)
4031		bus_describe_intr(sc->dev, irq->res, irq->tag, name);
4032
4033	return (rc);
4034}
4035
4036static int
4037t4_free_irq(struct adapter *sc, struct irq *irq)
4038{
4039	if (irq->tag)
4040		bus_teardown_intr(sc->dev, irq->res, irq->tag);
4041	if (irq->res)
4042		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
4043
4044	bzero(irq, sizeof(*irq));
4045
4046	return (0);
4047}
4048
4049static void
4050get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
4051{
4052
4053	regs->version = chip_id(sc) | chip_rev(sc) << 10;
4054	t4_get_regs(sc, buf, regs->len);
4055}
4056
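/*
 * The S_/M_/V_/G_ macros below follow the driver's usual register-field
 * convention: S_<F> is the field's bit offset, M_<F> its unshifted mask,
 * V_<F>(x) positions a value within the field, and G_<F>(x) extracts the
 * field from a register value.
 */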
4057#define	A_PL_INDIR_CMD	0x1f8
4058
4059#define	S_PL_AUTOINC	31
4060#define	M_PL_AUTOINC	0x1U
4061#define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
4062#define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
4063
4064#define	S_PL_VFID	20
4065#define	M_PL_VFID	0xffU
4066#define	V_PL_VFID(x)	((x) << S_PL_VFID)
4067#define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
4068
4069#define	S_PL_ADDR	0
4070#define	M_PL_ADDR	0xfffffU
4071#define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
4072#define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
4073
4074#define	A_PL_INDIR_DATA	0x1fc
4075
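/*
 * Read one 64-bit VF statistic through the PL indirect interface:
 * program A_PL_INDIR_CMD with auto-increment and read the low and high
 * halves back to back from A_PL_INDIR_DATA.  regwin_lock must be held
 * because the CMD/DATA register pair is shared adapter-wide.
 */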
4076static uint64_t
4077read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
4078{
4079	u32 stats[2];
4080
4081	mtx_assert(&sc->regwin_lock, MA_OWNED);
4082	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4083	    V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg)));
4084	stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
4085	stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
4086	return (((uint64_t)stats[1]) << 32 | stats[0]);
4087}
4088
4089static void
4090t4_get_vi_stats(struct adapter *sc, unsigned int viid,
4091    struct fw_vi_stats_vf *stats)
4092{
4093
4094#define GET_STAT(name) \
4095	read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
4096
4097	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
4098	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
4099	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
4100	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
4101	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
4102	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
4103	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
4104	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
4105	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
4106	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
4107	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
4108	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
4109	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
4110	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
4111	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
4112	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
4113
4114#undef GET_STAT
4115}
4116
4117static void
4118t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
4119{
4120	int reg;
4121
4122	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4123	    V_PL_VFID(G_FW_VIID_VIN(viid)) |
4124	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
4125	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
4126	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
4127		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
4128}
4129
4130static void
4131vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
4132{
4133	struct ifnet *ifp = vi->ifp;
4134	struct sge_txq *txq;
4135	int i, drops;
4136	struct fw_vi_stats_vf *s = &vi->stats;
4137	struct timeval tv;
4138	const struct timeval interval = {0, 250000};	/* 250ms */
4139
4140	if (!(vi->flags & VI_INIT_DONE))
4141		return;
4142
4143	getmicrotime(&tv);
4144	timevalsub(&tv, &interval);
4145	if (timevalcmp(&tv, &vi->last_refreshed, <))
4146		return;
4147
4148	mtx_lock(&sc->regwin_lock);
4149	t4_get_vi_stats(sc, vi->viid, &vi->stats);
4150
4151	ifp->if_ipackets = s->rx_bcast_frames + s->rx_mcast_frames +
4152	    s->rx_ucast_frames;
4153	ifp->if_ierrors = s->rx_err_frames;
4154	ifp->if_opackets = s->tx_bcast_frames + s->tx_mcast_frames +
4155	    s->tx_ucast_frames + s->tx_offload_frames;
4156	ifp->if_oerrors = s->tx_drop_frames;
4157	ifp->if_ibytes = s->rx_bcast_bytes + s->rx_mcast_bytes +
4158	    s->rx_ucast_bytes;
4159	ifp->if_obytes = s->tx_bcast_bytes + s->tx_mcast_bytes +
4160	    s->tx_ucast_bytes + s->tx_offload_bytes;
4161	ifp->if_imcasts = s->rx_mcast_frames;
4162	ifp->if_omcasts = s->tx_mcast_frames;
4163
4164	drops = 0;
4165	for_each_txq(vi, i, txq)
4166		drops += counter_u64_fetch(txq->r->drops);
4167	ifp->if_snd.ifq_drops = drops;
4168
4169	getmicrotime(&vi->last_refreshed);
4170	mtx_unlock(&sc->regwin_lock);
4171}
4172
4173static void
4174cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
4175{
4176	struct vi_info *vi = &pi->vi[0];
4177	struct ifnet *ifp = vi->ifp;
4178	struct sge_txq *txq;
4179	int i, drops;
4180	struct port_stats *s = &pi->stats;
4181	struct timeval tv;
4182	const struct timeval interval = {0, 250000};	/* 250ms */
4183
4184	getmicrotime(&tv);
4185	timevalsub(&tv, &interval);
4186	if (timevalcmp(&tv, &pi->last_refreshed, <))
4187		return;
4188
4189	t4_get_port_stats(sc, pi->tx_chan, s);
4190
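	/*
	 * The MAC counters include 802.3x pause frames; back them out so
	 * that the ifnet counters reflect data traffic only.  Pause
	 * frames are 64 bytes on the wire (hence the "* 64" octet
	 * adjustments) and are sent to a multicast address, which is why
	 * they are subtracted from the multicast counters as well.
	 */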
4191	ifp->if_opackets = s->tx_frames - s->tx_pause;
4192	ifp->if_ipackets = s->rx_frames - s->rx_pause;
4193	ifp->if_obytes = s->tx_octets - s->tx_pause * 64;
4194	ifp->if_ibytes = s->rx_octets - s->rx_pause * 64;
4195	ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause;
4196	ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause;
4197	ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
4198	    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
4199	    s->rx_trunc3;
4200	for (i = 0; i < sc->chip_params->nchan; i++) {
4201		if (pi->rx_chan_map & (1 << i)) {
4202			uint32_t v;
4203
4204			mtx_lock(&sc->regwin_lock);
4205			t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
4206			    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
4207			mtx_unlock(&sc->regwin_lock);
4208			ifp->if_iqdrops += v;
4209		}
4210	}
4211
4212	drops = s->tx_drop;
4213	for_each_txq(vi, i, txq)
4214		drops += counter_u64_fetch(txq->r->drops);
4215	ifp->if_snd.ifq_drops = drops;
4216
4217	ifp->if_oerrors = s->tx_error_frames;
4218	ifp->if_ierrors = s->rx_jabber + s->rx_runt + s->rx_too_long +
4219	    s->rx_fcs_err + s->rx_len_err;
4220
4221	getmicrotime(&pi->last_refreshed);
4222}
4223
4224static void
4225cxgbe_tick(void *arg)
4226{
4227	struct port_info *pi = arg;
4228	struct adapter *sc = pi->adapter;
4229
4230	PORT_LOCK_ASSERT_OWNED(pi);
4231	cxgbe_refresh_stats(sc, pi);
4232
4233	callout_schedule(&pi->tick, hz);
4234}
4235
4236void
4237vi_tick(void *arg)
4238{
4239	struct vi_info *vi = arg;
4240	struct adapter *sc = vi->pi->adapter;
4241
4242	vi_refresh_stats(sc, vi);
4243
4244	callout_schedule(&vi->tick, hz);
4245}
4246
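/*
 * Callback for VLAN creation on one of this driver's interfaces: record
 * the parent ifnet as the vlan interface's cookie so it can be located
 * from the vlan interface later.
 */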
4247static void
4248cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
4249{
4250	struct ifnet *vlan;
4251
4252	if (arg != ifp || ifp->if_type != IFT_ETHER)
4253		return;
4254
4255	vlan = VLAN_DEVAT(ifp, vid);
4256	VLAN_SETCOOKIE(vlan, ifp);
4257}
4258
4259static int
4260cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
4261{
4262
4263#ifdef INVARIANTS
4264	panic("%s: opcode 0x%02x on iq %p with payload %p",
4265	    __func__, rss->opcode, iq, m);
4266#else
4267	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
4268	    __func__, rss->opcode, iq, m);
4269	m_freem(m);
4270#endif
4271	return (EDOOFUS);
4272}
4273
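/*
 * Install (or, when h is NULL, restore the default for) the handler for
 * a CPL opcode.  The table is read locklessly by the rx path, so the
 * update is published with a single atomic release store.
 */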
4274int
4275t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
4276{
4277	uintptr_t *loc, new;
4278
4279	if (opcode >= nitems(sc->cpl_handler))
4280		return (EINVAL);
4281
4282	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
4283	loc = (uintptr_t *) &sc->cpl_handler[opcode];
4284	atomic_store_rel_ptr(loc, new);
4285
4286	return (0);
4287}
4288
4289static int
4290an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
4291{
4292
4293#ifdef INVARIANTS
4294	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
4295#else
4296	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
4297	    __func__, iq, ctrl);
4298#endif
4299	return (EDOOFUS);
4300}
4301
4302int
4303t4_register_an_handler(struct adapter *sc, an_handler_t h)
4304{
4305	uintptr_t *loc, new;
4306
4307	new = h ? (uintptr_t)h : (uintptr_t)an_not_handled;
4308	loc = (uintptr_t *) &sc->an_handler;
4309	atomic_store_rel_ptr(loc, new);
4310
4311	return (0);
4312}
4313
4314static int
4315fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
4316{
4317	const struct cpl_fw6_msg *cpl =
4318	    __containerof(rpl, struct cpl_fw6_msg, data[0]);
4319
4320#ifdef INVARIANTS
4321	panic("%s: fw_msg type %d", __func__, cpl->type);
4322#else
4323	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
4324#endif
4325	return (EDOOFUS);
4326}
4327
4328int
4329t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
4330{
4331	uintptr_t *loc, new;
4332
4333	if (type >= nitems(sc->fw_msg_handler))
4334		return (EINVAL);
4335
4336	/*
4337	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
4338	 * handler dispatch table.  Reject any attempt to install a handler for
4339	 * this subtype.
4340	 */
4341	if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL)
4342		return (EINVAL);
4343
4344	new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
4345	loc = (uintptr_t *) &sc->fw_msg_handler[type];
4346	atomic_store_rel_ptr(loc, new);
4347
4348	return (0);
4349}
4350
4351/*
4352 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
4353 */
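/*
 * These are "%b" bit descriptions: the leading \20 (decimal 16) selects
 * hexadecimal output and each subsequent octal escape is the 1-based
 * number of the bit named by the text that follows it.  For example,
 * decoding the value 3 against "\20\001INGRESS\002EGRESS" produces
 * "3<INGRESS,EGRESS>".
 */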
4354static char *caps_decoder[] = {
4355	"\20\001IPMI\002NCSI",				/* 0: NBM */
4356	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
4357	"\20\001INGRESS\002EGRESS",			/* 2: switch */
4358	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
4359	    "\006HASHFILTER\007ETHOFLD",
4360	"\20\001TOE",					/* 4: TOE */
4361	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
4362	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
4363	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
4364	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
4365	    "\007T10DIF"
4366	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
4367	"\20\001KEYS",					/* 7: TLS */
4368	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
4369		    "\004PO_INITIATOR\005PO_TARGET",
4370};
4371
4372static void
4373t4_sysctls(struct adapter *sc)
4374{
4375	struct sysctl_ctx_list *ctx;
4376	struct sysctl_oid *oid;
4377	struct sysctl_oid_list *children, *c0;
4378	static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
4379
4380	ctx = device_get_sysctl_ctx(sc->dev);
4381
4382	/*
4383	 * dev.t4nex.X.
4384	 */
4385	oid = device_get_sysctl_tree(sc->dev);
4386	c0 = children = SYSCTL_CHILDREN(oid);
4387
4388	sc->sc_do_rxcopy = 1;
4389	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
4390	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
4391
4392	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
4393	    sc->params.nports, "# of ports");
4394
4395	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
4396	    NULL, chip_rev(sc), "chip hardware revision");
4397
4398	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
4399	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
4400
4401	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
4402	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
4403
4404	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
4405	    sc->cfcsum, "config file checksum");
4406
4407	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
4408	    CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells,
4409	    sysctl_bitfield, "A", "available doorbells");
4410
4411#define SYSCTL_CAP(name, n, text) \
4412	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
4413	    CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \
4414	    sysctl_bitfield, "A", "available " text " capabilities")
4415
4416	SYSCTL_CAP(nbmcaps, 0, "NBM");
4417	SYSCTL_CAP(linkcaps, 1, "link");
4418	SYSCTL_CAP(switchcaps, 2, "switch");
4419	SYSCTL_CAP(niccaps, 3, "NIC");
4420	SYSCTL_CAP(toecaps, 4, "TCP offload");
4421	SYSCTL_CAP(rdmacaps, 5, "RDMA");
4422	SYSCTL_CAP(iscsicaps, 6, "iSCSI");
4423	SYSCTL_CAP(tlscaps, 7, "TLS");
4424	SYSCTL_CAP(fcoecaps, 8, "FCoE");
4425#undef SYSCTL_CAP
4426
4427	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
4428	    sc->params.vpd.cclk, "core clock frequency (in kHz)");
4429
4430	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
4431	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
4432	    sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
4433	    "interrupt holdoff timer values (us)");
4434
4435	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
4436	    CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
4437	    sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
4438	    "interrupt holdoff packet counter values");
4439
4440	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
4441	    NULL, sc->tids.nftids, "number of filters");
4442
4443	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
4444	    CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
4445	    "chip temperature (in Celsius)");
4446
4447	t4_sge_sysctls(sc, ctx, children);
4448
4449	sc->lro_timeout = 100;
4450	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
4451	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
4452
4453	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW,
4454	    &sc->debug_flags, 0, "flags to enable runtime debugging");
4455
4456#ifdef SBUF_DRAIN
4457	/*
4458	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
4459	 */
4460	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
4461	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
4462	    "logs and miscellaneous information");
4463	children = SYSCTL_CHILDREN(oid);
4464
4465	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
4466	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4467	    sysctl_cctrl, "A", "congestion control");
4468
4469	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
4470	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4471	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
4472
4473	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
4474	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
4475	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
4476
4477	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
4478	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
4479	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
4480
4481	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
4482	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
4483	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
4484
4485	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
4486	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
4487	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
4488
4489	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
4490	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
4491	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
4492
4493	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
4494	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4495	    chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
4496	    "A", "CIM logic analyzer");
4497
4498	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
4499	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4500	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
4501
4502	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
4503	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
4504	    sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
4505
4506	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
4507	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
4508	    sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
4509
4510	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
4511	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
4512	    sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
4513
4514	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
4515	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
4516	    sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
4517
4518	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
4519	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
4520	    sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
4521
4522	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
4523	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
4524	    sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
4525
4526	if (chip_id(sc) > CHELSIO_T4) {
4527		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
4528		    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
4529		    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
4530
4531		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
4532		    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
4533		    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
4534	}
4535
4536	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
4537	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4538	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
4539
4540	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
4541	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4542	    sysctl_cim_qcfg, "A", "CIM queue configuration");
4543
4544	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
4545	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4546	    sysctl_cpl_stats, "A", "CPL statistics");
4547
4548	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
4549	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4550	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
4551
4552	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
4553	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4554	    sysctl_devlog, "A", "firmware's device log");
4555
4556	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
4557	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4558	    sysctl_fcoe_stats, "A", "FCoE statistics");
4559
4560	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
4561	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4562	    sysctl_hw_sched, "A", "hardware scheduler");
4563
4564	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
4565	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4566	    sysctl_l2t, "A", "hardware L2 table");
4567
4568	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
4569	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4570	    sysctl_lb_stats, "A", "loopback statistics");
4571
4572	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
4573	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4574	    sysctl_meminfo, "A", "memory regions");
4575
4576	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
4577	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4578	    chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
4579	    "A", "MPS TCAM entries");
4580
4581	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
4582	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4583	    sysctl_path_mtus, "A", "path MTUs");
4584
4585	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
4586	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4587	    sysctl_pm_stats, "A", "PM statistics");
4588
4589	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
4590	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4591	    sysctl_rdma_stats, "A", "RDMA statistics");
4592
4593	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
4594	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4595	    sysctl_tcp_stats, "A", "TCP statistics");
4596
4597	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
4598	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4599	    sysctl_tids, "A", "TID information");
4600
4601	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
4602	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4603	    sysctl_tp_err_stats, "A", "TP error statistics");
4604
4605	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
4606	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4607	    sysctl_tp_la, "A", "TP logic analyzer");
4608
4609	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
4610	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4611	    sysctl_tx_rate, "A", "Tx rate");
4612
4613	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
4614	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4615	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
4616
4617	if (is_t5(sc)) {
4618		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
4619		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4620		    sysctl_wcwr_stats, "A", "write combined work requests");
4621	}
4622#endif
4623
4624#ifdef TCP_OFFLOAD
4625	if (is_offload(sc)) {
4626		/*
4627		 * dev.t4nex.X.toe.
4628		 */
4629		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
4630		    NULL, "TOE parameters");
4631		children = SYSCTL_CHILDREN(oid);
4632
4633		sc->tt.sndbuf = 256 * 1024;
4634		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
4635		    &sc->tt.sndbuf, 0, "max hardware send buffer size");
4636
4637		sc->tt.ddp = 0;
4638		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
4639		    &sc->tt.ddp, 0, "DDP allowed");
4640
4641		sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5));
4642		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
4643		    &sc->tt.indsz, 0, "DDP max indicate size allowed");
4644
4645		sc->tt.ddp_thres =
4646		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
4647		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
4648		    &sc->tt.ddp_thres, 0, "DDP threshold");
4649
4650		sc->tt.rx_coalesce = 1;
4651		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
4652		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
4653
4654		sc->tt.tx_align = 1;
4655		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
4656		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
4657	}
4658#endif
4659}
4660
4661void
4662vi_sysctls(struct vi_info *vi)
4663{
4664	struct sysctl_ctx_list *ctx;
4665	struct sysctl_oid *oid;
4666	struct sysctl_oid_list *children;
4667
4668	ctx = device_get_sysctl_ctx(vi->dev);
4669
4670	/*
4671	 * dev.v?(cxgbe|cxl).X.
4672	 */
4673	oid = device_get_sysctl_tree(vi->dev);
4674	children = SYSCTL_CHILDREN(oid);
4675
4676	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
4677	    vi->viid, "VI identifier");
4678	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
4679	    &vi->nrxq, 0, "# of rx queues");
4680	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
4681	    &vi->ntxq, 0, "# of tx queues");
4682	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
4683	    &vi->first_rxq, 0, "index of first rx queue");
4684	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
4685	    &vi->first_txq, 0, "index of first tx queue");
4686
4687	if (IS_MAIN_VI(vi)) {
4688		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
4689		    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
4690		    "Reserve queue 0 for non-flowid packets");
4691	}
4692
4693#ifdef TCP_OFFLOAD
4694	if (vi->nofldrxq != 0) {
4695		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
4696		    &vi->nofldrxq, 0,
4697		    "# of rx queues for offloaded TCP connections");
4698		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
4699		    &vi->nofldtxq, 0,
4700		    "# of tx queues for offloaded TCP connections");
4701		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
4702		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
4703		    "index of first TOE rx queue");
4704		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
4705		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
4706		    "index of first TOE tx queue");
4707	}
4708#endif
4709#ifdef DEV_NETMAP
4710	if (vi->nnmrxq != 0) {
4711		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
4712		    &vi->nnmrxq, 0, "# of netmap rx queues");
4713		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
4714		    &vi->nnmtxq, 0, "# of netmap tx queues");
4715		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
4716		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
4717		    "index of first netmap rx queue");
4718		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
4719		    CTLFLAG_RD, &vi->first_nm_txq, 0,
4720		    "index of first netmap tx queue");
4721	}
4722#endif
4723
4724	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
4725	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
4726	    "holdoff timer index");
4727	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
4728	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
4729	    "holdoff packet counter index");
4730
4731	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
4732	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
4733	    "rx queue size");
4734	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
4735	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
4736	    "tx queue size");
4737}
4738
4739static void
4740cxgbe_sysctls(struct port_info *pi)
4741{
4742	struct sysctl_ctx_list *ctx;
4743	struct sysctl_oid *oid;
4744	struct sysctl_oid_list *children;
4745	struct adapter *sc = pi->adapter;
4746
4747	ctx = device_get_sysctl_ctx(pi->dev);
4748
4749	/*
4750	 * dev.cxgbe.X.
4751	 */
4752	oid = device_get_sysctl_tree(pi->dev);
4753	children = SYSCTL_CHILDREN(oid);
4754
4755	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
4756	   CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
4757	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
4758		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
4759		    CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
4760		    "PHY temperature (in Celsius)");
4761		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
4762		    CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
4763		    "PHY firmware version");
4764	}
4765
4766	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
4767	    CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings,
4768	    "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
4769
4770	/*
4771	 * dev.cxgbe.X.stats.
4772	 */
4773	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
4774	    NULL, "port statistics");
4775	children = SYSCTL_CHILDREN(oid);
4776	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
4777	    &pi->tx_parse_error, 0,
4778	    "# of tx packets with an invalid length or segment count");
4779
4780#define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
4781	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
4782	    CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
4783	    sysctl_handle_t4_reg64, "QU", desc)
4784
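/*
 * Each sysctl defined with this macro reads a 64-bit MPS counter
 * straight from the hardware via sysctl_handle_t4_reg64(), so these
 * values are always current (unlike the port_stats-derived sysctls
 * further below, which may be slightly stale).
 */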
4785	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
4786	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
4787	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
4788	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
4789	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
4790	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
4791	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
4792	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
4793	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
4794	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
4795	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
4796	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
4797	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
4798	    "# of tx frames in this range",
4799	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
4800	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
4801	    "# of tx frames in this range",
4802	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
4803	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
4804	    "# of tx frames in this range",
4805	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
4806	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
4807	    "# of tx frames in this range",
4808	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
4809	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
4810	    "# of tx frames in this range",
4811	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
4812	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
4813	    "# of tx frames in this range",
4814	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
4815	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
4816	    "# of tx frames in this range",
4817	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
4818	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
4819	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
4820	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
4821	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
4822	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
4823	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
4824	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
4825	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
4826	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
4827	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
4828	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
4829	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
4830	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
4831	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
4832	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
4833	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
4834	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
4835	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
4836	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
4837	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
4838
4839	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
4840	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
4841	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
4842	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
4843	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
4844	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
4845	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
4846	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
4847	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
4848	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
4849	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
4850	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
4851	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
4852	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
4853	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
4854	    "# of frames received with bad FCS",
4855	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
4856	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
4857	    "# of frames received with length error",
4858	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
4859	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
4860	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
4861	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
4862	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
4863	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
4864	    "# of rx frames in this range",
4865	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
4866	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
4867	    "# of rx frames in this range",
4868	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
4869	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
4870	    "# of rx frames in this range",
4871	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
4872	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
4873	    "# of rx frames in this range",
4874	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
4875	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
4876	    "# of rx frames in this range",
4877	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
4878	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
4879	    "# of rx frames in this range",
4880	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
4881	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
4882	    "# of rx frames in this range",
4883	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
4884	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
4885	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
4886	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
4887	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
4888	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
4889	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
4890	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
4891	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
4892	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
4893	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
4894	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
4895	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
4896	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
4897	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
4898	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
4899	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
4900	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
4901	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
4902
4903#undef SYSCTL_ADD_T4_REG64
4904
4905#define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
4906	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
4907	    &pi->stats.name, desc)
4908
4909	/* We get these from port_stats and they may be stale by up to 1s. */
4910	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
4911	    "# drops due to buffer-group 0 overflows");
4912	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
4913	    "# drops due to buffer-group 1 overflows");
4914	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
4915	    "# drops due to buffer-group 2 overflows");
4916	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
4917	    "# drops due to buffer-group 3 overflows");
4918	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
4919	    "# of buffer-group 0 truncated packets");
4920	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
4921	    "# of buffer-group 1 truncated packets");
4922	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
4923	    "# of buffer-group 2 truncated packets");
4924	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
4925	    "# of buffer-group 3 truncated packets");
4926
4927#undef SYSCTL_ADD_T4_PORTSTAT
4928}
4929
4930static int
4931sysctl_int_array(SYSCTL_HANDLER_ARGS)
4932{
4933	int rc, *i, space = 0;
4934	struct sbuf sb;
4935
4936	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
4937	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
4938		if (space)
4939			sbuf_printf(&sb, " ");
4940		sbuf_printf(&sb, "%d", *i);
4941		space = 1;
4942	}
4943	sbuf_finish(&sb);
4944	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
4945	sbuf_delete(&sb);
4946	return (rc);
4947}
4948
4949static int
4950sysctl_bitfield(SYSCTL_HANDLER_ARGS)
4951{
4952	int rc;
4953	struct sbuf *sb;
4954
4955	rc = sysctl_wire_old_buffer(req, 0);
4956	if (rc != 0)
4957		return (rc);
4958
4959	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
4960	if (sb == NULL)
4961		return (ENOMEM);
4962
4963	sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
4964	rc = sbuf_finish(sb);
4965	sbuf_delete(sb);
4966
4967	return (rc);
4968}
4969
4970static int
4971sysctl_btphy(SYSCTL_HANDLER_ARGS)
4972{
4973	struct port_info *pi = arg1;
4974	int op = arg2;
4975	struct adapter *sc = pi->adapter;
4976	u_int v;
4977	int rc;
4978
4979	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
4980	if (rc)
4981		return (rc);
4982	/* XXX: magic numbers */
4983	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
4984	    &v);
4985	end_synchronized_op(sc, 0);
4986	if (rc)
4987		return (rc);
4988	if (op == 0)
4989		v /= 256;
4990
4991	rc = sysctl_handle_int(oidp, &v, 0, req);
4992	return (rc);
4993}
4994
4995static int
4996sysctl_noflowq(SYSCTL_HANDLER_ARGS)
4997{
4998	struct vi_info *vi = arg1;
4999	int rc, val;
5000
5001	val = vi->rsrv_noflowq;
5002	rc = sysctl_handle_int(oidp, &val, 0, req);
5003	if (rc != 0 || req->newptr == NULL)
5004		return (rc);
5005
5006	if ((val >= 1) && (vi->ntxq > 1))
5007		vi->rsrv_noflowq = 1;
5008	else
5009		vi->rsrv_noflowq = 0;
5010
5011	return (rc);
5012}
5013
5014static int
5015sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
5016{
5017	struct vi_info *vi = arg1;
5018	struct adapter *sc = vi->pi->adapter;
5019	int idx, rc, i;
5020	struct sge_rxq *rxq;
5021#ifdef TCP_OFFLOAD
5022	struct sge_ofld_rxq *ofld_rxq;
5023#endif
5024	uint8_t v;
5025
5026	idx = vi->tmr_idx;
5027
5028	rc = sysctl_handle_int(oidp, &idx, 0, req);
5029	if (rc != 0 || req->newptr == NULL)
5030		return (rc);
5031
5032	if (idx < 0 || idx >= SGE_NTIMERS)
5033		return (EINVAL);
5034
5035	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5036	    "t4tmr");
5037	if (rc)
5038		return (rc);
5039
5040	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
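	/*
	 * iq.intr_params is sampled by the rx path without a lock, so
	 * publish the new value with an 8-bit release store where the
	 * platform provides one and fall back to a plain store otherwise.
	 */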
5041	for_each_rxq(vi, i, rxq) {
5042#ifdef atomic_store_rel_8
5043		atomic_store_rel_8(&rxq->iq.intr_params, v);
5044#else
5045		rxq->iq.intr_params = v;
5046#endif
5047	}
5048#ifdef TCP_OFFLOAD
5049	for_each_ofld_rxq(vi, i, ofld_rxq) {
5050#ifdef atomic_store_rel_8
5051		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
5052#else
5053		ofld_rxq->iq.intr_params = v;
5054#endif
5055	}
5056#endif
5057	vi->tmr_idx = idx;
5058
5059	end_synchronized_op(sc, LOCK_HELD);
5060	return (0);
5061}
5062
5063static int
5064sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
5065{
5066	struct vi_info *vi = arg1;
5067	struct adapter *sc = vi->pi->adapter;
5068	int idx, rc;
5069
5070	idx = vi->pktc_idx;
5071
5072	rc = sysctl_handle_int(oidp, &idx, 0, req);
5073	if (rc != 0 || req->newptr == NULL)
5074		return (rc);
5075
5076	if (idx < -1 || idx >= SGE_NCOUNTERS)
5077		return (EINVAL);
5078
5079	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5080	    "t4pktc");
5081	if (rc)
5082		return (rc);
5083
5084	if (vi->flags & VI_INIT_DONE)
5085		rc = EBUSY; /* cannot be changed once the queues are created */
5086	else
5087		vi->pktc_idx = idx;
5088
5089	end_synchronized_op(sc, LOCK_HELD);
5090	return (rc);
5091}
5092
5093static int
5094sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
5095{
5096	struct vi_info *vi = arg1;
5097	struct adapter *sc = vi->pi->adapter;
5098	int qsize, rc;
5099
5100	qsize = vi->qsize_rxq;
5101
5102	rc = sysctl_handle_int(oidp, &qsize, 0, req);
5103	if (rc != 0 || req->newptr == NULL)
5104		return (rc);
5105
5106	if (qsize < 128 || (qsize & 7))
5107		return (EINVAL);
5108
5109	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5110	    "t4rxqs");
5111	if (rc)
5112		return (rc);
5113
5114	if (vi->flags & VI_INIT_DONE)
5115		rc = EBUSY; /* cannot be changed once the queues are created */
5116	else
5117		vi->qsize_rxq = qsize;
5118
5119	end_synchronized_op(sc, LOCK_HELD);
5120	return (rc);
5121}
5122
5123static int
5124sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
5125{
5126	struct vi_info *vi = arg1;
5127	struct adapter *sc = vi->pi->adapter;
5128	int qsize, rc;
5129
5130	qsize = vi->qsize_txq;
5131
5132	rc = sysctl_handle_int(oidp, &qsize, 0, req);
5133	if (rc != 0 || req->newptr == NULL)
5134		return (rc);
5135
5136	if (qsize < 128 || qsize > 65536)
5137		return (EINVAL);
5138
5139	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5140	    "t4txqs");
5141	if (rc)
5142		return (rc);
5143
5144	if (vi->flags & VI_INIT_DONE)
5145		rc = EBUSY; /* cannot be changed once the queues are created */
5146	else
5147		vi->qsize_txq = qsize;
5148
5149	end_synchronized_op(sc, LOCK_HELD);
5150	return (rc);
5151}
5152
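/*
 * pause_settings reports the current flow control state as a "%b"
 * string and accepts a single digit 0-3 on writes: bit 0 requests rx
 * pause and bit 1 tx pause.  For example,
 * "sysctl dev.cxgbe.0.pause_settings=3" requests both directions on the
 * first port.
 */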
5153static int
5154sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
5155{
5156	struct port_info *pi = arg1;
5157	struct adapter *sc = pi->adapter;
5158	struct link_config *lc = &pi->link_cfg;
5159	int rc;
5160
5161	if (req->newptr == NULL) {
5162		struct sbuf *sb;
5163		static char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
5164
5165		rc = sysctl_wire_old_buffer(req, 0);
5166		if (rc != 0)
5167			return (rc);
5168
5169		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5170		if (sb == NULL)
5171			return (ENOMEM);
5172
5173		sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
5174		rc = sbuf_finish(sb);
5175		sbuf_delete(sb);
5176	} else {
5177		char s[2];
5178		int n;
5179
5180		s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
5181		s[1] = 0;
5182
5183		rc = sysctl_handle_string(oidp, s, sizeof(s), req);
5184		if (rc != 0)
5185			return (rc);
5186
5187		if (s[1] != 0)
5188			return (EINVAL);
5189		if (s[0] < '0' || s[0] > '9')
5190			return (EINVAL);	/* not a number */
5191		n = s[0] - '0';
5192		if (n & ~(PAUSE_TX | PAUSE_RX))
5193			return (EINVAL);	/* some other bit is set too */
5194
5195		rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
5196		    "t4PAUSE");
5197		if (rc)
5198			return (rc);
5199		if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
5200			int link_ok = lc->link_ok;
5201
5202			lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
5203			lc->requested_fc |= n;
5204			rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5205			lc->link_ok = link_ok;	/* restore */
5206		}
5207		end_synchronized_op(sc, 0);
5208	}
5209
5210	return (rc);
5211}
5212
5213static int
5214sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
5215{
5216	struct adapter *sc = arg1;
5217	int reg = arg2;
5218	uint64_t val;
5219
5220	val = t4_read_reg64(sc, reg);
5221
5222	return (sysctl_handle_64(oidp, &val, 0, req));
5223}
5224
5225static int
5226sysctl_temperature(SYSCTL_HANDLER_ARGS)
5227{
5228	struct adapter *sc = arg1;
5229	int rc, t;
5230	uint32_t param, val;
5231
5232	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
5233	if (rc)
5234		return (rc);
5235	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
5236	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
5237	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
5238	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
5239	end_synchronized_op(sc, 0);
5240	if (rc)
5241		return (rc);
5242
5243	/* unknown is returned as 0 but we display -1 in that case */
5244	t = val == 0 ? -1 : val;
5245
5246	rc = sysctl_handle_int(oidp, &t, 0, req);
5247	return (rc);
5248}
5249
5250#ifdef SBUF_DRAIN
5251static int
5252sysctl_cctrl(SYSCTL_HANDLER_ARGS)
5253{
5254	struct adapter *sc = arg1;
5255	struct sbuf *sb;
5256	int rc, i;
5257	uint16_t incr[NMTUS][NCCTRL_WIN];
5258	static const char *dec_fac[] = {
5259		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
5260		"0.9375"
5261	};
5262
5263	rc = sysctl_wire_old_buffer(req, 0);
5264	if (rc != 0)
5265		return (rc);
5266
5267	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5268	if (sb == NULL)
5269		return (ENOMEM);
5270
5271	t4_read_cong_tbl(sc, incr);
5272
5273	for (i = 0; i < NCCTRL_WIN; ++i) {
5274		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
5275		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
5276		    incr[5][i], incr[6][i], incr[7][i]);
5277		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
5278		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
5279		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
5280		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
5281	}
5282
5283	rc = sbuf_finish(sb);
5284	sbuf_delete(sb);
5285
5286	return (rc);
5287}
5288
5289static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
5290	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
5291	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
5292	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
5293};
5294
5295static int
5296sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
5297{
5298	struct adapter *sc = arg1;
5299	struct sbuf *sb;
5300	int rc, i, n, qid = arg2;
5301	uint32_t *buf, *p;
5302	char *qtype;
5303	u_int cim_num_obq = sc->chip_params->cim_num_obq;
5304
5305	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
5306	    ("%s: bad qid %d\n", __func__, qid));
5307
5308	if (qid < CIM_NUM_IBQ) {
5309		/* inbound queue */
5310		qtype = "IBQ";
5311		n = 4 * CIM_IBQ_SIZE;
5312		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5313		rc = t4_read_cim_ibq(sc, qid, buf, n);
5314	} else {
5315		/* outbound queue */
5316		qtype = "OBQ";
5317		qid -= CIM_NUM_IBQ;
5318		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
5319		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5320		rc = t4_read_cim_obq(sc, qid, buf, n);
5321	}
5322
5323	if (rc < 0) {
5324		rc = -rc;
5325		goto done;
5326	}
5327	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */
5328
5329	rc = sysctl_wire_old_buffer(req, 0);
5330	if (rc != 0)
5331		goto done;
5332
5333	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5334	if (sb == NULL) {
5335		rc = ENOMEM;
5336		goto done;
5337	}
5338
5339	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
5340	for (i = 0, p = buf; i < n; i += 16, p += 4)
5341		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
5342		    p[2], p[3]);
5343
5344	rc = sbuf_finish(sb);
5345	sbuf_delete(sb);
5346done:
5347	free(buf, M_CXGBE);
5348	return (rc);
5349}
5350
5351static int
5352sysctl_cim_la(SYSCTL_HANDLER_ARGS)
5353{
5354	struct adapter *sc = arg1;
5355	u_int cfg;
5356	struct sbuf *sb;
5357	uint32_t *buf, *p;
5358	int rc;
5359
5360	MPASS(chip_id(sc) <= CHELSIO_T5);
5361
5362	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5363	if (rc != 0)
5364		return (rc);
5365
5366	rc = sysctl_wire_old_buffer(req, 0);
5367	if (rc != 0)
5368		return (rc);
5369
5370	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5371	if (sb == NULL)
5372		return (ENOMEM);
5373
5374	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5375	    M_ZERO | M_WAITOK);
5376
5377	rc = -t4_cim_read_la(sc, buf, NULL);
5378	if (rc != 0)
5379		goto done;
5380
5381	sbuf_printf(sb, "Status   Data      PC%s",
5382	    cfg & F_UPDBGLACAPTPCONLY ? "" :
5383	    "     LS0Stat  LS0Addr             LS0Data");
5384
5385	for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
5386		if (cfg & F_UPDBGLACAPTPCONLY) {
5387			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
5388			    p[6], p[7]);
5389			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
5390			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
5391			    p[4] & 0xff, p[5] >> 8);
5392			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
5393			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5394			    p[1] & 0xf, p[2] >> 4);
5395		} else {
5396			sbuf_printf(sb,
5397			    "\n  %02x   %x%07x %x%07x %08x %08x "
5398			    "%08x%08x%08x%08x",
5399			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5400			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
5401			    p[6], p[7]);
5402		}
5403	}
5404
5405	rc = sbuf_finish(sb);
5406done:
5407	sbuf_delete(sb);	/* also reclaims the sbuf on the error path */
5408	free(buf, M_CXGBE);
5409	return (rc);
5410}
5411
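/*
 * T6 version of the CIM logic analyzer dump.  Entries are 10 words wide
 * and carry an instruction word and a second load/store unit (LS1).
 */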
5412static int
5413sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
5414{
5415	struct adapter *sc = arg1;
5416	u_int cfg;
5417	struct sbuf *sb;
5418	uint32_t *buf, *p;
5419	int rc;
5420
5421	MPASS(chip_id(sc) > CHELSIO_T5);
5422
5423	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5424	if (rc != 0)
5425		return (rc);
5426
5427	rc = sysctl_wire_old_buffer(req, 0);
5428	if (rc != 0)
5429		return (rc);
5430
5431	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5432	if (sb == NULL)
5433		return (ENOMEM);
5434
5435	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5436	    M_ZERO | M_WAITOK);
5437
5438	rc = -t4_cim_read_la(sc, buf, NULL);
5439	if (rc != 0)
5440		goto done;
5441
5442	sbuf_printf(sb, "Status   Inst    Data      PC%s",
5443	    cfg & F_UPDBGLACAPTPCONLY ? "" :
5444	    "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
5445
5446	for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
5447		if (cfg & F_UPDBGLACAPTPCONLY) {
5448			sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
5449			    p[3] & 0xff, p[2], p[1], p[0]);
5450			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
5451			    (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
5452			    p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
5453			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
5454			    (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
5455			    p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
5456			    p[6] >> 16);
5457		} else {
5458			sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
5459			    "%08x %08x %08x %08x %08x %08x",
5460			    (p[9] >> 16) & 0xff,
5461			    p[9] & 0xffff, p[8] >> 16,
5462			    p[8] & 0xffff, p[7] >> 16,
5463			    p[7] & 0xffff, p[6] >> 16,
5464			    p[2], p[1], p[0], p[5], p[4], p[3]);
5465		}
5466	}
5467
5468	rc = sbuf_finish(sb);
5469done:
5470	sbuf_delete(sb);	/* also reclaims the sbuf on the error path */
5471	free(buf, M_CXGBE);
5472	return (rc);
5473}
5474
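/*
 * CIM MA logic analyzer.  t4_cim_read_ma_la fills two groups of
 * CIM_MALA_SIZE entries, 5 words each; the first group is dumped raw and
 * the second is decoded field by field.
 */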
5475static int
5476sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
5477{
5478	struct adapter *sc = arg1;
5479	u_int i;
5480	struct sbuf *sb;
5481	uint32_t *buf, *p;
5482	int rc;
5483
5484	rc = sysctl_wire_old_buffer(req, 0);
5485	if (rc != 0)
5486		return (rc);
5487
5488	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5489	if (sb == NULL)
5490		return (ENOMEM);
5491
5492	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
5493	    M_ZERO | M_WAITOK);
5494
5495	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
5496	p = buf;
5497
5498	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
5499		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
5500		    p[1], p[0]);
5501	}
5502
5503	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
5504	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
5505		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
5506		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
5507		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
5508		    (p[1] >> 2) | ((p[2] & 3) << 30),
5509		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
5510		    p[0] & 1);
5511	}
5512
5513	rc = sbuf_finish(sb);
5514	sbuf_delete(sb);
5515	free(buf, M_CXGBE);
5516	return (rc);
5517}
5518
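/*
 * CIM PIF logic analyzer: two groups of CIM_PIFLA_SIZE entries, 6 words
 * each.  The first group carries control/ID/byte-enable/address/data
 * fields, the second only control/ID/data.
 */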
5519static int
5520sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
5521{
5522	struct adapter *sc = arg1;
5523	u_int i;
5524	struct sbuf *sb;
5525	uint32_t *buf, *p;
5526	int rc;
5527
5528	rc = sysctl_wire_old_buffer(req, 0);
5529	if (rc != 0)
5530		return (rc);
5531
5532	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5533	if (sb == NULL)
5534		return (ENOMEM);
5535
5536	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
5537	    M_ZERO | M_WAITOK);
5538
5539	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
5540	p = buf;
5541
5542	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
5543	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
5544		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
5545		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
5546		    p[4], p[3], p[2], p[1], p[0]);
5547	}
5548
5549	sbuf_printf(sb, "\n\nCntl ID               Data");
5550	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
5551		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
5552		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
5553	}
5554
5555	rc = sbuf_finish(sb);
5556	sbuf_delete(sb);
5557	free(buf, M_CXGBE);
5558	return (rc);
5559}
5560
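/*
 * CIM queue configuration and occupancy.  t4_read_cimq_cfg supplies
 * base/size/threshold; four status words are read per queue for the
 * pointers and the SOP/EOP/remaining-flit counts, plus two address words
 * per OBQ from which the OBQ write pointer is derived.
 */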
5561static int
5562sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
5563{
5564	struct adapter *sc = arg1;
5565	struct sbuf *sb;
5566	int rc, i;
5567	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
5568	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
5569	uint16_t thres[CIM_NUM_IBQ];
5570	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
5571	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
5572	u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
5573
5574	cim_num_obq = sc->chip_params->cim_num_obq;
5575	if (is_t4(sc)) {
5576		ibq_rdaddr = A_UP_IBQ_0_RDADDR;
5577		obq_rdaddr = A_UP_OBQ_0_REALADDR;
5578	} else {
5579		ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
5580		obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
5581	}
5582	nq = CIM_NUM_IBQ + cim_num_obq;
5583
5584	rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
5585	if (rc == 0)
5586		rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
5587	if (rc != 0)
5588		return (rc);
5589
5590	t4_read_cimq_cfg(sc, base, size, thres);
5591
5592	rc = sysctl_wire_old_buffer(req, 0);
5593	if (rc != 0)
5594		return (rc);
5595
5596	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5597	if (sb == NULL)
5598		return (ENOMEM);
5599
5600	sbuf_printf(sb, "Queue  Base  Size Thres RdPtr WrPtr  SOP  EOP Avail");
5601
5602	for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
5603		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
5604		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
5605		    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
5606		    G_QUEREMFLITS(p[2]) * 16);
5607	for ( ; i < nq; i++, p += 4, wr += 2)
5608		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
5609		    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
5610		    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
5611		    G_QUEREMFLITS(p[2]) * 16);
5612
5613	rc = sbuf_finish(sb);
5614	sbuf_delete(sb);
5615
5616	return (rc);
5617}
5618
5619static int
5620sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
5621{
5622	struct adapter *sc = arg1;
5623	struct sbuf *sb;
5624	int rc;
5625	struct tp_cpl_stats stats;
5626
5627	rc = sysctl_wire_old_buffer(req, 0);
5628	if (rc != 0)
5629		return (rc);
5630
5631	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5632	if (sb == NULL)
5633		return (ENOMEM);
5634
5635	mtx_lock(&sc->regwin_lock);
5636	t4_tp_get_cpl_stats(sc, &stats);
5637	mtx_unlock(&sc->regwin_lock);
5638
5639	if (sc->chip_params->nchan > 2) {
5640		sbuf_printf(sb, "                 channel 0  channel 1"
5641		    "  channel 2  channel 3");
5642		sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
5643		    stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
5644		sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
5645		    stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
5646	} else {
5647		sbuf_printf(sb, "                 channel 0  channel 1");
5648		sbuf_printf(sb, "\nCPL requests:   %10u %10u",
5649		    stats.req[0], stats.req[1]);
5650		sbuf_printf(sb, "\nCPL responses:  %10u %10u",
5651		    stats.rsp[0], stats.rsp[1]);
5652	}
5653
5654	rc = sbuf_finish(sb);
5655	sbuf_delete(sb);
5656
5657	return (rc);
5658}
5659
5660static int
5661sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
5662{
5663	struct adapter *sc = arg1;
5664	struct sbuf *sb;
5665	int rc;
5666	struct tp_usm_stats stats;
5667
5668	rc = sysctl_wire_old_buffer(req, 0);
5669	if (rc != 0)
5670		return (rc);
5671
5672	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5673	if (sb == NULL)
5674		return (ENOMEM);
5675
5676	t4_get_usm_stats(sc, &stats);
5677
5678	sbuf_printf(sb, "Frames: %u\n", stats.frames);
5679	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
5680	sbuf_printf(sb, "Drops:  %u", stats.drops);
5681
5682	rc = sbuf_finish(sb);
5683	sbuf_delete(sb);
5684
5685	return (rc);
5686}
5687
5688const char *devlog_level_strings[] = {
5689	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
5690	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
5691	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
5692	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
5693	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
5694	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
5695};
5696
5697const char *devlog_facility_strings[] = {
5698	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
5699	[FW_DEVLOG_FACILITY_CF]		= "CF",
5700	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
5701	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
5702	[FW_DEVLOG_FACILITY_RES]	= "RES",
5703	[FW_DEVLOG_FACILITY_HW]		= "HW",
5704	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
5705	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
5706	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
5707	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
5708	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
5709	[FW_DEVLOG_FACILITY_VI]		= "VI",
5710	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
5711	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
5712	[FW_DEVLOG_FACILITY_TM]		= "TM",
5713	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
5714	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
5715	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
5716	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
5717	[FW_DEVLOG_FACILITY_RI]		= "RI",
5718	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
5719	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
5720	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
5721	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE"
5722};
5723
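/*
 * Firmware device log.  The log lives in card memory as a ring of
 * fixed-size big-endian entries; the oldest entry is found by scanning for
 * the smallest timestamp and the dump then walks the ring from there.
 * From userland this is typically read with something like
 * "sysctl dev.t4nex.0.misc.devlog" (unit number varies).
 */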
5724static int
5725sysctl_devlog(SYSCTL_HANDLER_ARGS)
5726{
5727	struct adapter *sc = arg1;
5728	struct devlog_params *dparams = &sc->params.devlog;
5729	struct fw_devlog_e *buf, *e;
5730	int i, j, rc, nentries, first = 0, m;
5731	struct sbuf *sb;
5732	uint64_t ftstamp = UINT64_MAX;
5733
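	/* Devlog parameters not known; fall back to the default location. */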
5734	if (dparams->start == 0) {
5735		dparams->memtype = FW_MEMTYPE_EDC0;
5736		dparams->start = 0x84000;
5737		dparams->size = 32768;
5738	}
5739
5740	nentries = dparams->size / sizeof(struct fw_devlog_e);
5741
5742	buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
5743	if (buf == NULL)
5744		return (ENOMEM);
5745
5746	m = fwmtype_to_hwmtype(dparams->memtype);
5747	rc = -t4_mem_read(sc, m, dparams->start, dparams->size, (void *)buf);
5748	if (rc != 0)
5749		goto done;
5750
5751	for (i = 0; i < nentries; i++) {
5752		e = &buf[i];
5753
5754		if (e->timestamp == 0)
5755			break;	/* end */
5756
5757		e->timestamp = be64toh(e->timestamp);
5758		e->seqno = be32toh(e->seqno);
5759		for (j = 0; j < 8; j++)
5760			e->params[j] = be32toh(e->params[j]);
5761
5762		if (e->timestamp < ftstamp) {
5763			ftstamp = e->timestamp;
5764			first = i;
5765		}
5766	}
5767
5768	if (buf[first].timestamp == 0)
5769		goto done;	/* nothing in the log */
5770
5771	rc = sysctl_wire_old_buffer(req, 0);
5772	if (rc != 0)
5773		goto done;
5774
5775	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5776	if (sb == NULL) {
5777		rc = ENOMEM;
5778		goto done;
5779	}
5780	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
5781	    "Seq#", "Tstamp", "Level", "Facility", "Message");
5782
5783	i = first;
5784	do {
5785		e = &buf[i];
5786		if (e->timestamp == 0)
5787			break;	/* end */
5788
5789		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
5790		    e->seqno, e->timestamp,
5791		    (e->level < nitems(devlog_level_strings) ?
5792			devlog_level_strings[e->level] : "UNKNOWN"),
5793		    (e->facility < nitems(devlog_facility_strings) ?
5794			devlog_facility_strings[e->facility] : "UNKNOWN"));
5795		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
5796		    e->params[2], e->params[3], e->params[4],
5797		    e->params[5], e->params[6], e->params[7]);
5798
5799		if (++i == nentries)
5800			i = 0;
5801	} while (i != first);
5802
5803	rc = sbuf_finish(sb);
5804	sbuf_delete(sb);
5805done:
5806	free(buf, M_CXGBE);
5807	return (rc);
5808}
5809
5810static int
5811sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
5812{
5813	struct adapter *sc = arg1;
5814	struct sbuf *sb;
5815	int rc;
5816	struct tp_fcoe_stats stats[MAX_NCHAN];
5817	int i, nchan = sc->chip_params->nchan;
5818
5819	rc = sysctl_wire_old_buffer(req, 0);
5820	if (rc != 0)
5821		return (rc);
5822
5823	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5824	if (sb == NULL)
5825		return (ENOMEM);
5826
5827	for (i = 0; i < nchan; i++)
5828		t4_get_fcoe_stats(sc, i, &stats[i]);
5829
5830	if (nchan > 2) {
5831		sbuf_printf(sb, "                   channel 0        channel 1"
5832		    "        channel 2        channel 3");
5833		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
5834		    stats[0].octets_ddp, stats[1].octets_ddp,
5835		    stats[2].octets_ddp, stats[3].octets_ddp);
5836		sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
5837		    stats[0].frames_ddp, stats[1].frames_ddp,
5838		    stats[2].frames_ddp, stats[3].frames_ddp);
5839		sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
5840		    stats[0].frames_drop, stats[1].frames_drop,
5841		    stats[2].frames_drop, stats[3].frames_drop);
5842	} else {
5843		sbuf_printf(sb, "                   channel 0        channel 1");
5844		sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
5845		    stats[0].octets_ddp, stats[1].octets_ddp);
5846		sbuf_printf(sb, "\nframesDDP:  %16u %16u",
5847		    stats[0].frames_ddp, stats[1].frames_ddp);
5848		sbuf_printf(sb, "\nframesDrop: %16u %16u",
5849		    stats[0].frames_drop, stats[1].frames_drop);
5850	}
5851
5852	rc = sbuf_finish(sb);
5853	sbuf_delete(sb);
5854
5855	return (rc);
5856}
5857
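/*
 * Hardware Tx scheduler state: per-scheduler timer mode (class vs. flow),
 * channel map, rate, class IPG, and the pace table entry used as the flow
 * IPG.
 */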
5858static int
5859sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
5860{
5861	struct adapter *sc = arg1;
5862	struct sbuf *sb;
5863	int rc, i;
5864	unsigned int map, kbps, ipg, mode;
5865	unsigned int pace_tab[NTX_SCHED];
5866
5867	rc = sysctl_wire_old_buffer(req, 0);
5868	if (rc != 0)
5869		return (rc);
5870
5871	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
5872	if (sb == NULL)
5873		return (ENOMEM);
5874
5875	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
5876	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
5877	t4_read_pace_tbl(sc, pace_tab);
5878
5879	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
5880	    "Class IPG (0.1 ns)   Flow IPG (us)");
5881
5882	for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
5883		t4_get_tx_sched(sc, i, &kbps, &ipg);
5884		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
5885		    (mode & (1 << i)) ? "flow" : "class", map & 3);
5886		if (kbps)
5887			sbuf_printf(sb, "%9u     ", kbps);
5888		else
5889			sbuf_printf(sb, " disabled     ");
5890
5891		if (ipg)
5892			sbuf_printf(sb, "%13u        ", ipg);
5893		else
5894			sbuf_printf(sb, "     disabled        ");
5895
5896		if (pace_tab[i])
5897			sbuf_printf(sb, "%10u", pace_tab[i]);
5898		else
5899			sbuf_printf(sb, "  disabled");
5900	}
5901
5902	rc = sbuf_finish(sb);
5903	sbuf_delete(sb);
5904
5905	return (rc);
5906}
5907
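/*
 * Loopback port statistics, printed two channels at a time.  struct
 * lb_port_stats is walked as a flat array of 64-bit counters, so
 * stat_name[] must match its layout exactly.
 */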
5908static int
5909sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
5910{
5911	struct adapter *sc = arg1;
5912	struct sbuf *sb;
5913	int rc, i, j;
5914	uint64_t *p0, *p1;
5915	struct lb_port_stats s[2];
5916	static const char *stat_name[] = {
5917		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
5918		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
5919		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
5920		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
5921		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
5922		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
5923		"BG2FramesTrunc:", "BG3FramesTrunc:"
5924	};
5925
5926	rc = sysctl_wire_old_buffer(req, 0);
5927	if (rc != 0)
5928		return (rc);
5929
5930	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5931	if (sb == NULL)
5932		return (ENOMEM);
5933
5934	memset(s, 0, sizeof(s));
5935
5936	for (i = 0; i < sc->chip_params->nchan; i += 2) {
5937		t4_get_lb_stats(sc, i, &s[0]);
5938		t4_get_lb_stats(sc, i + 1, &s[1]);
5939
5940		p0 = &s[0].octets;
5941		p1 = &s[1].octets;
5942		sbuf_printf(sb, "%s                       Loopback %u"
5943		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
5944
5945		for (j = 0; j < nitems(stat_name); j++)
5946			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
5947				   *p0++, *p1++);
5948	}
5949
5950	rc = sbuf_finish(sb);
5951	sbuf_delete(sb);
5952
5953	return (rc);
5954}
5955
5956static int
5957sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
5958{
5959	int rc = 0;
5960	struct port_info *pi = arg1;
5961	struct sbuf *sb;
5962
5963	rc = sysctl_wire_old_buffer(req, 0);
5964	if (rc != 0)
5965		return (rc);
5966	sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
5967	if (sb == NULL)
5968		return (ENOMEM);
5969
5970	if (pi->linkdnrc < 0)
5971		sbuf_printf(sb, "n/a");
5972	else
5973		sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc));
5974
5975	rc = sbuf_finish(sb);
5976	sbuf_delete(sb);
5977
5978	return (rc);
5979}
5980
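/*
 * Helpers for the meminfo sysctl below.  A mem_desc describes one memory
 * region ([base, limit] plus an index into the region name table); regions
 * are sorted by base address so that a missing limit can be inferred from
 * the next region's base.
 */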
5981struct mem_desc {
5982	unsigned int base;
5983	unsigned int limit;
5984	unsigned int idx;
5985};
5986
5987static int
5988mem_desc_cmp(const void *a, const void *b)
5989{
5990	return ((const struct mem_desc *)a)->base -
5991	       ((const struct mem_desc *)b)->base;
5992}
5993
5994static void
5995mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
5996    unsigned int to)
5997{
5998	unsigned int size;
5999
6000	size = to - from + 1;
6001	if (size == 0)
6002		return;
6003
6004	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
6005	sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
6006}
6007
6008static int
6009sysctl_meminfo(SYSCTL_HANDLER_ARGS)
6010{
6011	struct adapter *sc = arg1;
6012	struct sbuf *sb;
6013	int rc, i, n;
6014	uint32_t lo, hi, used, alloc;
6015	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
6016	static const char *region[] = {
6017		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
6018		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
6019		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
6020		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
6021		"RQUDP region:", "PBL region:", "TXPBL region:",
6022		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
6023		"On-chip queues:"
6024	};
6025	struct mem_desc avail[4];
6026	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
6027	struct mem_desc *md = mem;
6028
6029	rc = sysctl_wire_old_buffer(req, 0);
6030	if (rc != 0)
6031		return (rc);
6032
6033	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6034	if (sb == NULL)
6035		return (ENOMEM);
6036
6037	for (i = 0; i < nitems(mem); i++) {
6038		mem[i].limit = 0;
6039		mem[i].idx = i;
6040	}
6041
6042	/* Find and sort the populated memory ranges */
6043	i = 0;
6044	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
6045	if (lo & F_EDRAM0_ENABLE) {
6046		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
6047		avail[i].base = G_EDRAM0_BASE(hi) << 20;
6048		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
6049		avail[i].idx = 0;
6050		i++;
6051	}
6052	if (lo & F_EDRAM1_ENABLE) {
6053		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
6054		avail[i].base = G_EDRAM1_BASE(hi) << 20;
6055		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
6056		avail[i].idx = 1;
6057		i++;
6058	}
6059	if (lo & F_EXT_MEM_ENABLE) {
6060		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
6061		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
6062		avail[i].limit = avail[i].base +
6063		    (G_EXT_MEM_SIZE(hi) << 20);
6064		avail[i].idx = is_t5(sc) ? 3 : 2;	/* Call it MC0 for T5 */
6065		i++;
6066	}
6067	if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
6068		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
6069		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
6070		avail[i].limit = avail[i].base +
6071		    (G_EXT_MEM1_SIZE(hi) << 20);
6072		avail[i].idx = 4;
6073		i++;
6074	}
6075	if (i == 0) {				/* no memory available */
		sbuf_delete(sb);
6076		return (0);
	}
6077	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
6078
6079	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
6080	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
6081	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
6082	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
6083	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
6084	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
6085	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
6086	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
6087	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
6088
6089	/* the next few have explicit upper bounds */
6090	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
6091	md->limit = md->base - 1 +
6092		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
6093		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
6094	md++;
6095
6096	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
6097	md->limit = md->base - 1 +
6098		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
6099		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
6100	md++;
6101
6102	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
6103		if (chip_id(sc) <= CHELSIO_T5) {
6104			hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
6105			md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
6106		} else {
6107			hi = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
6108			md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
6109		}
6110		md->limit = 0;
6111	} else {
6112		md->base = 0;
6113		md->idx = nitems(region);  /* hide it */
6114	}
6115	md++;
6116
6117#define ulp_region(reg) \
6118	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
6119	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
6120
6121	ulp_region(RX_ISCSI);
6122	ulp_region(RX_TDDP);
6123	ulp_region(TX_TPT);
6124	ulp_region(RX_STAG);
6125	ulp_region(RX_RQ);
6126	ulp_region(RX_RQUDP);
6127	ulp_region(RX_PBL);
6128	ulp_region(TX_PBL);
6129#undef ulp_region
6130
6131	md->base = 0;
6132	md->idx = nitems(region);
6133	if (!is_t4(sc)) {
6134		uint32_t size = 0;
6135		uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
6136		uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
6137
6138		if (is_t5(sc)) {
6139			if (sge_ctrl & F_VFIFO_ENABLE)
6140				size = G_DBVFIFO_SIZE(fifo_size);
6141		} else
6142			size = G_T6_DBVFIFO_SIZE(fifo_size);
6143
6144		if (size) {
6145			md->base = G_BASEADDR(t4_read_reg(sc,
6146			    A_SGE_DBVFIFO_BADDR));
6147			md->limit = md->base + (size << 2) - 1;
6148		}
6149	}
6150	md++;
6151
6152	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
6153	md->limit = 0;
6154	md++;
6155	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
6156	md->limit = 0;
6157	md++;
6158
6159	md->base = sc->vres.ocq.start;
6160	if (sc->vres.ocq.size)
6161		md->limit = md->base + sc->vres.ocq.size - 1;
6162	else
6163		md->idx = nitems(region);  /* hide it */
6164	md++;
6165
6166	/* add any address-space holes; there can be up to 3 */
6167	for (n = 0; n < i - 1; n++)
6168		if (avail[n].limit < avail[n + 1].base)
6169			(md++)->base = avail[n].limit;
6170	if (avail[n].limit)
6171		(md++)->base = avail[n].limit;
6172
6173	n = md - mem;
6174	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
6175
6176	for (lo = 0; lo < i; lo++)
6177		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
6178				avail[lo].limit - 1);
6179
6180	sbuf_printf(sb, "\n");
6181	for (i = 0; i < n; i++) {
6182		if (mem[i].idx >= nitems(region))
6183			continue;                        /* skip holes */
6184		if (!mem[i].limit)
6185			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
6186		mem_region_show(sb, region[mem[i].idx], mem[i].base,
6187				mem[i].limit);
6188	}
6189
6190	sbuf_printf(sb, "\n");
6191	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
6192	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
6193	mem_region_show(sb, "uP RAM:", lo, hi);
6194
6195	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
6196	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
6197	mem_region_show(sb, "uP Extmem2:", lo, hi);
6198
6199	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
6200	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
6201		   G_PMRXMAXPAGE(lo),
6202		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
6203		   (lo & F_PMRXNUMCHN) ? 2 : 1);
6204
6205	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
6206	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
6207	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
6208		   G_PMTXMAXPAGE(lo),
6209		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
6210		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
6211	sbuf_printf(sb, "%u p-structs\n",
6212		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
6213
6214	for (i = 0; i < 4; i++) {
6215		if (chip_id(sc) > CHELSIO_T5)
6216			lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
6217		else
6218			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
6219		if (is_t5(sc)) {
6220			used = G_T5_USED(lo);
6221			alloc = G_T5_ALLOC(lo);
6222		} else {
6223			used = G_USED(lo);
6224			alloc = G_ALLOC(lo);
6225		}
6226		/* For T6 these are MAC buffer groups */
6227		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
6228		    i, used, alloc);
6229	}
6230	for (i = 0; i < sc->chip_params->nchan; i++) {
6231		if (chip_id(sc) > CHELSIO_T5)
6232			lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
6233		else
6234			lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
6235		if (is_t5(sc)) {
6236			used = G_T5_USED(lo);
6237			alloc = G_T5_ALLOC(lo);
6238		} else {
6239			used = G_USED(lo);
6240			alloc = G_ALLOC(lo);
6241		}
6242		/* For T6 these are MAC buffer groups */
6243		sbuf_printf(sb,
6244		    "\nLoopback %d using %u pages out of %u allocated",
6245		    i, used, alloc);
6246	}
6247
6248	rc = sbuf_finish(sb);
6249	sbuf_delete(sb);
6250
6251	return (rc);
6252}
6253
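/*
 * Convert a TCAM entry from its x/y encoding to a value/mask pair.  A bit
 * participates in the match when x or y is set (so mask = x | y) and the
 * 48-bit Ethernet address itself is recovered from the low bits of y.
 * Entries with x & y != 0 are invalid and are skipped by the callers.
 */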
6254static inline void
6255tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
6256{
6257	*mask = x | y;
6258	y = htobe64(y);
6259	memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
6260}
6261
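/*
 * MPS TCAM dump for T4/T5.  Prints each valid entry's address/mask, its
 * SRAM classification bits, and, when replication is enabled, the 128-bit
 * replication vector fetched from the firmware with FW_LDST_CMD.
 */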
6262static int
6263sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
6264{
6265	struct adapter *sc = arg1;
6266	struct sbuf *sb;
6267	int rc, i;
6268
6269	MPASS(chip_id(sc) <= CHELSIO_T5);
6270
6271	rc = sysctl_wire_old_buffer(req, 0);
6272	if (rc != 0)
6273		return (rc);
6274
6275	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6276	if (sb == NULL)
6277		return (ENOMEM);
6278
6279	sbuf_printf(sb,
6280	    "Idx  Ethernet address     Mask     Vld Ports PF"
6281	    "  VF              Replication             P0 P1 P2 P3  ML");
6282	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6283		uint64_t tcamx, tcamy, mask;
6284		uint32_t cls_lo, cls_hi;
6285		uint8_t addr[ETHER_ADDR_LEN];
6286
6287		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
6288		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
6289		if (tcamx & tcamy)
6290			continue;
6291		tcamxy2valmask(tcamx, tcamy, addr, &mask);
6292		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6293		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6294		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
6295			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
6296			   addr[3], addr[4], addr[5], (uintmax_t)mask,
6297			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
6298			   G_PORTMAP(cls_hi), G_PF(cls_lo),
6299			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
6300
6301		if (cls_lo & F_REPLICATE) {
6302			struct fw_ldst_cmd ldst_cmd;
6303
6304			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6305			ldst_cmd.op_to_addrspace =
6306			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6307				F_FW_CMD_REQUEST | F_FW_CMD_READ |
6308				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6309			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6310			ldst_cmd.u.mps.rplc.fid_idx =
6311			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6312				V_FW_LDST_CMD_IDX(i));
6313
6314			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6315			    "t4mps");
6316			if (rc)
6317				break;
6318			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6319			    sizeof(ldst_cmd), &ldst_cmd);
6320			end_synchronized_op(sc, 0);
6321
6322			if (rc != 0) {
6323				sbuf_printf(sb, "%36d", rc);
6324				rc = 0;
6325			} else {
6326				sbuf_printf(sb, " %08x %08x %08x %08x",
6327				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6328				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6329				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6330				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6331			}
6332		} else
6333			sbuf_printf(sb, "%36s", "");
6334
6335		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
6336		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
6337		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
6338	}
6339
6340	if (rc)
6341		(void) sbuf_finish(sb);
6342	else
6343		rc = sbuf_finish(sb);
6344	sbuf_delete(sb);
6345
6346	return (rc);
6347}
6348
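/*
 * T6 version of the MPS TCAM dump.  The TCAM is read through the indirect
 * A_MPS_CLS_TCAM_DATA2_CTL interface; indices 256 and up live in a second
 * TCAM (CTLTCAMSEL 1) and every entry is read twice, once for the y bits
 * and once more with CTLXYBITSEL set for the x bits.
 */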
6349static int
6350sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
6351{
6352	struct adapter *sc = arg1;
6353	struct sbuf *sb;
6354	int rc, i;
6355
6356	MPASS(chip_id(sc) > CHELSIO_T5);
6357
6358	rc = sysctl_wire_old_buffer(req, 0);
6359	if (rc != 0)
6360		return (rc);
6361
6362	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6363	if (sb == NULL)
6364		return (ENOMEM);
6365
6366	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
6367	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
6368	    "                           Replication"
6369	    "                                    P0 P1 P2 P3  ML\n");
6370
6371	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6372		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
6373		uint16_t ivlan;
6374		uint64_t tcamx, tcamy, val, mask;
6375		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
6376		uint8_t addr[ETHER_ADDR_LEN];
6377
6378		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
6379		if (i < 256)
6380			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
6381		else
6382			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
6383		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6384		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6385		tcamy = G_DMACH(val) << 32;
6386		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6387		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6388		lookup_type = G_DATALKPTYPE(data2);
6389		port_num = G_DATAPORTNUM(data2);
6390		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6391			/* Inner header VNI */
6392			vniy = ((data2 & F_DATAVIDH2) << 23) |
6393				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6394			dip_hit = data2 & F_DATADIPHIT;
6395			vlan_vld = 0;
6396		} else {
6397			vniy = 0;
6398			dip_hit = 0;
6399			vlan_vld = data2 & F_DATAVIDH2;
6400			ivlan = G_VIDL(val);
6401		}
6402
6403		ctl |= V_CTLXYBITSEL(1);
6404		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6405		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6406		tcamx = G_DMACH(val) << 32;
6407		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6408		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6409		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6410			/* Inner header VNI mask */
6411			vnix = ((data2 & F_DATAVIDH2) << 23) |
6412			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6413		} else
6414			vnix = 0;
6415
6416		if (tcamx & tcamy)
6417			continue;
6418		tcamxy2valmask(tcamx, tcamy, addr, &mask);
6419
6420		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6421		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6422
6423		if (lookup_type && lookup_type != M_DATALKPTYPE) {
6424			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6425			    "%012jx %06x %06x    -    -   %3c"
6426			    "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
6427			    addr[1], addr[2], addr[3], addr[4], addr[5],
6428			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
6429			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6430			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6431			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6432		} else {
6433			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6434			    "%012jx    -       -   ", i, addr[0], addr[1],
6435			    addr[2], addr[3], addr[4], addr[5],
6436			    (uintmax_t)mask);
6437
6438			if (vlan_vld)
6439				sbuf_printf(sb, "%4u   Y     ", ivlan);
6440			else
6441				sbuf_printf(sb, "  -    N     ");
6442
6443			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
6444			    lookup_type ? 'I' : 'O', port_num,
6445			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6446			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6447			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6448		}
6449
6450
6451		if (cls_lo & F_T6_REPLICATE) {
6452			struct fw_ldst_cmd ldst_cmd;
6453
6454			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6455			ldst_cmd.op_to_addrspace =
6456			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6457				F_FW_CMD_REQUEST | F_FW_CMD_READ |
6458				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6459			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6460			ldst_cmd.u.mps.rplc.fid_idx =
6461			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6462				V_FW_LDST_CMD_IDX(i));
6463
6464			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6465			    "t6mps");
6466			if (rc)
6467				break;
6468			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6469			    sizeof(ldst_cmd), &ldst_cmd);
6470			end_synchronized_op(sc, 0);
6471
6472			if (rc != 0) {
6473				sbuf_printf(sb, "%72d", rc);
6474				rc = 0;
6475			} else {
6476				sbuf_printf(sb, " %08x %08x %08x %08x"
6477				    " %08x %08x %08x %08x",
6478				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
6479				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
6480				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
6481				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
6482				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6483				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6484				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6485				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6486			}
6487		} else
6488			sbuf_printf(sb, "%72s", "");
6489
6490		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
6491		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
6492		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
6493		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
6494	}
6495
6496	if (rc)
6497		(void) sbuf_finish(sb);
6498	else
6499		rc = sbuf_finish(sb);
6500	sbuf_delete(sb);
6501
6502	return (rc);
6503}
6504
6505static int
6506sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
6507{
6508	struct adapter *sc = arg1;
6509	struct sbuf *sb;
6510	int rc;
6511	uint16_t mtus[NMTUS];
6512
6513	rc = sysctl_wire_old_buffer(req, 0);
6514	if (rc != 0)
6515		return (rc);
6516
6517	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6518	if (sb == NULL)
6519		return (ENOMEM);
6520
6521	t4_read_mtu_tbl(sc, mtus, NULL);
6522
6523	sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
6524	    mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
6525	    mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
6526	    mtus[14], mtus[15]);
6527
6528	rc = sbuf_finish(sb);
6529	sbuf_delete(sb);
6530
6531	return (rc);
6532}
6533
6534static int
6535sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
6536{
6537	struct adapter *sc = arg1;
6538	struct sbuf *sb;
6539	int rc, i;
6540	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
6541	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
6542	static const char *tx_stats[MAX_PM_NSTATS] = {
6543		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
6544		"Tx FIFO wait", NULL, "Tx latency"
6545	};
6546	static const char *rx_stats[MAX_PM_NSTATS] = {
6547		"Read:", "Write bypass:", "Write mem:", "Flush:",
6548		" Rx FIFO wait", NULL, "Rx latency"
6549	};
6550
6551	rc = sysctl_wire_old_buffer(req, 0);
6552	if (rc != 0)
6553		return (rc);
6554
6555	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6556	if (sb == NULL)
6557		return (ENOMEM);
6558
6559	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
6560	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
6561
6562	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
6563	for (i = 0; i < 4; i++) {
6564		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6565		    tx_cyc[i]);
6566	}
6567
6568	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
6569	for (i = 0; i < 4; i++) {
6570		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6571		    rx_cyc[i]);
6572	}
6573
6574	if (chip_id(sc) > CHELSIO_T5) {
6575		sbuf_printf(sb,
6576		    "\n              Total wait      Total occupancy");
6577		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6578		    tx_cyc[i]);
6579		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6580		    rx_cyc[i]);
6581
6582		i += 2;
6583		MPASS(i < nitems(tx_stats));
6584
6585		sbuf_printf(sb,
6586		    "\n                   Reads           Total wait");
6587		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
6588		    tx_cyc[i]);
6589		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
6590		    rx_cyc[i]);
6591	}
6592
6593	rc = sbuf_finish(sb);
6594	sbuf_delete(sb);
6595
6596	return (rc);
6597}
6598
6599static int
6600sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
6601{
6602	struct adapter *sc = arg1;
6603	struct sbuf *sb;
6604	int rc;
6605	struct tp_rdma_stats stats;
6606
6607	rc = sysctl_wire_old_buffer(req, 0);
6608	if (rc != 0)
6609		return (rc);
6610
6611	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6612	if (sb == NULL)
6613		return (ENOMEM);
6614
6615	mtx_lock(&sc->regwin_lock);
6616	t4_tp_get_rdma_stats(sc, &stats);
6617	mtx_unlock(&sc->regwin_lock);
6618
6619	sbuf_printf(sb, "NoRQEModDeferrals: %u\n", stats.rqe_dfr_mod);
6620	sbuf_printf(sb, "NoRQEPktDeferrals: %u", stats.rqe_dfr_pkt);
6621
6622	rc = sbuf_finish(sb);
6623	sbuf_delete(sb);
6624
6625	return (rc);
6626}
6627
6628static int
6629sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
6630{
6631	struct adapter *sc = arg1;
6632	struct sbuf *sb;
6633	int rc;
6634	struct tp_tcp_stats v4, v6;
6635
6636	rc = sysctl_wire_old_buffer(req, 0);
6637	if (rc != 0)
6638		return (rc);
6639
6640	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6641	if (sb == NULL)
6642		return (ENOMEM);
6643
6644	mtx_lock(&sc->regwin_lock);
6645	t4_tp_get_tcp_stats(sc, &v4, &v6);
6646	mtx_unlock(&sc->regwin_lock);
6647
6648	sbuf_printf(sb,
6649	    "                                IP                 IPv6\n");
6650	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
6651	    v4.tcp_out_rsts, v6.tcp_out_rsts);
6652	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
6653	    v4.tcp_in_segs, v6.tcp_in_segs);
6654	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
6655	    v4.tcp_out_segs, v6.tcp_out_segs);
6656	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
6657	    v4.tcp_retrans_segs, v6.tcp_retrans_segs);
6658
6659	rc = sbuf_finish(sb);
6660	sbuf_delete(sb);
6661
6662	return (rc);
6663}
6664
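/*
 * Summary of the TID namespace: active TIDs, hardware TIDs (with the
 * normal/hash split when the LE hash is enabled), server TIDs, filter
 * TIDs, ETIDs, and the per-protocol active-TID counters kept by the LE.
 */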
6665static int
6666sysctl_tids(SYSCTL_HANDLER_ARGS)
6667{
6668	struct adapter *sc = arg1;
6669	struct sbuf *sb;
6670	int rc;
6671	struct tid_info *t = &sc->tids;
6672
6673	rc = sysctl_wire_old_buffer(req, 0);
6674	if (rc != 0)
6675		return (rc);
6676
6677	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6678	if (sb == NULL)
6679		return (ENOMEM);
6680
6681	if (t->natids) {
6682		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
6683		    t->atids_in_use);
6684	}
6685
6686	if (t->ntids) {
6687		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
6688			uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
6689
6690			if (b) {
6691				sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1,
6692				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
6693				    t->ntids - 1);
6694			} else {
6695				sbuf_printf(sb, "TID range: %u-%u",
6696				    t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
6697				    t->ntids - 1);
6698			}
6699		} else
6700			sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
6701		sbuf_printf(sb, ", in use: %u\n",
6702		    atomic_load_acq_int(&t->tids_in_use));
6703	}
6704
6705	if (t->nstids) {
6706		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
6707		    t->stid_base + t->nstids - 1, t->stids_in_use);
6708	}
6709
6710	if (t->nftids) {
6711		sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
6712		    t->ftid_base + t->nftids - 1);
6713	}
6714
6715	if (t->netids) {
6716		sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
6717		    t->etid_base + t->netids - 1);
6718	}
6719
6720	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
6721	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
6722	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
6723
6724	rc = sbuf_finish(sb);
6725	sbuf_delete(sb);
6726
6727	return (rc);
6728}
6729
6730static int
6731sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
6732{
6733	struct adapter *sc = arg1;
6734	struct sbuf *sb;
6735	int rc;
6736	struct tp_err_stats stats;
6737
6738	rc = sysctl_wire_old_buffer(req, 0);
6739	if (rc != 0)
6740		return (rc);
6741
6742	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6743	if (sb == NULL)
6744		return (ENOMEM);
6745
6746	mtx_lock(&sc->regwin_lock);
6747	t4_tp_get_err_stats(sc, &stats);
6748	mtx_unlock(&sc->regwin_lock);
6749
6750	if (sc->chip_params->nchan > 2) {
6751		sbuf_printf(sb, "                 channel 0  channel 1"
6752		    "  channel 2  channel 3\n");
6753		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
6754		    stats.mac_in_errs[0], stats.mac_in_errs[1],
6755		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
6756		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
6757		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
6758		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
6759		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
6760		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
6761		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
6762		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
6763		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
6764		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
6765		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
6766		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
6767		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
6768		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
6769		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
6770		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
6771		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
6772		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
6773		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
6774		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
6775		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
6776		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
6777	} else {
6778		sbuf_printf(sb, "                 channel 0  channel 1\n");
6779		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
6780		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
6781		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
6782		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
6783		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
6784		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
6785		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
6786		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
6787		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
6788		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
6789		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
6790		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
6791		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
6792		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
6793		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
6794		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
6795	}
6796
6797	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
6798	    stats.ofld_no_neigh, stats.ofld_cong_defer);
6799
6800	rc = sbuf_finish(sb);
6801	sbuf_delete(sb);
6802
6803	return (rc);
6804}
6805
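/*
 * Table-driven decode of 64-bit logic analyzer words: each field_desc
 * names a bit field by start position and width, and field_desc_show()
 * prints the fields of one word as "name: value" pairs, wrapping before
 * column 79.
 */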
6806struct field_desc {
6807	const char *name;
6808	u_int start;
6809	u_int width;
6810};
6811
6812static void
6813field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
6814{
6815	char buf[32];
6816	int line_size = 0;
6817
6818	while (f->name) {
6819		uint64_t mask = (1ULL << f->width) - 1;
6820		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
6821		    ((uintmax_t)v >> f->start) & mask);
6822
6823		if (line_size + len >= 79) {
6824			line_size = 8;
6825			sbuf_printf(sb, "\n        ");
6826		}
6827		sbuf_printf(sb, "%s ", buf);
6828		line_size += len + 1;
6829		f++;
6830	}
6831	sbuf_printf(sb, "\n");
6832}
6833
6834static const struct field_desc tp_la0[] = {
6835	{ "RcfOpCodeOut", 60, 4 },
6836	{ "State", 56, 4 },
6837	{ "WcfState", 52, 4 },
6838	{ "RcfOpcSrcOut", 50, 2 },
6839	{ "CRxError", 49, 1 },
6840	{ "ERxError", 48, 1 },
6841	{ "SanityFailed", 47, 1 },
6842	{ "SpuriousMsg", 46, 1 },
6843	{ "FlushInputMsg", 45, 1 },
6844	{ "FlushInputCpl", 44, 1 },
6845	{ "RssUpBit", 43, 1 },
6846	{ "RssFilterHit", 42, 1 },
6847	{ "Tid", 32, 10 },
6848	{ "InitTcb", 31, 1 },
6849	{ "LineNumber", 24, 7 },
6850	{ "Emsg", 23, 1 },
6851	{ "EdataOut", 22, 1 },
6852	{ "Cmsg", 21, 1 },
6853	{ "CdataOut", 20, 1 },
6854	{ "EreadPdu", 19, 1 },
6855	{ "CreadPdu", 18, 1 },
6856	{ "TunnelPkt", 17, 1 },
6857	{ "RcfPeerFin", 16, 1 },
6858	{ "RcfReasonOut", 12, 4 },
6859	{ "TxCchannel", 10, 2 },
6860	{ "RcfTxChannel", 8, 2 },
6861	{ "RxEchannel", 6, 2 },
6862	{ "RcfRxChannel", 5, 1 },
6863	{ "RcfDataOutSrdy", 4, 1 },
6864	{ "RxDvld", 3, 1 },
6865	{ "RxOoDvld", 2, 1 },
6866	{ "RxCongestion", 1, 1 },
6867	{ "TxCongestion", 0, 1 },
6868	{ NULL }
6869};
6870
6871static const struct field_desc tp_la1[] = {
6872	{ "CplCmdIn", 56, 8 },
6873	{ "CplCmdOut", 48, 8 },
6874	{ "ESynOut", 47, 1 },
6875	{ "EAckOut", 46, 1 },
6876	{ "EFinOut", 45, 1 },
6877	{ "ERstOut", 44, 1 },
6878	{ "SynIn", 43, 1 },
6879	{ "AckIn", 42, 1 },
6880	{ "FinIn", 41, 1 },
6881	{ "RstIn", 40, 1 },
6882	{ "DataIn", 39, 1 },
6883	{ "DataInVld", 38, 1 },
6884	{ "PadIn", 37, 1 },
6885	{ "RxBufEmpty", 36, 1 },
6886	{ "RxDdp", 35, 1 },
6887	{ "RxFbCongestion", 34, 1 },
6888	{ "TxFbCongestion", 33, 1 },
6889	{ "TxPktSumSrdy", 32, 1 },
6890	{ "RcfUlpType", 28, 4 },
6891	{ "Eread", 27, 1 },
6892	{ "Ebypass", 26, 1 },
6893	{ "Esave", 25, 1 },
6894	{ "Static0", 24, 1 },
6895	{ "Cread", 23, 1 },
6896	{ "Cbypass", 22, 1 },
6897	{ "Csave", 21, 1 },
6898	{ "CPktOut", 20, 1 },
6899	{ "RxPagePoolFull", 18, 2 },
6900	{ "RxLpbkPkt", 17, 1 },
6901	{ "TxLpbkPkt", 16, 1 },
6902	{ "RxVfValid", 15, 1 },
6903	{ "SynLearned", 14, 1 },
6904	{ "SetDelEntry", 13, 1 },
6905	{ "SetInvEntry", 12, 1 },
6906	{ "CpcmdDvld", 11, 1 },
6907	{ "CpcmdSave", 10, 1 },
6908	{ "RxPstructsFull", 8, 2 },
6909	{ "EpcmdDvld", 7, 1 },
6910	{ "EpcmdFlush", 6, 1 },
6911	{ "EpcmdTrimPrefix", 5, 1 },
6912	{ "EpcmdTrimPostfix", 4, 1 },
6913	{ "ERssIp4Pkt", 3, 1 },
6914	{ "ERssIp6Pkt", 2, 1 },
6915	{ "ERssTcpUdpPkt", 1, 1 },
6916	{ "ERssFceFipPkt", 0, 1 },
6917	{ NULL }
6918};
6919
6920static const struct field_desc tp_la2[] = {
6921	{ "CplCmdIn", 56, 8 },
6922	{ "MpsVfVld", 55, 1 },
6923	{ "MpsPf", 52, 3 },
6924	{ "MpsVf", 44, 8 },
6925	{ "SynIn", 43, 1 },
6926	{ "AckIn", 42, 1 },
6927	{ "FinIn", 41, 1 },
6928	{ "RstIn", 40, 1 },
6929	{ "DataIn", 39, 1 },
6930	{ "DataInVld", 38, 1 },
6931	{ "PadIn", 37, 1 },
6932	{ "RxBufEmpty", 36, 1 },
6933	{ "RxDdp", 35, 1 },
6934	{ "RxFbCongestion", 34, 1 },
6935	{ "TxFbCongestion", 33, 1 },
6936	{ "TxPktSumSrdy", 32, 1 },
6937	{ "RcfUlpType", 28, 4 },
6938	{ "Eread", 27, 1 },
6939	{ "Ebypass", 26, 1 },
6940	{ "Esave", 25, 1 },
6941	{ "Static0", 24, 1 },
6942	{ "Cread", 23, 1 },
6943	{ "Cbypass", 22, 1 },
6944	{ "Csave", 21, 1 },
6945	{ "CPktOut", 20, 1 },
6946	{ "RxPagePoolFull", 18, 2 },
6947	{ "RxLpbkPkt", 17, 1 },
6948	{ "TxLpbkPkt", 16, 1 },
6949	{ "RxVfValid", 15, 1 },
6950	{ "SynLearned", 14, 1 },
6951	{ "SetDelEntry", 13, 1 },
6952	{ "SetInvEntry", 12, 1 },
6953	{ "CpcmdDvld", 11, 1 },
6954	{ "CpcmdSave", 10, 1 },
6955	{ "RxPstructsFull", 8, 2 },
6956	{ "EpcmdDvld", 7, 1 },
6957	{ "EpcmdFlush", 6, 1 },
6958	{ "EpcmdTrimPrefix", 5, 1 },
6959	{ "EpcmdTrimPostfix", 4, 1 },
6960	{ "ERssIp4Pkt", 3, 1 },
6961	{ "ERssIp6Pkt", 2, 1 },
6962	{ "ERssTcpUdpPkt", 1, 1 },
6963	{ "ERssFceFipPkt", 0, 1 },
6964	{ NULL }
6965};
6966
6967static void
6968tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
6969{
6970
6971	field_desc_show(sb, *p, tp_la0);
6972}
6973
6974static void
6975tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
6976{
6977
6978	if (idx)
6979		sbuf_printf(sb, "\n");
6980	field_desc_show(sb, p[0], tp_la0);
6981	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
6982		field_desc_show(sb, p[1], tp_la0);
6983}
6984
6985static void
6986tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
6987{
6988
6989	if (idx)
6990		sbuf_printf(sb, "\n");
6991	field_desc_show(sb, p[0], tp_la0);
6992	if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
6993		field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
6994}
6995
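/*
 * TP logic analyzer dump.  The capture mode in A_TP_DBG_LA_CONFIG decides
 * how entries are decoded: modes 2 and 3 consume two 64-bit words per
 * entry, everything else one word.
 */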
6996static int
6997sysctl_tp_la(SYSCTL_HANDLER_ARGS)
6998{
6999	struct adapter *sc = arg1;
7000	struct sbuf *sb;
7001	uint64_t *buf, *p;
7002	int rc;
7003	u_int i, inc;
7004	void (*show_func)(struct sbuf *, uint64_t *, int);
7005
7006	rc = sysctl_wire_old_buffer(req, 0);
7007	if (rc != 0)
7008		return (rc);
7009
7010	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7011	if (sb == NULL)
7012		return (ENOMEM);
7013
7014	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
7015
7016	t4_tp_read_la(sc, buf, NULL);
7017	p = buf;
7018
7019	switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
7020	case 2:
7021		inc = 2;
7022		show_func = tp_la_show2;
7023		break;
7024	case 3:
7025		inc = 2;
7026		show_func = tp_la_show3;
7027		break;
7028	default:
7029		inc = 1;
7030		show_func = tp_la_show;
7031	}
7032
7033	for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
7034		(*show_func)(sb, p, i);
7035
7036	rc = sbuf_finish(sb);
7037	sbuf_delete(sb);
7038	free(buf, M_CXGBE);
7039	return (rc);
7040}
7041
7042static int
7043sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
7044{
7045	struct adapter *sc = arg1;
7046	struct sbuf *sb;
7047	int rc;
7048	u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
7049
7050	rc = sysctl_wire_old_buffer(req, 0);
7051	if (rc != 0)
7052		return (rc);
7053
7054	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7055	if (sb == NULL)
7056		return (ENOMEM);
7057
7058	t4_get_chan_txrate(sc, nrate, orate);
7059
7060	if (sc->chip_params->nchan > 2) {
7061		sbuf_printf(sb, "              channel 0   channel 1"
7062		    "   channel 2   channel 3\n");
7063		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
7064		    nrate[0], nrate[1], nrate[2], nrate[3]);
7065		sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
7066		    orate[0], orate[1], orate[2], orate[3]);
7067	} else {
7068		sbuf_printf(sb, "              channel 0   channel 1\n");
7069		sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
7070		    nrate[0], nrate[1]);
7071		sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
7072		    orate[0], orate[1]);
7073	}
7074
7075	rc = sbuf_finish(sb);
7076	sbuf_delete(sb);
7077
7078	return (rc);
7079}
7080
7081static int
7082sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
7083{
7084	struct adapter *sc = arg1;
7085	struct sbuf *sb;
7086	uint32_t *buf, *p;
7087	int rc, i;
7088
7089	rc = sysctl_wire_old_buffer(req, 0);
7090	if (rc != 0)
7091		return (rc);
7092
7093	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7094	if (sb == NULL)
7095		return (ENOMEM);
7096
7097	buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
7098	    M_ZERO | M_WAITOK);
7099
7100	t4_ulprx_read_la(sc, buf);
7101	p = buf;
7102
7103	sbuf_printf(sb, "      Pcmd        Type   Message"
7104	    "                Data");
7105	for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
7106		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
7107		    p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
7108	}
7109
7110	rc = sbuf_finish(sb);
7111	sbuf_delete(sb);
7112	free(buf, M_CXGBE);
7113	return (rc);
7114}
7115
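/*
 * SGE write-combined work request statistics.  Only meaningful when the
 * SGE stat source is set to 7; STATMODE selects which pair of counters
 * the total/match registers hold.
 */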
7116static int
7117sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
7118{
7119	struct adapter *sc = arg1;
7120	struct sbuf *sb;
7121	int rc, v;
7122
7123	rc = sysctl_wire_old_buffer(req, 0);
7124	if (rc != 0)
7125		return (rc);
7126
7127	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7128	if (sb == NULL)
7129		return (ENOMEM);
7130
7131	v = t4_read_reg(sc, A_SGE_STAT_CFG);
7132	if (G_STATSOURCE_T5(v) == 7) {
7133		if (G_STATMODE(v) == 0) {
7134			sbuf_printf(sb, "total %d, incomplete %d",
7135			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
7136			    t4_read_reg(sc, A_SGE_STAT_MATCH));
7137		} else if (G_STATMODE(v) == 1) {
7138			sbuf_printf(sb, "total %d, data overflow %d",
7139			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
7140			    t4_read_reg(sc, A_SGE_STAT_MATCH));
7141		}
7142	}
7143	rc = sbuf_finish(sb);
7144	sbuf_delete(sb);
7145
7146	return (rc);
7147}
7148#endif
7149
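/*
 * Conversions between the driver's T4_FILTER_* mode bits and the hardware
 * filter configuration: fconf mirrors TP_VLAN_PRI_MAP and iconf mirrors
 * TP_INGRESS_CONFIG (only the F_VNIC bit matters there).
 */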
7150static uint32_t
7151fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf)
7152{
7153	uint32_t mode;
7154
7155	mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR |
7156	    T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT;
7157
7158	if (fconf & F_FRAGMENTATION)
7159		mode |= T4_FILTER_IP_FRAGMENT;
7160
7161	if (fconf & F_MPSHITTYPE)
7162		mode |= T4_FILTER_MPS_HIT_TYPE;
7163
7164	if (fconf & F_MACMATCH)
7165		mode |= T4_FILTER_MAC_IDX;
7166
7167	if (fconf & F_ETHERTYPE)
7168		mode |= T4_FILTER_ETH_TYPE;
7169
7170	if (fconf & F_PROTOCOL)
7171		mode |= T4_FILTER_IP_PROTO;
7172
7173	if (fconf & F_TOS)
7174		mode |= T4_FILTER_IP_TOS;
7175
7176	if (fconf & F_VLAN)
7177		mode |= T4_FILTER_VLAN;
7178
7179	if (fconf & F_VNIC_ID) {
7180		mode |= T4_FILTER_VNIC;
7181		if (iconf & F_VNIC)
7182			mode |= T4_FILTER_IC_VNIC;
7183	}
7184
7185	if (fconf & F_PORT)
7186		mode |= T4_FILTER_PORT;
7187
7188	if (fconf & F_FCOE)
7189		mode |= T4_FILTER_FCoE;
7190
7191	return (mode);
7192}
7193
7194static uint32_t
7195mode_to_fconf(uint32_t mode)
7196{
7197	uint32_t fconf = 0;
7198
7199	if (mode & T4_FILTER_IP_FRAGMENT)
7200		fconf |= F_FRAGMENTATION;
7201
7202	if (mode & T4_FILTER_MPS_HIT_TYPE)
7203		fconf |= F_MPSHITTYPE;
7204
7205	if (mode & T4_FILTER_MAC_IDX)
7206		fconf |= F_MACMATCH;
7207
7208	if (mode & T4_FILTER_ETH_TYPE)
7209		fconf |= F_ETHERTYPE;
7210
7211	if (mode & T4_FILTER_IP_PROTO)
7212		fconf |= F_PROTOCOL;
7213
7214	if (mode & T4_FILTER_IP_TOS)
7215		fconf |= F_TOS;
7216
7217	if (mode & T4_FILTER_VLAN)
7218		fconf |= F_VLAN;
7219
7220	if (mode & T4_FILTER_VNIC)
7221		fconf |= F_VNIC_ID;
7222
7223	if (mode & T4_FILTER_PORT)
7224		fconf |= F_PORT;
7225
7226	if (mode & T4_FILTER_FCoE)
7227		fconf |= F_FCOE;
7228
7229	return (fconf);
7230}
7231
7232static uint32_t
7233mode_to_iconf(uint32_t mode)
7234{
7235
7236	if (mode & T4_FILTER_IC_VNIC)
7237		return (F_VNIC);
7238	return (0);
7239}
7240
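/*
 * Verify that a filter specification is expressible in the current filter
 * mode: every field the filter matches on must be part of vlan_pri_map,
 * and the ovlan and pfvf fields additionally require F_VNIC in the ingress
 * config to be clear or set, respectively.
 */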
7241static int
check_fspec_against_fconf_iconf(struct adapter *sc,
7242    struct t4_filter_specification *fs)
7243{
7244	struct tp_params *tpp = &sc->params.tp;
7245	uint32_t fconf = 0;
7246
7247	if (fs->val.frag || fs->mask.frag)
7248		fconf |= F_FRAGMENTATION;
7249
7250	if (fs->val.matchtype || fs->mask.matchtype)
7251		fconf |= F_MPSHITTYPE;
7252
7253	if (fs->val.macidx || fs->mask.macidx)
7254		fconf |= F_MACMATCH;
7255
7256	if (fs->val.ethtype || fs->mask.ethtype)
7257		fconf |= F_ETHERTYPE;
7258
7259	if (fs->val.proto || fs->mask.proto)
7260		fconf |= F_PROTOCOL;
7261
7262	if (fs->val.tos || fs->mask.tos)
7263		fconf |= F_TOS;
7264
7265	if (fs->val.vlan_vld || fs->mask.vlan_vld)
7266		fconf |= F_VLAN;
7267
7268	if (fs->val.ovlan_vld || fs->mask.ovlan_vld) {
7269		fconf |= F_VNIC_ID;
7270		if (tpp->ingress_config & F_VNIC)
7271			return (EINVAL);
7272	}
7273
7274	if (fs->val.pfvf_vld || fs->mask.pfvf_vld) {
7275		fconf |= F_VNIC_ID;
7276		if ((tpp->ingress_config & F_VNIC) == 0)
7277			return (EINVAL);
7278	}
7279
7280	if (fs->val.iport || fs->mask.iport)
7281		fconf |= F_PORT;
7282
7283	if (fs->val.fcoe || fs->mask.fcoe)
7284		fconf |= F_FCOE;
7285
7286	if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map)
7287		return (E2BIG);
7288
7289	return (0);
7290}
7291
7292static int
7293get_filter_mode(struct adapter *sc, uint32_t *mode)
7294{
7295	struct tp_params *tpp = &sc->params.tp;
7296
7297	/*
7298	 * We trust the cached values of the relevant TP registers.  This means
7299	 * things work reliably only if writes to those registers are always via
7300	 * t4_set_filter_mode.
7301	 */
7302	*mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config);
7303
7304	return (0);
7305}
7306
7307static int
7308set_filter_mode(struct adapter *sc, uint32_t mode)
7309{
7310	struct tp_params *tpp = &sc->params.tp;
7311	uint32_t fconf, iconf;
7312	int rc;
7313
7314	iconf = mode_to_iconf(mode);
7315	if ((iconf ^ tpp->ingress_config) & F_VNIC) {
7316		/*
7317		 * For now we just complain if A_TP_INGRESS_CONFIG is not
7318		 * already set to the correct value for the requested filter
7319		 * mode.  It's not clear if it's safe to write to this register
7320		 * on the fly.  (And we trust the cached value of the register).
7321		 */
7322		return (EBUSY);
7323	}
7324
7325	fconf = mode_to_fconf(mode);
7326
7327	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7328	    "t4setfm");
7329	if (rc)
7330		return (rc);
7331
7332	if (sc->tids.ftids_in_use > 0) {
7333		rc = EBUSY;
7334		goto done;
7335	}
7336
7337#ifdef TCP_OFFLOAD
7338	if (uld_active(sc, ULD_TOM)) {
7339		rc = EBUSY;
7340		goto done;
7341	}
7342#endif
7343
7344	rc = -t4_set_filter_mode(sc, fconf);
7345done:
7346	end_synchronized_op(sc, LOCK_HELD);
7347	return (rc);
7348}
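
/*
 * Userland reaches the two functions above through the
 * CHELSIO_T4_GET_FILTER_MODE/CHELSIO_T4_SET_FILTER_MODE ioctls handled in
 * t4_ioctl() below.  A sketch, assuming the nexus control node is
 * /dev/t4nex0 (the cdev created at attach) and that t4_ioctl.h and the
 * usual userland headers are available to the program:
 *
 *	uint32_t mode;
 *	int fd;
 *
 *	fd = open("/dev/t4nex0", O_RDWR);
 *	if (fd < 0)
 *		err(1, "open");
 *	if (ioctl(fd, CHELSIO_T4_GET_FILTER_MODE, &mode) != 0)
 *		err(1, "get filter mode");
 *	mode |= T4_FILTER_VLAN;
 *	if (ioctl(fd, CHELSIO_T4_SET_FILTER_MODE, &mode) != 0)
 *		err(1, "set filter mode");
 *
 * set_filter_mode() fails with EBUSY if hardware filters are already in
 * use or the TOE is active.
 */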
7349
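/*
 * Read the hardware hit count for filter `fid' out of its TCB via memory
 * window 0: T4 keeps a 64-bit big-endian counter at byte offset 16 of the
 * TCB, while T5 and later keep a 32-bit counter at offset 24.
 */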
7350static inline uint64_t
7351get_filter_hits(struct adapter *sc, uint32_t fid)
7352{
7353	uint32_t mw_base, off, tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
7354	uint64_t hits;
7355
7356	memwin_info(sc, 0, &mw_base, NULL);
7357
7358	off = position_memwin(sc, 0,
7359	    tcb_base + (fid + sc->tids.ftid_base) * TCB_SIZE);
7360	if (is_t4(sc)) {
7361		hits = t4_read_reg64(sc, mw_base + off + 16);
7362		hits = be64toh(hits);
7363	} else {
7364		hits = t4_read_reg(sc, mw_base + off + 24);
7365		hits = be32toh(hits);
7366	}
7367
7368	return (hits);
7369}
7370
7371static int
7372get_filter(struct adapter *sc, struct t4_filter *t)
7373{
7374	int i, rc, nfilters = sc->tids.nftids;
7375	struct filter_entry *f;
7376
7377	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7378	    "t4getf");
7379	if (rc)
7380		return (rc);
7381
7382	if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL ||
7383	    t->idx >= nfilters) {
7384		t->idx = 0xffffffff;
7385		goto done;
7386	}
7387
7388	f = &sc->tids.ftid_tab[t->idx];
7389	for (i = t->idx; i < nfilters; i++, f++) {
7390		if (f->valid) {
7391			t->idx = i;
7392			t->l2tidx = f->l2t ? f->l2t->idx : 0;
7393			t->smtidx = f->smtidx;
7394			if (f->fs.hitcnts)
7395				t->hits = get_filter_hits(sc, t->idx);
7396			else
7397				t->hits = UINT64_MAX;
7398			t->fs = f->fs;
7399
7400			goto done;
7401		}
7402	}
7403
7404	t->idx = 0xffffffff;
7405done:
7406	end_synchronized_op(sc, LOCK_HELD);
7407	return (0);
7408}
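
/*
 * get_filter() returns the first valid filter at or after the requested
 * index and sets t->idx to 0xffffffff when there is none, so userland can
 * enumerate all filters with a simple loop (sketch, reusing fd and the
 * assumptions from the ioctl example above):
 *
 *	struct t4_filter t;
 *
 *	memset(&t, 0, sizeof(t));
 *	for (;;) {
 *		if (ioctl(fd, CHELSIO_T4_GET_FILTER, &t) != 0)
 *			err(1, "get filter");
 *		if (t.idx == 0xffffffff)
 *			break;
 *		printf("filter %u: %ju hits\n", t.idx, (uintmax_t)t.hits);
 *		t.idx++;
 *	}
 */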
7409
7410static int
7411set_filter(struct adapter *sc, struct t4_filter *t)
7412{
7413	unsigned int nfilters, nports;
7414	struct filter_entry *f;
7415	int i, rc;
7416
7417	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
7418	if (rc)
7419		return (rc);
7420
7421	nfilters = sc->tids.nftids;
7422	nports = sc->params.nports;
7423
7424	if (nfilters == 0) {
7425		rc = ENOTSUP;
7426		goto done;
7427	}
7428
7429	if (!(sc->flags & FULL_INIT_DONE)) {
7430		rc = EAGAIN;
7431		goto done;
7432	}
7433
7434	if (t->idx >= nfilters) {
7435		rc = EINVAL;
7436		goto done;
7437	}
7438
7439	/* Validate against the global filter mode and ingress config */
7440	rc = check_fspec_against_fconf_iconf(sc, &t->fs);
7441	if (rc != 0)
7442		goto done;
7443
7444	if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
7445		rc = EINVAL;
7446		goto done;
7447	}
7448
7449	if (t->fs.val.iport >= nports) {
7450		rc = EINVAL;
7451		goto done;
7452	}
7453
7454	/* Can't specify an iq if not steering to it */
7455	if (!t->fs.dirsteer && t->fs.iq) {
7456		rc = EINVAL;
7457		goto done;
7458	}
7459
7460	/* An IPv6 filter occupies four consecutive slots and must be 4-aligned */
7461	if (t->fs.type == 1 &&
7462	    ((t->idx & 0x3) || t->idx + 4 >= nfilters)) {
7463		rc = EINVAL;
7464		goto done;
7465	}
7466
7467	if (sc->tids.ftid_tab == NULL) {
7468		KASSERT(sc->tids.ftids_in_use == 0,
7469		    ("%s: no memory allocated but filters_in_use > 0",
7470		    __func__));
7471
7472		sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
7473		    nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
7474		if (sc->tids.ftid_tab == NULL) {
7475			rc = ENOMEM;
7476			goto done;
7477		}
7478		mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
7479	}
7480
7481	for (i = 0; i < 4; i++) {
7482		f = &sc->tids.ftid_tab[t->idx + i];
7483
7484		if (f->pending || f->valid) {
7485			rc = EBUSY;
7486			goto done;
7487		}
7488		if (f->locked) {
7489			rc = EPERM;
7490			goto done;
7491		}
7492
7493		if (t->fs.type == 0)
7494			break;
7495	}
7496
7497	f = &sc->tids.ftid_tab[t->idx];
7498	f->fs = t->fs;
7499
7500	rc = set_filter_wr(sc, t->idx);
7501done:
7502	end_synchronized_op(sc, 0);
7503
7504	if (rc == 0) {
7505		mtx_lock(&sc->tids.ftid_lock);
7506		for (;;) {
7507			if (f->pending == 0) {
7508				rc = f->valid ? 0 : EIO;
7509				break;
7510			}
7511
7512			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
7513			    PCATCH, "t4setfw", 0)) {
7514				rc = EINPROGRESS;
7515				break;
7516			}
7517		}
7518		mtx_unlock(&sc->tids.ftid_lock);
7519	}
7520	return (rc);
7521}
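
/*
 * Example (sketch): installing a drop filter for TCP traffic to local
 * port 80 at index 0 via CHELSIO_T4_SET_FILTER.  Matching on the protocol
 * requires F_PROTOCOL in the compiled filter mode; the port and address
 * fields of the 4-tuple are always available.  Illustrative values:
 *
 *	struct t4_filter t;
 *
 *	memset(&t, 0, sizeof(t));
 *	t.idx = 0;
 *	t.fs.val.proto = IPPROTO_TCP;
 *	t.fs.mask.proto = 0xff;
 *	t.fs.val.dport = 80;
 *	t.fs.mask.dport = 0xffff;
 *	t.fs.action = FILTER_DROP;
 *	if (ioctl(fd, CHELSIO_T4_SET_FILTER, &t) != 0)
 *		err(1, "set filter");
 *
 * The ioctl is synchronous: set_filter() sleeps until the firmware's
 * reply (see t4_filter_rpl() below) marks the filter valid, and returns
 * EINPROGRESS if the wait is interrupted.
 */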
7522
7523static int
7524del_filter(struct adapter *sc, struct t4_filter *t)
7525{
7526	unsigned int nfilters;
7527	struct filter_entry *f;
7528	int rc;
7529
7530	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
7531	if (rc)
7532		return (rc);
7533
7534	nfilters = sc->tids.nftids;
7535
7536	if (nfilters == 0) {
7537		rc = ENOTSUP;
7538		goto done;
7539	}
7540
7541	if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
7542	    t->idx >= nfilters) {
7543		rc = EINVAL;
7544		goto done;
7545	}
7546
7547	if (!(sc->flags & FULL_INIT_DONE)) {
7548		rc = EAGAIN;
7549		goto done;
7550	}
7551
7552	f = &sc->tids.ftid_tab[t->idx];
7553
7554	if (f->pending) {
7555		rc = EBUSY;
7556		goto done;
7557	}
7558	if (f->locked) {
7559		rc = EPERM;
7560		goto done;
7561	}
7562
7563	if (f->valid) {
7564		t->fs = f->fs;	/* extra info for the caller */
7565		rc = del_filter_wr(sc, t->idx);
7566	}
7567
7568done:
7569	end_synchronized_op(sc, 0);
7570
7571	if (rc == 0) {
7572		mtx_lock(&sc->tids.ftid_lock);
7573		for (;;) {
7574			if (f->pending == 0) {
7575				rc = f->valid ? EIO : 0;
7576				break;
7577			}
7578
7579			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
7580			    PCATCH, "t4delfw", 0)) {
7581				rc = EINPROGRESS;
7582				break;
7583			}
7584		}
7585		mtx_unlock(&sc->tids.ftid_lock);
7586	}
7587
7588	return (rc);
7589}
7590
7591static void
7592clear_filter(struct filter_entry *f)
7593{
7594	if (f->l2t)
7595		t4_l2t_release(f->l2t);
7596
7597	bzero(f, sizeof (*f));
7598}
7599
7600static int
7601set_filter_wr(struct adapter *sc, int fidx)
7602{
7603	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
7604	struct fw_filter_wr *fwr;
7605	unsigned int ftid, vnic_vld, vnic_vld_mask;
7606	struct wrq_cookie cookie;
7607
7608	ASSERT_SYNCHRONIZED_OP(sc);
7609
7610	if (f->fs.newdmac || f->fs.newvlan) {
7611		/* This filter needs an L2T entry; allocate one. */
7612		f->l2t = t4_l2t_alloc_switching(sc->l2t);
7613		if (f->l2t == NULL)
7614			return (EAGAIN);
7615		if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport,
7616		    f->fs.dmac)) {
7617			t4_l2t_release(f->l2t);
7618			f->l2t = NULL;
7619			return (ENOMEM);
7620		}
7621	}
7622
7623	/* Already validated against fconf, iconf */
7624	MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0);
7625	MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0);
7626	if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld)
7627		vnic_vld = 1;
7628	else
7629		vnic_vld = 0;
7630	if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld)
7631		vnic_vld_mask = 1;
7632	else
7633		vnic_vld_mask = 0;
7634
7635	ftid = sc->tids.ftid_base + fidx;
7636
7637	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
7638	if (fwr == NULL)
7639		return (ENOMEM);
7640	bzero(fwr, sizeof(*fwr));
7641
7642	fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
7643	fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
7644	fwr->tid_to_iq =
7645	    htobe32(V_FW_FILTER_WR_TID(ftid) |
7646		V_FW_FILTER_WR_RQTYPE(f->fs.type) |
7647		V_FW_FILTER_WR_NOREPLY(0) |
7648		V_FW_FILTER_WR_IQ(f->fs.iq));
7649	fwr->del_filter_to_l2tix =
7650	    htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
7651		V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
7652		V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
7653		V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
7654		V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
7655		V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
7656		V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
7657		V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
7658		V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
7659		    f->fs.newvlan == VLAN_REWRITE) |
7660		V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
7661		    f->fs.newvlan == VLAN_REWRITE) |
7662		V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
7663		V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
7664		V_FW_FILTER_WR_PRIO(f->fs.prio) |
7665		V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
7666	fwr->ethtype = htobe16(f->fs.val.ethtype);
7667	fwr->ethtypem = htobe16(f->fs.mask.ethtype);
7668	fwr->frag_to_ovlan_vldm =
7669	    (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
7670		V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
7671		V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
7672		V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) |
7673		V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
7674		V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask));
7675	fwr->smac_sel = 0;
7676	fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
7677	    V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
7678	fwr->maci_to_matchtypem =
7679	    htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
7680		V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
7681		V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
7682		V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
7683		V_FW_FILTER_WR_PORT(f->fs.val.iport) |
7684		V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
7685		V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
7686		V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
7687	fwr->ptcl = f->fs.val.proto;
7688	fwr->ptclm = f->fs.mask.proto;
7689	fwr->ttyp = f->fs.val.tos;
7690	fwr->ttypm = f->fs.mask.tos;
7691	fwr->ivlan = htobe16(f->fs.val.vlan);
7692	fwr->ivlanm = htobe16(f->fs.mask.vlan);
7693	fwr->ovlan = htobe16(f->fs.val.vnic);
7694	fwr->ovlanm = htobe16(f->fs.mask.vnic);
7695	bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
7696	bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
7697	bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
7698	bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm));
7699	fwr->lp = htobe16(f->fs.val.dport);
7700	fwr->lpm = htobe16(f->fs.mask.dport);
7701	fwr->fp = htobe16(f->fs.val.sport);
7702	fwr->fpm = htobe16(f->fs.mask.sport);
7703	if (f->fs.newsmac)
7704		bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma));
7705
7706	f->pending = 1;
7707	sc->tids.ftids_in_use++;
7708
7709	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
7710	return (0);
7711}
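
/*
 * The work request above completes asynchronously: the firmware answers
 * with a CPL_SET_TCB_RPL that is delivered to t4_filter_rpl() below,
 * which clears f->pending, marks the filter valid on success, and wakes
 * up the thread sleeping in set_filter() or del_filter().
 */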
7712
7713static int
7714del_filter_wr(struct adapter *sc, int fidx)
7715{
7716	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
7717	struct fw_filter_wr *fwr;
7718	unsigned int ftid;
7719	struct wrq_cookie cookie;
7720
7721	ftid = sc->tids.ftid_base + fidx;
7722
7723	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
7724	if (fwr == NULL)
7725		return (ENOMEM);
7726	bzero(fwr, sizeof (*fwr));
7727
7728	t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
7729
7730	f->pending = 1;
7731	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
7732	return (0);
7733}
7734
7735int
7736t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
7737{
7738	struct adapter *sc = iq->adapter;
7739	const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
7740	unsigned int idx = GET_TID(rpl);
7741	unsigned int rc;
7742	struct filter_entry *f;
7743
7744	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
7745	    rss->opcode));
7746
7747	if (is_ftid(sc, idx)) {
7748
7749		idx -= sc->tids.ftid_base;
7750		f = &sc->tids.ftid_tab[idx];
7751		rc = G_COOKIE(rpl->cookie);
7752
7753		mtx_lock(&sc->tids.ftid_lock);
7754		if (rc == FW_FILTER_WR_FLT_ADDED) {
7755			KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
7756			    __func__, idx));
7757			f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
7758			f->pending = 0;  /* asynchronous setup completed */
7759			f->valid = 1;
7760		} else {
7761			if (rc != FW_FILTER_WR_FLT_DELETED) {
7762				/* Add or delete failed, display an error */
7763				log(LOG_ERR,
7764				    "filter %u setup failed with error %u\n",
7765				    idx, rc);
7766			}
7767
7768			clear_filter(f);
7769			sc->tids.ftids_in_use--;
7770		}
7771		wakeup(&sc->tids.ftid_tab);
7772		mtx_unlock(&sc->tids.ftid_lock);
7773	}
7774
7775	return (0);
7776}
7777
7778static int
7779get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
7780{
7781	int rc;
7782
7783	if (cntxt->cid > M_CTXTQID)
7784		return (EINVAL);
7785
7786	if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
7787	    cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
7788		return (EINVAL);
7789
7790	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
7791	if (rc)
7792		return (rc);
7793
7794	if (sc->flags & FW_OK) {
7795		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
7796		    &cntxt->data[0]);
7797		if (rc == 0)
7798			goto done;
7799	}
7800
7801	/*
7802	 * Read via firmware failed or wasn't even attempted.  Read directly via
7803	 * the backdoor.
7804	 */
7805	rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
7806done:
7807	end_synchronized_op(sc, 0);
7808	return (rc);
7809}
7810
7811static int
7812load_fw(struct adapter *sc, struct t4_data *fw)
7813{
7814	int rc;
7815	uint8_t *fw_data;
7816
7817	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
7818	if (rc)
7819		return (rc);
7820
7821	if (sc->flags & FULL_INIT_DONE) {
7822		rc = EBUSY;
7823		goto done;
7824	}
7825
7826	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
7827	if (fw_data == NULL) {
7828		rc = ENOMEM;
7829		goto done;
7830	}
7831
7832	rc = copyin(fw->data, fw_data, fw->len);
7833	if (rc == 0)
7834		rc = -t4_load_fw(sc, fw_data, fw->len);
7835
7836	free(fw_data, M_CXGBE);
7837done:
7838	end_synchronized_op(sc, 0);
7839	return (rc);
7840}
7841
7842static int
7843read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
7844{
7845	uint32_t addr, off, remaining, i, n;
7846	uint32_t *buf, *b;
7847	uint32_t mw_base, mw_aperture;
7848	int rc;
7849	uint8_t *dst;
7850
7851	rc = validate_mem_range(sc, mr->addr, mr->len);
7852	if (rc != 0)
7853		return (rc);
7854
7855	memwin_info(sc, win, &mw_base, &mw_aperture);
7856	buf = b = malloc(min(mr->len, mw_aperture), M_CXGBE, M_WAITOK);
7857	addr = mr->addr;
7858	remaining = mr->len;
7859	dst = (void *)mr->data;
7860
7861	while (remaining) {
7862		off = position_memwin(sc, win, addr);
7863
7864		/* number of bytes that we'll copy in the inner loop */
7865		n = min(remaining, mw_aperture - off);
7866		for (i = 0; i < n; i += 4)
7867			*b++ = t4_read_reg(sc, mw_base + off + i);
7868
7869		rc = copyout(buf, dst, n);
7870		if (rc != 0)
7871			break;
7872
7873		b = buf;
7874		dst += n;
7875		remaining -= n;
7876		addr += n;
7877	}
7878
7879	free(buf, M_CXGBE);
7880	return (rc);
7881}
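
/*
 * Sketch of a userland read of the first 4KB of adapter memory through
 * CHELSIO_T4_GET_MEM (same assumptions as the earlier ioctl sketches;
 * field types per t4_ioctl.h, and the address and length must fall within
 * a range that validate_mem_range() accepts):
 *
 *	struct t4_mem_range mr;
 *	uint32_t buf[1024];
 *
 *	memset(&mr, 0, sizeof(mr));
 *	mr.addr = 0;
 *	mr.len = sizeof(buf);
 *	mr.data = buf;
 *	if (ioctl(fd, CHELSIO_T4_GET_MEM, &mr) != 0)
 *		err(1, "read mem");
 */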
7882
7883static int
7884read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
7885{
7886	int rc;
7887
7888	if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
7889		return (EINVAL);
7890
7891	if (i2cd->len > sizeof(i2cd->data))
7892		return (EFBIG);
7893
7894	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
7895	if (rc)
7896		return (rc);
7897	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
7898	    i2cd->offset, i2cd->len, &i2cd->data[0]);
7899	end_synchronized_op(sc, 0);
7900
7901	return (rc);
7902}
7903
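/*
 * The scheduler parameters checked below use negative values to mean
 * "unset", so in_range() deliberately accepts any negative value and only
 * range-checks non-negative ones.
 */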
7904static int
7905in_range(int val, int lo, int hi)
7906{
7907
7908	return (val < 0 || (val <= hi && val >= lo));
7909}
7910
7911static int
7912set_sched_class(struct adapter *sc, struct t4_sched_params *p)
7913{
7914	int fw_subcmd, fw_type, rc;
7915
7916	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsc");
7917	if (rc)
7918		return (rc);
7919
7920	if (!(sc->flags & FULL_INIT_DONE)) {
7921		rc = EAGAIN;
7922		goto done;
7923	}
7924
7925	/*
7926	 * Translate the cxgbetool parameters into T4 firmware parameters.  (The
7927	 * sub-command and type are in common locations.)
7928	 */
7929	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
7930		fw_subcmd = FW_SCHED_SC_CONFIG;
7931	else if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
7932		fw_subcmd = FW_SCHED_SC_PARAMS;
7933	else {
7934		rc = EINVAL;
7935		goto done;
7936	}
7937	if (p->type == SCHED_CLASS_TYPE_PACKET)
7938		fw_type = FW_SCHED_TYPE_PKTSCHED;
7939	else {
7940		rc = EINVAL;
7941		goto done;
7942	}
7943
7944	if (fw_subcmd == FW_SCHED_SC_CONFIG) {
7945		/* Vet our parameters ... */
7946		if (p->u.config.minmax < 0) {
7947			rc = EINVAL;
7948			goto done;
7949		}
7950
7951		/* And pass the request to the firmware ... */
7952		rc = -t4_sched_config(sc, fw_type, p->u.config.minmax, 1);
7953		goto done;
7954	}
7955
7956	if (fw_subcmd == FW_SCHED_SC_PARAMS) {
7957		int fw_level;
7958		int fw_mode;
7959		int fw_rateunit;
7960		int fw_ratemode;
7961
7962		if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL)
7963			fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
7964		else if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR)
7965			fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
7966		else if (p->u.params.level == SCHED_CLASS_LEVEL_CH_RL)
7967			fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
7968		else {
7969			rc = EINVAL;
7970			goto done;
7971		}
7972
7973		if (p->u.params.mode == SCHED_CLASS_MODE_CLASS)
7974			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
7975		else if (p->u.params.mode == SCHED_CLASS_MODE_FLOW)
7976			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
7977		else {
7978			rc = EINVAL;
7979			goto done;
7980		}
7981
7982		if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_BITS)
7983			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
7984		else if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_PKTS)
7985			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
7986		else {
7987			rc = EINVAL;
7988			goto done;
7989		}
7990
7991		if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_REL)
7992			fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
7993		else if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_ABS)
7994			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
7995		else {
7996			rc = EINVAL;
7997			goto done;
7998		}
7999
8000		/* Vet our parameters ... */
8001		if (!in_range(p->u.params.channel, 0, 3) ||
8002		    !in_range(p->u.params.cl, 0, sc->chip_params->nsched_cls) ||
8003		    !in_range(p->u.params.minrate, 0, 10000000) ||
8004		    !in_range(p->u.params.maxrate, 0, 10000000) ||
8005		    !in_range(p->u.params.weight, 0, 100)) {
8006			rc = ERANGE;
8007			goto done;
8008		}
8009
8010		/*
8011		 * Translate any unset parameters into the firmware's
8012		 * nomenclature and/or fail the call if the parameters
8013		 * are required ...
8014		 */
8015		if (p->u.params.rateunit < 0 || p->u.params.ratemode < 0 ||
8016		    p->u.params.channel < 0 || p->u.params.cl < 0) {
8017			rc = EINVAL;
8018			goto done;
8019		}
8020		if (p->u.params.minrate < 0)
8021			p->u.params.minrate = 0;
8022		if (p->u.params.maxrate < 0) {
8023			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
8024			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
8025				rc = EINVAL;
8026				goto done;
8027			} else
8028				p->u.params.maxrate = 0;
8029		}
8030		if (p->u.params.weight < 0) {
8031			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) {
8032				rc = EINVAL;
8033				goto done;
8034			} else
8035				p->u.params.weight = 0;
8036		}
8037		if (p->u.params.pktsize < 0) {
8038			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
8039			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
8040				rc = EINVAL;
8041				goto done;
8042			} else
8043				p->u.params.pktsize = 0;
8044		}
8045
8046		/* See what the firmware thinks of the request ... */
8047		rc = -t4_sched_params(sc, fw_type, fw_level, fw_mode,
8048		    fw_rateunit, fw_ratemode, p->u.params.channel,
8049		    p->u.params.cl, p->u.params.minrate, p->u.params.maxrate,
8050		    p->u.params.weight, p->u.params.pktsize, 1);
8051		goto done;
8052	}
8053
8054	rc = EINVAL;
8055done:
8056	end_synchronized_op(sc, 0);
8057	return (rc);
8058}
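
/*
 * Example (sketch): configuring scheduling class 0 on channel 0 as a
 * class-level rate limiter with an absolute cap, leaving the optional
 * knobs unset (-1).  Constants are from t4_ioctl.h; the rate value is
 * illustrative and uses whatever unit the firmware expects for
 * FW_SCHED_PARAMS_RATE_ABS (nominally Kb/s):
 *
 *	struct t4_sched_params p;
 *
 *	memset(&p, 0, sizeof(p));
 *	p.subcmd = SCHED_CLASS_SUBCMD_PARAMS;
 *	p.type = SCHED_CLASS_TYPE_PACKET;
 *	p.u.params.level = SCHED_CLASS_LEVEL_CL_RL;
 *	p.u.params.mode = SCHED_CLASS_MODE_CLASS;
 *	p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
 *	p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
 *	p.u.params.channel = 0;
 *	p.u.params.cl = 0;
 *	p.u.params.minrate = -1;
 *	p.u.params.maxrate = 100000;
 *	p.u.params.weight = -1;
 *	p.u.params.pktsize = 1500;
 *	if (ioctl(fd, CHELSIO_T4_SCHED_CLASS, &p) != 0)
 *		err(1, "sched class");
 *
 * For CL_RL both maxrate and pktsize must be set; weight is only required
 * at the CL_WRR level.
 */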
8059
8060static int
8061set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
8062{
8063	struct port_info *pi = NULL;
8064	struct vi_info *vi;
8065	struct sge_txq *txq;
8066	uint32_t fw_mnem, fw_queue, fw_class;
8067	int i, rc;
8068
8069	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
8070	if (rc)
8071		return (rc);
8072
8073	if (!(sc->flags & FULL_INIT_DONE)) {
8074		rc = EAGAIN;
8075		goto done;
8076	}
8077
8078	if (p->port >= sc->params.nports) {
8079		rc = EINVAL;
8080		goto done;
8081	}
8082
8083	/* XXX: Only supported for the main VI. */
8084	pi = sc->port[p->port];
8085	vi = &pi->vi[0];
8086	if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, 7)) {
8087		rc = EINVAL;
8088		goto done;
8089	}
8090
8091	/*
8092	 * Create a template for the FW_PARAMS_CMD mnemonic and value (TX
8093	 * Scheduling Class in this case).
8094	 */
8095	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
8096	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
8097	fw_class = p->cl < 0 ? 0xffffffff : p->cl;
8098
8099	/*
8100	 * If p->queue is non-negative, then we're only changing the scheduling
8101	 * on a single specified TX queue.
8102	 */
8103	if (p->queue >= 0) {
8104		txq = &sc->sge.txq[vi->first_txq + p->queue];
8105		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8106		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8107		    &fw_class);
8108		goto done;
8109	}
8110
8111	/*
8112	 * Change the scheduling on all the TX queues for the
8113	 * interface.
8114	 */
8115	for_each_txq(vi, i, txq) {
8116		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8117		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8118		    &fw_class);
8119		if (rc)
8120			goto done;
8121	}
8122
8123	rc = 0;
8124done:
8125	end_synchronized_op(sc, 0);
8126	return (rc);
8127}
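
/*
 * And the companion sketch for CHELSIO_T4_SCHED_QUEUE, binding every tx
 * queue of the first port's main VI to the class configured above
 * (queue = -1 means "all queues"):
 *
 *	struct t4_sched_queue q;
 *
 *	memset(&q, 0, sizeof(q));
 *	q.port = 0;
 *	q.queue = -1;
 *	q.cl = 0;
 *	if (ioctl(fd, CHELSIO_T4_SCHED_QUEUE, &q) != 0)
 *		err(1, "sched queue");
 */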
8128
8129int
8130t4_os_find_pci_capability(struct adapter *sc, int cap)
8131{
8132	int i;
8133
8134	return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
8135}
8136
8137int
8138t4_os_pci_save_state(struct adapter *sc)
8139{
8140	device_t dev;
8141	struct pci_devinfo *dinfo;
8142
8143	dev = sc->dev;
8144	dinfo = device_get_ivars(dev);
8145
8146	pci_cfg_save(dev, dinfo, 0);
8147	return (0);
8148}
8149
8150int
8151t4_os_pci_restore_state(struct adapter *sc)
8152{
8153	device_t dev;
8154	struct pci_devinfo *dinfo;
8155
8156	dev = sc->dev;
8157	dinfo = device_get_ivars(dev);
8158
8159	pci_cfg_restore(dev, dinfo);
8160	return (0);
8161}
8162
8163void
8164t4_os_portmod_changed(const struct adapter *sc, int idx)
8165{
8166	struct port_info *pi = sc->port[idx];
8167	struct vi_info *vi;
8168	struct ifnet *ifp;
8169	int v;
8170	static const char *mod_str[] = {
8171		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
8172	};
8173
8174	for_each_vi(pi, v, vi) {
8175		build_medialist(pi, &vi->media);
8176	}
8177
8178	ifp = pi->vi[0].ifp;
8179	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
8180		if_printf(ifp, "transceiver unplugged.\n");
8181	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
8182		if_printf(ifp, "unknown transceiver inserted.\n");
8183	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
8184		if_printf(ifp, "unsupported transceiver inserted.\n");
8185	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
8186		if_printf(ifp, "%s transceiver inserted.\n",
8187		    mod_str[pi->mod_type]);
8188	} else {
8189		if_printf(ifp, "transceiver (type %d) inserted.\n",
8190		    pi->mod_type);
8191	}
8192}
8193
8194void
8195t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason)
8196{
8197	struct port_info *pi = sc->port[idx];
8198	struct vi_info *vi;
8199	struct ifnet *ifp;
8200	int v;
8201
8202	if (link_stat)
8203		pi->linkdnrc = -1;
8204	else {
8205		if (reason >= 0)
8206			pi->linkdnrc = reason;
8207	}
8208	for_each_vi(pi, v, vi) {
8209		ifp = vi->ifp;
8210		if (ifp == NULL)
8211			continue;
8212
8213		if (link_stat) {
8214			ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed);
8215			if_link_state_change(ifp, LINK_STATE_UP);
8216		} else {
8217			if_link_state_change(ifp, LINK_STATE_DOWN);
8218		}
8219	}
8220}
8221
8222void
8223t4_iterate(void (*func)(struct adapter *, void *), void *arg)
8224{
8225	struct adapter *sc;
8226
8227	sx_slock(&t4_list_lock);
8228	SLIST_FOREACH(sc, &t4_list, link) {
8229		/*
8230		 * func should not make any assumptions about what state sc is
8231		 * in - the only guarantee is that sc->sc_lock is a valid lock.
8232		 */
8233		func(sc, arg);
8234	}
8235	sx_sunlock(&t4_list_lock);
8236}
8237
8238static int
8239t4_open(struct cdev *dev, int flags, int type, struct thread *td)
8240{
8241	return (0);
8242}
8243
8244static int
8245t4_close(struct cdev *dev, int flags, int type, struct thread *td)
8246{
8247	return (0);
8248}
8249
8250static int
8251t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
8252    struct thread *td)
8253{
8254	int rc;
8255	struct adapter *sc = dev->si_drv1;
8256
8257	rc = priv_check(td, PRIV_DRIVER);
8258	if (rc != 0)
8259		return (rc);
8260
8261	switch (cmd) {
8262	case CHELSIO_T4_GETREG: {
8263		struct t4_reg *edata = (struct t4_reg *)data;
8264
8265		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
8266			return (EFAULT);
8267
8268		if (edata->size == 4)
8269			edata->val = t4_read_reg(sc, edata->addr);
8270		else if (edata->size == 8)
8271			edata->val = t4_read_reg64(sc, edata->addr);
8272		else
8273			return (EINVAL);
8274
8275		break;
8276	}
8277	case CHELSIO_T4_SETREG: {
8278		struct t4_reg *edata = (struct t4_reg *)data;
8279
8280		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
8281			return (EFAULT);
8282
8283		if (edata->size == 4) {
8284			if (edata->val & 0xffffffff00000000)
8285				return (EINVAL);
8286			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
8287		} else if (edata->size == 8)
8288			t4_write_reg64(sc, edata->addr, edata->val);
8289		else
8290			return (EINVAL);
8291		break;
8292	}
8293	case CHELSIO_T4_REGDUMP: {
8294		struct t4_regdump *regs = (struct t4_regdump *)data;
8295		int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE;
8296		uint8_t *buf;
8297
8298		if (regs->len < reglen) {
8299			regs->len = reglen; /* hint to the caller */
8300			return (ENOBUFS);
8301		}
8302
8303		regs->len = reglen;
8304		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
8305		get_regs(sc, regs, buf);
8306		rc = copyout(buf, regs->data, reglen);
8307		free(buf, M_CXGBE);
8308		break;
8309	}
8310	case CHELSIO_T4_GET_FILTER_MODE:
8311		rc = get_filter_mode(sc, (uint32_t *)data);
8312		break;
8313	case CHELSIO_T4_SET_FILTER_MODE:
8314		rc = set_filter_mode(sc, *(uint32_t *)data);
8315		break;
8316	case CHELSIO_T4_GET_FILTER:
8317		rc = get_filter(sc, (struct t4_filter *)data);
8318		break;
8319	case CHELSIO_T4_SET_FILTER:
8320		rc = set_filter(sc, (struct t4_filter *)data);
8321		break;
8322	case CHELSIO_T4_DEL_FILTER:
8323		rc = del_filter(sc, (struct t4_filter *)data);
8324		break;
8325	case CHELSIO_T4_GET_SGE_CONTEXT:
8326		rc = get_sge_context(sc, (struct t4_sge_context *)data);
8327		break;
8328	case CHELSIO_T4_LOAD_FW:
8329		rc = load_fw(sc, (struct t4_data *)data);
8330		break;
8331	case CHELSIO_T4_GET_MEM:
8332		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
8333		break;
8334	case CHELSIO_T4_GET_I2C:
8335		rc = read_i2c(sc, (struct t4_i2c_data *)data);
8336		break;
8337	case CHELSIO_T4_CLEAR_STATS: {
8338		int i, v;
8339		u_int port_id = *(uint32_t *)data;
8340		struct port_info *pi;
8341		struct vi_info *vi;
8342
8343		if (port_id >= sc->params.nports)
8344			return (EINVAL);
8345		pi = sc->port[port_id];
8346
8347		/* MAC stats */
8348		t4_clr_port_stats(sc, pi->tx_chan);
8349		pi->tx_parse_error = 0;
8350		mtx_lock(&sc->regwin_lock);
8351		for_each_vi(pi, v, vi) {
8352			if (vi->flags & VI_INIT_DONE)
8353				t4_clr_vi_stats(sc, vi->viid);
8354		}
8355		mtx_unlock(&sc->regwin_lock);
8356
8357		/*
8358		 * Since this command accepts a port, clear stats for
8359		 * all VIs on this port.
8360		 */
8361		for_each_vi(pi, v, vi) {
8362			if (vi->flags & VI_INIT_DONE) {
8363				struct sge_rxq *rxq;
8364				struct sge_txq *txq;
8365				struct sge_wrq *wrq;
8366
8367				for_each_rxq(vi, i, rxq) {
8368#if defined(INET) || defined(INET6)
8369					rxq->lro.lro_queued = 0;
8370					rxq->lro.lro_flushed = 0;
8371#endif
8372					rxq->rxcsum = 0;
8373					rxq->vlan_extraction = 0;
8374				}
8375
8376				for_each_txq(vi, i, txq) {
8377					txq->txcsum = 0;
8378					txq->tso_wrs = 0;
8379					txq->vlan_insertion = 0;
8380					txq->imm_wrs = 0;
8381					txq->sgl_wrs = 0;
8382					txq->txpkt_wrs = 0;
8383					txq->txpkts0_wrs = 0;
8384					txq->txpkts1_wrs = 0;
8385					txq->txpkts0_pkts = 0;
8386					txq->txpkts1_pkts = 0;
8387					mp_ring_reset_stats(txq->r);
8388				}
8389
8390#ifdef TCP_OFFLOAD
8391				/* nothing to clear for each ofld_rxq */
8392
8393				for_each_ofld_txq(vi, i, wrq) {
8394					wrq->tx_wrs_direct = 0;
8395					wrq->tx_wrs_copied = 0;
8396				}
8397#endif
8398
8399				if (IS_MAIN_VI(vi)) {
8400					wrq = &sc->sge.ctrlq[pi->port_id];
8401					wrq->tx_wrs_direct = 0;
8402					wrq->tx_wrs_copied = 0;
8403				}
8404			}
8405		}
8406		break;
8407	}
8408	case CHELSIO_T4_SCHED_CLASS:
8409		rc = set_sched_class(sc, (struct t4_sched_params *)data);
8410		break;
8411	case CHELSIO_T4_SCHED_QUEUE:
8412		rc = set_sched_queue(sc, (struct t4_sched_queue *)data);
8413		break;
8414	case CHELSIO_T4_GET_TRACER:
8415		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
8416		break;
8417	case CHELSIO_T4_SET_TRACER:
8418		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
8419		break;
8420	default:
8421		rc = EINVAL;
8422	}
8423
8424	return (rc);
8425}
8426
8427void
8428t4_db_full(struct adapter *sc)
8429{
8430
8431	CXGBE_UNIMPLEMENTED(__func__);
8432}
8433
8434void
8435t4_db_dropped(struct adapter *sc)
8436{
8437
8438	CXGBE_UNIMPLEMENTED(__func__);
8439}
8440
8441#ifdef TCP_OFFLOAD
8442void
8443t4_iscsi_init(struct ifnet *ifp, unsigned int tag_mask,
8444    const unsigned int *pgsz_order)
8445{
8446	struct vi_info *vi = ifp->if_softc;
8447	struct adapter *sc = vi->pi->adapter;
8448
8449	t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask);
8450	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) |
8451		V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) |
8452		V_HPZ3(pgsz_order[3]));
8453}
8454
8455static int
8456toe_capability(struct vi_info *vi, int enable)
8457{
8458	int rc;
8459	struct port_info *pi = vi->pi;
8460	struct adapter *sc = pi->adapter;
8461
8462	ASSERT_SYNCHRONIZED_OP(sc);
8463
8464	if (!is_offload(sc))
8465		return (ENODEV);
8466
8467	if (enable) {
8468		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
8469			/* TOE is already enabled. */
8470			return (0);
8471		}
8472
8473		/*
8474		 * We need the port's queues around so that we're able to send
8475		 * and receive CPLs to/from the TOE even if the ifnet for this
8476		 * port has never been UP'd administratively.
8477		 */
8478		if (!(vi->flags & VI_INIT_DONE)) {
8479			rc = vi_full_init(vi);
8480			if (rc)
8481				return (rc);
8482		}
8483		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
8484			rc = vi_full_init(&pi->vi[0]);
8485			if (rc)
8486				return (rc);
8487		}
8488
8489		if (isset(&sc->offload_map, pi->port_id)) {
8490			/* TOE is enabled on another VI of this port. */
8491			pi->uld_vis++;
8492			return (0);
8493		}
8494
8495		if (!uld_active(sc, ULD_TOM)) {
8496			rc = t4_activate_uld(sc, ULD_TOM);
8497			if (rc == EAGAIN) {
8498				log(LOG_WARNING,
8499				    "You must kldload t4_tom.ko before trying "
8500				    "to enable TOE on a cxgbe interface.\n");
8501			}
8502			if (rc != 0)
8503				return (rc);
8504			KASSERT(sc->tom_softc != NULL,
8505			    ("%s: TOM activated but softc NULL", __func__));
8506			KASSERT(uld_active(sc, ULD_TOM),
8507			    ("%s: TOM activated but flag not set", __func__));
8508		}
8509
8510		/* Activate iWARP and iSCSI too, if the modules are loaded. */
8511		if (!uld_active(sc, ULD_IWARP))
8512			(void) t4_activate_uld(sc, ULD_IWARP);
8513		if (!uld_active(sc, ULD_ISCSI))
8514			(void) t4_activate_uld(sc, ULD_ISCSI);
8515
8516		pi->uld_vis++;
8517		setbit(&sc->offload_map, pi->port_id);
8518	} else {
8519		pi->uld_vis--;
8520
8521		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
8522			return (0);
8523
8524		KASSERT(uld_active(sc, ULD_TOM),
8525		    ("%s: TOM never initialized?", __func__));
8526		clrbit(&sc->offload_map, pi->port_id);
8527	}
8528
8529	return (0);
8530}
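
/*
 * toe_capability() runs under a synchronized op from the ifnet capability
 * ioctl path when IFCAP_TOE is toggled on an interface, e.g. with
 * "ifconfig cxgbe0 toe" (t4_tom.ko must be loaded for enable to succeed).
 */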
8531
8532/*
8533 * Add an upper layer driver to the global list.
8534 */
8535int
8536t4_register_uld(struct uld_info *ui)
8537{
8538	int rc = 0;
8539	struct uld_info *u;
8540
8541	sx_xlock(&t4_uld_list_lock);
8542	SLIST_FOREACH(u, &t4_uld_list, link) {
8543		if (u->uld_id == ui->uld_id) {
8544			rc = EEXIST;
8545			goto done;
8546		}
8547	}
8548
8549	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
8550	ui->refcount = 0;
8551done:
8552	sx_xunlock(&t4_uld_list_lock);
8553	return (rc);
8554}
8555
8556int
8557t4_unregister_uld(struct uld_info *ui)
8558{
8559	int rc = EINVAL;
8560	struct uld_info *u;
8561
8562	sx_xlock(&t4_uld_list_lock);
8563
8564	SLIST_FOREACH(u, &t4_uld_list, link) {
8565		if (u == ui) {
8566			if (ui->refcount > 0) {
8567				rc = EBUSY;
8568				goto done;
8569			}
8570
8571			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
8572			rc = 0;
8573			goto done;
8574		}
8575	}
8576done:
8577	sx_xunlock(&t4_uld_list_lock);
8578	return (rc);
8579}
8580
8581int
8582t4_activate_uld(struct adapter *sc, int id)
8583{
8584	int rc;
8585	struct uld_info *ui;
8586
8587	ASSERT_SYNCHRONIZED_OP(sc);
8588
8589	if (id < 0 || id > ULD_MAX)
8590		return (EINVAL);
8591	rc = EAGAIN;	/* kldload the module with this ULD and try again. */
8592
8593	sx_slock(&t4_uld_list_lock);
8594
8595	SLIST_FOREACH(ui, &t4_uld_list, link) {
8596		if (ui->uld_id == id) {
8597			if (!(sc->flags & FULL_INIT_DONE)) {
8598				rc = adapter_full_init(sc);
8599				if (rc != 0)
8600					break;
8601			}
8602
8603			rc = ui->activate(sc);
8604			if (rc == 0) {
8605				setbit(&sc->active_ulds, id);
8606				ui->refcount++;
8607			}
8608			break;
8609		}
8610	}
8611
8612	sx_sunlock(&t4_uld_list_lock);
8613
8614	return (rc);
8615}
8616
8617int
8618t4_deactivate_uld(struct adapter *sc, int id)
8619{
8620	int rc;
8621	struct uld_info *ui;
8622
8623	ASSERT_SYNCHRONIZED_OP(sc);
8624
8625	if (id < 0 || id > ULD_MAX)
8626		return (EINVAL);
8627	rc = ENXIO;
8628
8629	sx_slock(&t4_uld_list_lock);
8630
8631	SLIST_FOREACH(ui, &t4_uld_list, link) {
8632		if (ui->uld_id == id) {
8633			rc = ui->deactivate(sc);
8634			if (rc == 0) {
8635				clrbit(&sc->active_ulds, id);
8636				ui->refcount--;
8637			}
8638			break;
8639		}
8640	}
8641
8642	sx_sunlock(&t4_uld_list_lock);
8643
8644	return (rc);
8645}
8646
8647int
8648uld_active(struct adapter *sc, int uld_id)
8649{
8650
8651	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
8652
8653	return (isset(&sc->active_ulds, uld_id));
8654}
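
/*
 * A ULD registers itself at module load time; a sketch of the pattern
 * (the uld_info contents here are illustrative, see t4_tom for the real
 * thing):
 *
 *	static struct uld_info tom_uld_info = {
 *		.uld_id = ULD_TOM,
 *		.activate = t4_tom_activate,
 *		.deactivate = t4_tom_deactivate,
 *	};
 *
 *	error = t4_register_uld(&tom_uld_info);
 *
 * t4_activate_uld() later calls ui->activate(sc) the first time the ULD
 * is needed on an adapter and bumps ui->refcount.
 */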
8655#endif
8656
8657/*
8658 * Come up with reasonable defaults for some of the tunables, provided they're
8659 * not set by the user (in which case we'll use the values as is).
8660 */
8661static void
8662tweak_tunables(void)
8663{
8664	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
8665
8666	if (t4_ntxq10g < 1) {
8667#ifdef RSS
8668		t4_ntxq10g = rss_getnumbuckets();
8669#else
8670		t4_ntxq10g = min(nc, NTXQ_10G);
8671#endif
8672	}
8673
8674	if (t4_ntxq1g < 1) {
8675#ifdef RSS
8676		/* XXX: way too many for 1GbE? */
8677		t4_ntxq1g = rss_getnumbuckets();
8678#else
8679		t4_ntxq1g = min(nc, NTXQ_1G);
8680#endif
8681	}
8682
8683	if (t4_ntxq_vi < 1)
8684		t4_ntxq_vi = min(nc, NTXQ_VI);
8685
8686	if (t4_nrxq10g < 1) {
8687#ifdef RSS
8688		t4_nrxq10g = rss_getnumbuckets();
8689#else
8690		t4_nrxq10g = min(nc, NRXQ_10G);
8691#endif
8692	}
8693
8694	if (t4_nrxq1g < 1) {
8695#ifdef RSS
8696		/* XXX: way too many for 1GbE? */
8697		t4_nrxq1g = rss_getnumbuckets();
8698#else
8699		t4_nrxq1g = min(nc, NRXQ_1G);
8700#endif
8701	}
8702
8703	if (t4_nrxq_vi < 1)
8704		t4_nrxq_vi = min(nc, NRXQ_VI);
8705
8706#ifdef TCP_OFFLOAD
8707	if (t4_nofldtxq10g < 1)
8708		t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
8709
8710	if (t4_nofldtxq1g < 1)
8711		t4_nofldtxq1g = min(nc, NOFLDTXQ_1G);
8712
8713	if (t4_nofldtxq_vi < 1)
8714		t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI);
8715
8716	if (t4_nofldrxq10g < 1)
8717		t4_nofldrxq10g = min(nc, NOFLDRXQ_10G);
8718
8719	if (t4_nofldrxq1g < 1)
8720		t4_nofldrxq1g = min(nc, NOFLDRXQ_1G);
8721
8722	if (t4_nofldrxq_vi < 1)
8723		t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI);
8724
8725	if (t4_toecaps_allowed == -1)
8726		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
8727#else
8728	if (t4_toecaps_allowed == -1)
8729		t4_toecaps_allowed = 0;
8730#endif
8731
8732#ifdef DEV_NETMAP
8733	if (t4_nnmtxq_vi < 1)
8734		t4_nnmtxq_vi = min(nc, NNMTXQ_VI);
8735
8736	if (t4_nnmrxq_vi < 1)
8737		t4_nnmrxq_vi = min(nc, NNMRXQ_VI);
8738#endif
8739
8740	if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS)
8741		t4_tmr_idx_10g = TMR_IDX_10G;
8742
8743	if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS)
8744		t4_pktc_idx_10g = PKTC_IDX_10G;
8745
8746	if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS)
8747		t4_tmr_idx_1g = TMR_IDX_1G;
8748
8749	if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS)
8750		t4_pktc_idx_1g = PKTC_IDX_1G;
8751
8752	if (t4_qsize_txq < 128)
8753		t4_qsize_txq = 128;
8754
8755	if (t4_qsize_rxq < 128)
8756		t4_qsize_rxq = 128;
8757	while (t4_qsize_rxq & 7)
8758		t4_qsize_rxq++;
8759
8760	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
8761}
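
/*
 * All of the t4_* knobs consumed above are loader tunables under the
 * hw.cxgbe namespace (see cxgbe(4)); for example, in /boot/loader.conf:
 *
 *	hw.cxgbe.ntxq10g="8"
 *	hw.cxgbe.nrxq10g="8"
 *	hw.cxgbe.qsize_rxq="1024"
 *
 * Values left unset get the defaults computed here.
 */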
8762
8763static struct sx mlu;	/* mod load unload */
8764SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
8765
8766static int
8767mod_event(module_t mod, int cmd, void *arg)
8768{
8769	int rc = 0;
8770	static int loaded = 0;
8771
8772	switch (cmd) {
8773	case MOD_LOAD:
8774		sx_xlock(&mlu);
8775		if (loaded++ == 0) {
8776			t4_sge_modload();
8777			sx_init(&t4_list_lock, "T4/T5 adapters");
8778			SLIST_INIT(&t4_list);
8779#ifdef TCP_OFFLOAD
8780			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
8781			SLIST_INIT(&t4_uld_list);
8782#endif
8783			t4_tracer_modload();
8784			tweak_tunables();
8785		}
8786		sx_xunlock(&mlu);
8787		break;
8788
8789	case MOD_UNLOAD:
8790		sx_xlock(&mlu);
8791		if (--loaded == 0) {
8792			int tries;
8793
8794			sx_slock(&t4_list_lock);
8795			if (!SLIST_EMPTY(&t4_list)) {
8796				rc = EBUSY;
8797				sx_sunlock(&t4_list_lock);
8798				goto done_unload;
8799			}
8800#ifdef TCP_OFFLOAD
8801			sx_slock(&t4_uld_list_lock);
8802			if (!SLIST_EMPTY(&t4_uld_list)) {
8803				rc = EBUSY;
8804				sx_sunlock(&t4_uld_list_lock);
8805				sx_sunlock(&t4_list_lock);
8806				goto done_unload;
8807			}
8808#endif
8809			tries = 0;
8810			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
8811				uprintf("%ju clusters with custom free routine "
8812				    "still in use.\n", t4_sge_extfree_refs());
8813				pause("t4unload", 2 * hz);
8814			}
8815#ifdef TCP_OFFLOAD
8816			sx_sunlock(&t4_uld_list_lock);
8817#endif
8818			sx_sunlock(&t4_list_lock);
8819
8820			if (t4_sge_extfree_refs() == 0) {
8821				t4_tracer_modunload();
8822#ifdef TCP_OFFLOAD
8823				sx_destroy(&t4_uld_list_lock);
8824#endif
8825				sx_destroy(&t4_list_lock);
8826				t4_sge_modunload();
8827				loaded = 0;
8828			} else {
8829				rc = EBUSY;
8830				loaded++;	/* undo earlier decrement */
8831			}
8832		}
8833done_unload:
8834		sx_xunlock(&mlu);
8835		break;
8836	}
8837
8838	return (rc);
8839}
8840
8841static devclass_t t4_devclass, t5_devclass;
8842static devclass_t cxgbe_devclass, cxl_devclass;
8843static devclass_t vcxgbe_devclass, vcxl_devclass;
8844
8845DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
8846MODULE_VERSION(t4nex, 1);
8847MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
8848
8849DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
8850MODULE_VERSION(t5nex, 1);
8851MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
8852
8853DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
8854MODULE_VERSION(cxgbe, 1);
8855
8856DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
8857MODULE_VERSION(cxl, 1);
8858
8859DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
8860MODULE_VERSION(vcxgbe, 1);
8861
8862DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
8863MODULE_VERSION(vcxl, 1);
8864