1/* SPDX-License-Identifier: BSD-3-Clause */
2/*  Copyright (c) 2024, Intel Corporation
3 *  All rights reserved.
4 *
5 *  Redistribution and use in source and binary forms, with or without
6 *  modification, are permitted provided that the following conditions are met:
7 *
8 *   1. Redistributions of source code must retain the above copyright notice,
9 *      this list of conditions and the following disclaimer.
10 *
11 *   2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *
15 *   3. Neither the name of the Intel Corporation nor the names of its
16 *      contributors may be used to endorse or promote products derived from
17 *      this software without specific prior written permission.
18 *
19 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 *  POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/**
33 * @file ice_lib.c
34 * @brief Generic device setup and sysctl functions
35 *
36 * Library of generic device functions not specific to the networking stack.
37 *
38 * This includes hardware initialization functions, as well as handlers for
39 * many of the device sysctls used to probe driver status or tune specific
40 * behaviors.
41 */
42
43#include "ice_lib.h"
44#include "ice_iflib.h"
45#include <dev/pci/pcivar.h>
46#include <dev/pci/pcireg.h>
47#include <machine/resource.h>
48#include <net/if_dl.h>
49#include <sys/firmware.h>
50#include <sys/priv.h>
51#include <sys/limits.h>
52
53/**
54 * @var M_ICE
55 * @brief main ice driver allocation type
56 *
57 * malloc(9) allocation type used by the majority of memory allocations in the
58 * ice driver.
59 */
60MALLOC_DEFINE(M_ICE, "ice", "Intel(R) 100Gb Network Driver lib allocations");
61
62/*
63 * Helper function prototypes
64 */
65static int ice_get_next_vsi(struct ice_vsi **all_vsi, int size);
66static void ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx);
67static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type);
68static int ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx);
69static int ice_setup_tx_ctx(struct ice_tx_queue *txq,
70			    struct ice_tlan_ctx *tlan_ctx, u16 pf_q);
71static int ice_setup_rx_ctx(struct ice_rx_queue *rxq);
72static int ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg);
73static void ice_free_fltr_list(struct ice_list_head *list);
74static int ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
75			       const u8 *addr, enum ice_sw_fwd_act_type action);
76static void ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
77				   struct ice_ctl_q_info *cq);
78static void ice_process_link_event(struct ice_softc *sc, struct ice_rq_event_info *e);
79static void ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
80				    struct ice_rq_event_info *event);
81static void ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf);
82static void ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
83static void ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf);
84static bool ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info);
85static u_int ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl, u_int errors);
86static void ice_add_debug_tunables(struct ice_softc *sc);
87static void ice_add_debug_sysctls(struct ice_softc *sc);
88static void ice_vsi_set_rss_params(struct ice_vsi *vsi);
89static void ice_get_default_rss_key(u8 *seed);
90static int  ice_set_rss_key(struct ice_vsi *vsi);
91static int  ice_set_rss_lut(struct ice_vsi *vsi);
92static void ice_set_rss_flow_flds(struct ice_vsi *vsi);
93static void ice_clean_vsi_rss_cfg(struct ice_vsi *vsi);
94static const char *ice_aq_speed_to_str(struct ice_port_info *pi);
95static const char *ice_requested_fec_mode(struct ice_port_info *pi);
96static const char *ice_negotiated_fec_mode(struct ice_port_info *pi);
97static const char *ice_autoneg_mode(struct ice_port_info *pi);
98static const char *ice_flowcontrol_mode(struct ice_port_info *pi);
99static void ice_print_bus_link_data(device_t dev, struct ice_hw *hw);
100static void ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status);
101static uint8_t ice_pcie_bandwidth_check(struct ice_softc *sc);
102static uint64_t ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed);
103static int ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width);
104static uint64_t ice_phy_types_to_max_rate(struct ice_port_info *pi);
105static void ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
106				     struct sysctl_ctx_list *ctx,
107				     struct sysctl_oid *parent);
108static void
109ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
110				 struct sysctl_oid_list *parent_list,
111				 u64* pfc_stat_location,
112				 const char *node_name,
113				 const char *descr);
114static void ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
115					  struct sysctl_oid *parent,
116					  struct ice_hw_port_stats *stats);
117static void ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
118				 enum ice_vsi_type type, int idx,
119				 bool dynamic);
120static void ice_handle_mib_change_event(struct ice_softc *sc,
121				 struct ice_rq_event_info *event);
122static void
123ice_handle_lan_overflow_event(struct ice_softc *sc,
124			      struct ice_rq_event_info *event);
125static int ice_add_ethertype_to_list(struct ice_vsi *vsi,
126				     struct ice_list_head *list,
127				     u16 ethertype, u16 direction,
128				     enum ice_sw_fwd_act_type action);
129static void ice_del_rx_lldp_filter(struct ice_softc *sc);
130static u16 ice_aq_phy_types_to_link_speeds(u64 phy_type_low,
131					   u64 phy_type_high);
132struct ice_phy_data;
133static int
134ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
135				   struct ice_phy_data *phy_data);
136static int
137ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
138			       struct ice_aqc_set_phy_cfg_data *cfg);
139static int
140ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
141			       struct ice_aqc_set_phy_cfg_data *cfg);
142static void
143ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
144			      struct ice_aqc_set_phy_cfg_data *cfg);
145static void
146ice_print_ldo_tlv(struct ice_softc *sc,
147		  struct ice_link_default_override_tlv *tlv);
148static void
149ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
150				  u64 *phy_type_high);
151static u16 ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type);
152static void
153ice_handle_health_status_event(struct ice_softc *sc,
154			       struct ice_rq_event_info *event);
155static void
156ice_print_health_status_string(device_t dev,
157			       struct ice_aqc_health_status_elem *elem);
158static void
159ice_debug_print_mib_change_event(struct ice_softc *sc,
160				 struct ice_rq_event_info *event);
161static bool ice_check_ets_bw(u8 *table);
162static u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
163static bool
164ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
165		       struct ice_dcbx_cfg *new_cfg);
166static void ice_dcb_recfg(struct ice_softc *sc);
167static u8 ice_dcb_tc_contig(u8 tc_map);
168static int ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit);
169static int ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map);
170static void ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name,
171				   struct ice_dcb_ets_cfg *ets);
172static void ice_stop_pf_vsi(struct ice_softc *sc);
173static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt);
174static int ice_config_pfc(struct ice_softc *sc, u8 new_mode);
175void
176ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
177			    struct sysctl_ctx_list *ctx,
178			    struct sysctl_oid_list *ctx_list);
179static void ice_set_default_local_mib_settings(struct ice_softc *sc);
180static bool ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg);
181static void ice_start_dcbx_agent(struct ice_softc *sc);
182static u16 ice_fw_debug_dump_print_cluster(struct ice_softc *sc,
183					   struct sbuf *sbuf, u16 cluster_id);
184static void ice_remove_vsi_mirroring(struct ice_vsi *vsi);
185
186static int ice_module_init(void);
187static int ice_module_exit(void);
188
189/*
190 * package version comparison functions
191 */
192static bool pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name);
193static int pkg_ver_compatible(struct ice_pkg_ver *pkg_ver);
194
195/*
196 * dynamic sysctl handlers
197 */
198static int ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS);
199static int ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS);
200static int ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS);
201static int ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS);
202static int ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS);
203static int ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS);
204static int ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS);
205static int ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS);
206static int ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS);
207static int ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS);
208static int ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS);
209static int ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS);
210static int ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS);
211static int ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS);
212static int ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS);
213static int ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS);
214static int __ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS,
215					 bool is_phy_type_high);
216static int ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS);
217static int ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS);
218static int ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS);
219static int ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS);
220static int ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS);
221static int ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS);
222static int ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode);
223static int ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS);
224static int ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS);
225static int ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS);
226static int ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS);
227static int ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS);
228static int ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS);
229static int ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS);
230static int ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS);
231static int ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS);
232static int ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS);
233static int ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS);
234static int ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS);
235static int ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS);
236static int ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS);
237static int ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS);
238static int ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS);
239static int ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS);
240static int ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS);
241static int ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS);
242static int ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS);
243static int ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS);
244static int ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS);
245static int ice_sysctl_temperature(SYSCTL_HANDLER_ARGS);
246static int ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS);
247static int ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS);
248
249/**
250 * ice_map_bar - Map PCIe BAR memory
251 * @dev: the PCIe device
252 * @bar: the BAR info structure
253 * @bar_num: PCIe BAR number
254 *
255 * Maps the specified PCIe BAR. Stores the mapping data in struct
256 * ice_bar_info.
257 */
258int
259ice_map_bar(device_t dev, struct ice_bar_info *bar, int bar_num)
260{
261	if (bar->res != NULL) {
262		device_printf(dev, "PCI BAR%d already mapped\n", bar_num);
263		return (EDOOFUS);
264	}
265
266	bar->rid = PCIR_BAR(bar_num);
267	bar->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar->rid,
268					  RF_ACTIVE);
269	if (!bar->res) {
270		device_printf(dev, "PCI BAR%d mapping failed\n", bar_num);
271		return (ENXIO);
272	}
273
274	bar->tag = rman_get_bustag(bar->res);
275	bar->handle = rman_get_bushandle(bar->res);
276	bar->size = rman_get_size(bar->res);
277
278	return (0);
279}
280
281/**
282 * ice_free_bar - Free PCIe BAR memory
283 * @dev: the PCIe device
284 * @bar: the BAR info structure
285 *
286 * Frees the specified PCIe BAR, releasing its resources.
287 */
288void
289ice_free_bar(device_t dev, struct ice_bar_info *bar)
290{
291	if (bar->res != NULL)
292		bus_release_resource(dev, SYS_RES_MEMORY, bar->rid, bar->res);
293	bar->res = NULL;
294}
295
296/**
297 * ice_set_ctrlq_len - Configure ctrlq lengths for a device
298 * @hw: the device hardware structure
299 *
300 * Configures the control queues for the given device, setting up the
301 * specified lengths, prior to initializing hardware.
302 */
303void
304ice_set_ctrlq_len(struct ice_hw *hw)
305{
306	hw->adminq.num_rq_entries = ICE_AQ_LEN;
307	hw->adminq.num_sq_entries = ICE_AQ_LEN;
308	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
309	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
310
311	hw->mailboxq.num_rq_entries = ICE_MBXQ_LEN;
312	hw->mailboxq.num_sq_entries = ICE_MBXQ_LEN;
313	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
314	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
315
316}
317
/**
 * ice_get_next_vsi - Find the first unused VSI slot
 * @all_vsi: the VSI list
 * @size: the number of entries in the VSI list
 *
 * Scans @all_vsi from index 0 and returns the index of the first NULL entry.
 * If every entry is in use, returns @size (one past the last valid index).
 */
static int
ice_get_next_vsi(struct ice_vsi **all_vsi, int size)
{
	int slot;

	slot = 0;
	while (slot < size) {
		if (all_vsi[slot] == NULL)
			return (slot);
		slot++;
	}

	/* No free slot was found */
	return (size);
}
338
339/**
340 * ice_setup_vsi_common - Common VSI setup for both dynamic and static VSIs
341 * @sc: the device private softc structure
342 * @vsi: the VSI to setup
343 * @type: the VSI type of the new VSI
344 * @idx: the index in the all_vsi array to use
345 * @dynamic: whether this VSI memory was dynamically allocated
346 *
347 * Perform setup for a VSI that is common to both dynamically allocated VSIs
348 * and the static PF VSI which is embedded in the softc structure.
349 */
350static void
351ice_setup_vsi_common(struct ice_softc *sc, struct ice_vsi *vsi,
352		     enum ice_vsi_type type, int idx, bool dynamic)
353{
354	/* Store important values in VSI struct */
355	vsi->type = type;
356	vsi->sc = sc;
357	vsi->idx = idx;
358	sc->all_vsi[idx] = vsi;
359	vsi->dynamic = dynamic;
360
361	/* Set default mirroring rule information */
362	vsi->rule_mir_ingress = ICE_INVAL_MIRROR_RULE_ID;
363	vsi->rule_mir_egress = ICE_INVAL_MIRROR_RULE_ID;
364
365	/* Setup the VSI tunables now */
366	ice_add_vsi_tunables(vsi, sc->vsi_sysctls);
367}
368
369/**
370 * ice_alloc_vsi - Allocate a dynamic VSI
371 * @sc: device softc structure
372 * @type: VSI type
373 *
374 * Allocates a new dynamic VSI structure and inserts it into the VSI list.
375 */
376struct ice_vsi *
377ice_alloc_vsi(struct ice_softc *sc, enum ice_vsi_type type)
378{
379	struct ice_vsi *vsi;
380	int idx;
381
382	/* Find an open index for a new VSI to be allocated. If the returned
383	 * index is >= the num_available_vsi then it means no slot is
384	 * available.
385	 */
386	idx = ice_get_next_vsi(sc->all_vsi, sc->num_available_vsi);
387	if (idx >= sc->num_available_vsi) {
388		device_printf(sc->dev, "No available VSI slots\n");
389		return NULL;
390	}
391
392	vsi = (struct ice_vsi *)malloc(sizeof(*vsi), M_ICE, M_NOWAIT | M_ZERO);
393	if (!vsi) {
394		device_printf(sc->dev, "Unable to allocate VSI memory\n");
395		return NULL;
396	}
397
398	ice_setup_vsi_common(sc, vsi, type, idx, true);
399
400	return vsi;
401}
402
403/**
404 * ice_setup_pf_vsi - Setup the PF VSI
405 * @sc: the device private softc
406 *
407 * Setup the PF VSI structure which is embedded as sc->pf_vsi in the device
408 * private softc. Unlike other VSIs, the PF VSI memory is allocated as part of
409 * the softc memory, instead of being dynamically allocated at creation.
410 */
411void
412ice_setup_pf_vsi(struct ice_softc *sc)
413{
414	ice_setup_vsi_common(sc, &sc->pf_vsi, ICE_VSI_PF, 0, false);
415}
416
417/**
418 * ice_alloc_vsi_qmap
419 * @vsi: VSI structure
420 * @max_tx_queues: Number of transmit queues to identify
421 * @max_rx_queues: Number of receive queues to identify
422 *
423 * Allocates a max_[t|r]x_queues array of words for the VSI where each
424 * word contains the index of the queue it represents.  In here, all
425 * words are initialized to an index of ICE_INVALID_RES_IDX, indicating
426 * all queues for this VSI are not yet assigned an index and thus,
427 * not ready for use.
428 *
429 * Returns an error code on failure.
430 */
431int
432ice_alloc_vsi_qmap(struct ice_vsi *vsi, const int max_tx_queues,
433		   const int max_rx_queues)
434{
435	struct ice_softc *sc = vsi->sc;
436	int i;
437
438	MPASS(max_tx_queues > 0);
439	MPASS(max_rx_queues > 0);
440
441	/* Allocate Tx queue mapping memory */
442	if (!(vsi->tx_qmap =
443	      (u16 *) malloc(sizeof(u16) * max_tx_queues, M_ICE, M_WAITOK))) {
444		device_printf(sc->dev, "Unable to allocate Tx qmap memory\n");
445		return (ENOMEM);
446	}
447
448	/* Allocate Rx queue mapping memory */
449	if (!(vsi->rx_qmap =
450	      (u16 *) malloc(sizeof(u16) * max_rx_queues, M_ICE, M_WAITOK))) {
451		device_printf(sc->dev, "Unable to allocate Rx qmap memory\n");
452		goto free_tx_qmap;
453	}
454
455	/* Mark every queue map as invalid to start with */
456	for (i = 0; i < max_tx_queues; i++) {
457		vsi->tx_qmap[i] = ICE_INVALID_RES_IDX;
458	}
459	for (i = 0; i < max_rx_queues; i++) {
460		vsi->rx_qmap[i] = ICE_INVALID_RES_IDX;
461	}
462
463	return 0;
464
465free_tx_qmap:
466	free(vsi->tx_qmap, M_ICE);
467	vsi->tx_qmap = NULL;
468
469	return (ENOMEM);
470}
471
472/**
473 * ice_free_vsi_qmaps - Free the PF qmaps associated with a VSI
474 * @vsi: the VSI private structure
475 *
476 * Frees the PF qmaps associated with the given VSI. Generally this will be
477 * called by ice_release_vsi, but may need to be called during attach cleanup,
478 * depending on when the qmaps were allocated.
479 */
480void
481ice_free_vsi_qmaps(struct ice_vsi *vsi)
482{
483	struct ice_softc *sc = vsi->sc;
484
485	if (vsi->tx_qmap) {
486		ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
487					   vsi->num_tx_queues);
488		free(vsi->tx_qmap, M_ICE);
489		vsi->tx_qmap = NULL;
490	}
491
492	if (vsi->rx_qmap) {
493		ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
494					   vsi->num_rx_queues);
495		free(vsi->rx_qmap, M_ICE);
496		vsi->rx_qmap = NULL;
497	}
498}
499
500/**
501 * ice_set_default_vsi_ctx - Setup default VSI context parameters
502 * @ctx: the VSI context to initialize
503 *
504 * Initialize and prepare a default VSI context for configuring a new VSI.
505 */
506static void
507ice_set_default_vsi_ctx(struct ice_vsi_ctx *ctx)
508{
509	u32 table = 0;
510
511	memset(&ctx->info, 0, sizeof(ctx->info));
512	/* VSI will be allocated from shared pool */
513	ctx->alloc_from_pool = true;
514	/* Enable source pruning by default */
515	ctx->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
516	/* Traffic from VSI can be sent to LAN */
517	ctx->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
518	/* Allow all packets untagged/tagged */
519	ctx->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
520				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
521				       ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
522	/* Show VLAN/UP from packets in Rx descriptors */
523	ctx->info.inner_vlan_flags |= ((ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH &
524					ICE_AQ_VSI_INNER_VLAN_EMODE_M) >>
525					ICE_AQ_VSI_INNER_VLAN_EMODE_S);
526	/* Have 1:1 UP mapping for both ingress/egress tables */
527	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
528	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
529	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
530	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
531	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
532	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
533	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
534	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
535	ctx->info.ingress_table = CPU_TO_LE32(table);
536	ctx->info.egress_table = CPU_TO_LE32(table);
537	/* Have 1:1 UP mapping for outer to inner UP table */
538	ctx->info.outer_up_table = CPU_TO_LE32(table);
539	/* No Outer tag support, so outer_vlan_flags remains zero */
540}
541
542/**
543 * ice_set_rss_vsi_ctx - Setup VSI context parameters for RSS
544 * @ctx: the VSI context to configure
545 * @type: the VSI type
546 *
547 * Configures the VSI context for RSS, based on the VSI type.
548 */
549static void
550ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctx, enum ice_vsi_type type)
551{
552	u8 lut_type, hash_type;
553
554	switch (type) {
555	case ICE_VSI_PF:
556		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
557		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
558		break;
559	case ICE_VSI_VF:
560	case ICE_VSI_VMDQ2:
561		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
562		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
563		break;
564	default:
565		/* Other VSI types do not support RSS */
566		return;
567	}
568
569	ctx->info.q_opt_rss = (((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
570				 ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
571				((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) &
572				 ICE_AQ_VSI_Q_OPT_RSS_HASH_M));
573}
574
575/**
576 * ice_setup_vsi_qmap - Setup the queue mapping for a VSI
577 * @vsi: the VSI to configure
578 * @ctx: the VSI context to configure
579 *
580 * Configures the context for the given VSI, setting up how the firmware
581 * should map the queues for this VSI.
582 *
583 * @pre vsi->qmap_type is set to a valid type
584 */
585static int
586ice_setup_vsi_qmap(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
587{
588	int pow = 0;
589	u16 qmap;
590
591	MPASS(vsi->rx_qmap != NULL);
592
593	switch (vsi->qmap_type) {
594	case ICE_RESMGR_ALLOC_CONTIGUOUS:
595		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
596
597		ctx->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
598		ctx->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
599
600		break;
601	case ICE_RESMGR_ALLOC_SCATTERED:
602		ctx->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_NONCONTIG);
603
604		for (int i = 0; i < vsi->num_rx_queues; i++)
605			ctx->info.q_mapping[i] = CPU_TO_LE16(vsi->rx_qmap[i]);
606		break;
607	default:
608		return (EOPNOTSUPP);
609	}
610
611	/* Calculate the next power-of-2 of number of queues */
612	if (vsi->num_rx_queues)
613		pow = flsl(vsi->num_rx_queues - 1);
614
615	/* Assign all the queues to traffic class zero */
616	qmap = (pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M;
617	ctx->info.tc_mapping[0] = CPU_TO_LE16(qmap);
618
619	/* Fill out default driver TC queue info for VSI */
620	vsi->tc_info[0].qoffset = 0;
621	vsi->tc_info[0].qcount_rx = vsi->num_rx_queues;
622	vsi->tc_info[0].qcount_tx = vsi->num_tx_queues;
623	for (int i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
624		vsi->tc_info[i].qoffset = 0;
625		vsi->tc_info[i].qcount_rx = 1;
626		vsi->tc_info[i].qcount_tx = 1;
627	}
628	vsi->tc_map = 0x1;
629
630	return 0;
631}
632
633/**
634 * ice_setup_vsi_mirroring -- Setup a VSI for mirroring PF VSI traffic
635 * @vsi: VSI to setup
636 *
637 * @pre vsi->mirror_src_vsi is set to the SW VSI num that traffic is to be
638 * mirrored from
639 *
640 * Returns 0 on success, EINVAL on failure.
641 */
642int
643ice_setup_vsi_mirroring(struct ice_vsi *vsi)
644{
645	struct ice_mir_rule_buf rule = { };
646	struct ice_softc *sc = vsi->sc;
647	struct ice_hw *hw = &sc->hw;
648	device_t dev = sc->dev;
649	enum ice_status status;
650	u16 rule_id, dest_vsi;
651	u16 count = 1;
652
653	rule.vsi_idx = ice_get_hw_vsi_num(hw, vsi->mirror_src_vsi);
654	rule.add = true;
655
656	dest_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
657	rule_id = ICE_INVAL_MIRROR_RULE_ID;
658	status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_INGRESS,
659					    dest_vsi, count, &rule, NULL,
660					    &rule_id);
661	if (status) {
662		device_printf(dev,
663		    "Could not add INGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n",
664		    rule.vsi_idx, dest_vsi, ice_status_str(status),
665		    ice_aq_str(hw->adminq.sq_last_status));
666		return (EINVAL);
667	}
668
669	vsi->rule_mir_ingress = rule_id;
670
671	rule_id = ICE_INVAL_MIRROR_RULE_ID;
672	status = ice_aq_add_update_mir_rule(hw, ICE_AQC_RULE_TYPE_VPORT_EGRESS,
673					    dest_vsi, count, &rule, NULL, &rule_id);
674	if (status) {
675		device_printf(dev,
676		    "Could not add EGRESS rule for mirror vsi %d to vsi %d, err %s aq_err %s\n",
677		    rule.vsi_idx, dest_vsi, ice_status_str(status),
678		    ice_aq_str(hw->adminq.sq_last_status));
679		return (EINVAL);
680	}
681
682	vsi->rule_mir_egress = rule_id;
683
684	return (0);
685}
686
687/**
688 * ice_remove_vsi_mirroring -- Teardown any VSI mirroring rules
689 * @vsi: VSI to remove mirror rules from
690 */
691static void
692ice_remove_vsi_mirroring(struct ice_vsi *vsi)
693{
694	struct ice_hw *hw = &vsi->sc->hw;
695	enum ice_status status = ICE_SUCCESS;
696	bool keep_alloc = false;
697
698	if (vsi->rule_mir_ingress != ICE_INVAL_MIRROR_RULE_ID)
699		status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_ingress, keep_alloc, NULL);
700
701	if (status)
702		device_printf(vsi->sc->dev, "Could not remove mirror VSI ingress rule, err %s aq_err %s\n",
703			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
704
705	status = ICE_SUCCESS;
706
707	if (vsi->rule_mir_egress != ICE_INVAL_MIRROR_RULE_ID)
708		status = ice_aq_delete_mir_rule(hw, vsi->rule_mir_egress, keep_alloc, NULL);
709
710	if (status)
711		device_printf(vsi->sc->dev, "Could not remove mirror VSI egress rule, err %s aq_err %s\n",
712			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
713}
714
715/**
716 * ice_initialize_vsi - Initialize a VSI for use
717 * @vsi: the vsi to initialize
718 *
719 * Initialize a VSI over the adminq and prepare it for operation.
720 *
721 * @pre vsi->num_tx_queues is set
722 * @pre vsi->num_rx_queues is set
723 */
724int
725ice_initialize_vsi(struct ice_vsi *vsi)
726{
727	struct ice_vsi_ctx ctx = { 0 };
728	struct ice_hw *hw = &vsi->sc->hw;
729	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
730	enum ice_status status;
731	int err;
732
733	/* For now, we only have code supporting PF VSIs */
734	switch (vsi->type) {
735	case ICE_VSI_PF:
736		ctx.flags = ICE_AQ_VSI_TYPE_PF;
737		break;
738	case ICE_VSI_VMDQ2:
739		ctx.flags = ICE_AQ_VSI_TYPE_VMDQ2;
740		break;
741	default:
742		return (ENODEV);
743	}
744
745	ice_set_default_vsi_ctx(&ctx);
746	ice_set_rss_vsi_ctx(&ctx, vsi->type);
747
748	/* XXX: VSIs of other types may need different port info? */
749	ctx.info.sw_id = hw->port_info->sw_id;
750
751	/* Set some RSS parameters based on the VSI type */
752	ice_vsi_set_rss_params(vsi);
753
754	/* Initialize the Rx queue mapping for this VSI */
755	err = ice_setup_vsi_qmap(vsi, &ctx);
756	if (err) {
757		return err;
758	}
759
760	/* (Re-)add VSI to HW VSI handle list */
761	status = ice_add_vsi(hw, vsi->idx, &ctx, NULL);
762	if (status != 0) {
763		device_printf(vsi->sc->dev,
764		    "Add VSI AQ call failed, err %s aq_err %s\n",
765		    ice_status_str(status),
766		    ice_aq_str(hw->adminq.sq_last_status));
767		return (EIO);
768	}
769	vsi->info = ctx.info;
770
771	/* Initialize VSI with just 1 TC to start */
772	max_txqs[0] = vsi->num_tx_queues;
773
774	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx,
775			      ICE_DFLT_TRAFFIC_CLASS, max_txqs);
776	if (status) {
777		device_printf(vsi->sc->dev,
778		    "Failed VSI lan queue config, err %s aq_err %s\n",
779		    ice_status_str(status),
780		    ice_aq_str(hw->adminq.sq_last_status));
781		ice_deinit_vsi(vsi);
782		return (ENODEV);
783	}
784
785	/* Reset VSI stats */
786	ice_reset_vsi_stats(vsi);
787
788	return 0;
789}
790
791/**
792 * ice_deinit_vsi - Tell firmware to release resources for a VSI
793 * @vsi: the VSI to release
794 *
795 * Helper function which requests the firmware to release the hardware
796 * resources associated with a given VSI.
797 */
798void
799ice_deinit_vsi(struct ice_vsi *vsi)
800{
801	struct ice_vsi_ctx ctx = { 0 };
802	struct ice_softc *sc = vsi->sc;
803	struct ice_hw *hw = &sc->hw;
804	enum ice_status status;
805
806	/* Assert that the VSI pointer matches in the list */
807	MPASS(vsi == sc->all_vsi[vsi->idx]);
808
809	ctx.info = vsi->info;
810
811	status = ice_rm_vsi_lan_cfg(hw->port_info, vsi->idx);
812	if (status) {
813		/*
814		 * This should only fail if the VSI handle is invalid, or if
815		 * any of the nodes have leaf nodes which are still in use.
816		 */
817		device_printf(sc->dev,
818			      "Unable to remove scheduler nodes for VSI %d, err %s\n",
819			      vsi->idx, ice_status_str(status));
820	}
821
822	/* Tell firmware to release the VSI resources */
823	status = ice_free_vsi(hw, vsi->idx, &ctx, false, NULL);
824	if (status != 0) {
825		device_printf(sc->dev,
826		    "Free VSI %u AQ call failed, err %s aq_err %s\n",
827		    vsi->idx, ice_status_str(status),
828		    ice_aq_str(hw->adminq.sq_last_status));
829	}
830}
831
832/**
833 * ice_release_vsi - Release resources associated with a VSI
834 * @vsi: the VSI to release
835 *
836 * Release software and firmware resources associated with a VSI. Release the
837 * queue managers associated with this VSI. Also free the VSI structure memory
838 * if the VSI was allocated dynamically using ice_alloc_vsi().
839 */
840void
841ice_release_vsi(struct ice_vsi *vsi)
842{
843	struct ice_softc *sc = vsi->sc;
844	int idx = vsi->idx;
845
846	/* Assert that the VSI pointer matches in the list */
847	MPASS(vsi == sc->all_vsi[idx]);
848
849	/* Cleanup RSS configuration */
850	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
851		ice_clean_vsi_rss_cfg(vsi);
852
853	ice_del_vsi_sysctl_ctx(vsi);
854
855	/* Remove the configured mirror rule, if it exists */
856	ice_remove_vsi_mirroring(vsi);
857
858	/*
859	 * If we unload the driver after a reset fails, we do not need to do
860	 * this step.
861	 */
862	if (!ice_test_state(&sc->state, ICE_STATE_RESET_FAILED))
863		ice_deinit_vsi(vsi);
864
865	ice_free_vsi_qmaps(vsi);
866
867	if (vsi->dynamic) {
868		free(sc->all_vsi[idx], M_ICE);
869	}
870
871	sc->all_vsi[idx] = NULL;
872}
873
874/**
875 * ice_aq_speed_to_rate - Convert AdminQ speed enum to baudrate
876 * @pi: port info data
877 *
878 * Returns the baudrate value for the current link speed of a given port.
879 */
880uint64_t
881ice_aq_speed_to_rate(struct ice_port_info *pi)
882{
883	switch (pi->phy.link_info.link_speed) {
884	case ICE_AQ_LINK_SPEED_100GB:
885		return IF_Gbps(100);
886	case ICE_AQ_LINK_SPEED_50GB:
887		return IF_Gbps(50);
888	case ICE_AQ_LINK_SPEED_40GB:
889		return IF_Gbps(40);
890	case ICE_AQ_LINK_SPEED_25GB:
891		return IF_Gbps(25);
892	case ICE_AQ_LINK_SPEED_10GB:
893		return IF_Gbps(10);
894	case ICE_AQ_LINK_SPEED_5GB:
895		return IF_Gbps(5);
896	case ICE_AQ_LINK_SPEED_2500MB:
897		return IF_Mbps(2500);
898	case ICE_AQ_LINK_SPEED_1000MB:
899		return IF_Mbps(1000);
900	case ICE_AQ_LINK_SPEED_100MB:
901		return IF_Mbps(100);
902	case ICE_AQ_LINK_SPEED_10MB:
903		return IF_Mbps(10);
904	case ICE_AQ_LINK_SPEED_UNKNOWN:
905	default:
906		/* return 0 if we don't know the link speed */
907		return 0;
908	}
909}
910
911/**
912 * ice_aq_speed_to_str - Convert AdminQ speed enum to string representation
913 * @pi: port info data
914 *
915 * Returns the string representation of the current link speed for a given
916 * port.
917 */
918static const char *
919ice_aq_speed_to_str(struct ice_port_info *pi)
920{
921	switch (pi->phy.link_info.link_speed) {
922	case ICE_AQ_LINK_SPEED_100GB:
923		return "100 Gbps";
924	case ICE_AQ_LINK_SPEED_50GB:
925		return "50 Gbps";
926	case ICE_AQ_LINK_SPEED_40GB:
927		return "40 Gbps";
928	case ICE_AQ_LINK_SPEED_25GB:
929		return "25 Gbps";
930	case ICE_AQ_LINK_SPEED_20GB:
931		return "20 Gbps";
932	case ICE_AQ_LINK_SPEED_10GB:
933		return "10 Gbps";
934	case ICE_AQ_LINK_SPEED_5GB:
935		return "5 Gbps";
936	case ICE_AQ_LINK_SPEED_2500MB:
937		return "2.5 Gbps";
938	case ICE_AQ_LINK_SPEED_1000MB:
939		return "1 Gbps";
940	case ICE_AQ_LINK_SPEED_100MB:
941		return "100 Mbps";
942	case ICE_AQ_LINK_SPEED_10MB:
943		return "10 Mbps";
944	case ICE_AQ_LINK_SPEED_UNKNOWN:
945	default:
946		return "Unknown speed";
947	}
948}
949
/**
 * ice_get_phy_type_low - Get media associated with phy_type_low
 * @phy_type_low: the low 64bits of phy_type from the AdminQ
 *
 * Given the lower 64bits of the phy_type from the hardware, return the
 * ifm_active bit associated. Return IFM_UNKNOWN when phy_type_low is unknown.
 *
 * The switch compares the whole value against each ICE_PHY_TYPE_LOW_*
 * constant, so a value that does not equal exactly one of them (for example
 * zero) maps to IFM_UNKNOWN.
 *
 * Note that only one of ice_get_phy_type_low or ice_get_phy_type_high should
 * be called. If phy_type_low is zero, call ice_get_phy_type_high.
 */
int
ice_get_phy_type_low(uint64_t phy_type_low)
{
	switch (phy_type_low) {
	case ICE_PHY_TYPE_LOW_100BASE_TX:
		return IFM_100_TX;
	case ICE_PHY_TYPE_LOW_100M_SGMII:
		return IFM_100_SGMII;
	case ICE_PHY_TYPE_LOW_1000BASE_T:
		return IFM_1000_T;
	case ICE_PHY_TYPE_LOW_1000BASE_SX:
		return IFM_1000_SX;
	case ICE_PHY_TYPE_LOW_1000BASE_LX:
		return IFM_1000_LX;
	case ICE_PHY_TYPE_LOW_1000BASE_KX:
		return IFM_1000_KX;
	case ICE_PHY_TYPE_LOW_1G_SGMII:
		return IFM_1000_SGMII;
	case ICE_PHY_TYPE_LOW_2500BASE_T:
		return IFM_2500_T;
	case ICE_PHY_TYPE_LOW_2500BASE_X:
		return IFM_2500_X;
	case ICE_PHY_TYPE_LOW_2500BASE_KX:
		return IFM_2500_KX;
	case ICE_PHY_TYPE_LOW_5GBASE_T:
		return IFM_5000_T;
	case ICE_PHY_TYPE_LOW_5GBASE_KR:
		return IFM_5000_KR;
	case ICE_PHY_TYPE_LOW_10GBASE_T:
		return IFM_10G_T;
	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
		return IFM_10G_TWINAX;
	case ICE_PHY_TYPE_LOW_10GBASE_SR:
		return IFM_10G_SR;
	case ICE_PHY_TYPE_LOW_10GBASE_LR:
		return IFM_10G_LR;
	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
		return IFM_10G_KR;
	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
		return IFM_10G_AOC;
	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
		return IFM_10G_SFI;
	case ICE_PHY_TYPE_LOW_25GBASE_T:
		return IFM_25G_T;
	case ICE_PHY_TYPE_LOW_25GBASE_CR:
		return IFM_25G_CR;
	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
		return IFM_25G_CR_S;
	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
		return IFM_25G_CR1;
	case ICE_PHY_TYPE_LOW_25GBASE_SR:
		return IFM_25G_SR;
	case ICE_PHY_TYPE_LOW_25GBASE_LR:
		return IFM_25G_LR;
	case ICE_PHY_TYPE_LOW_25GBASE_KR:
		return IFM_25G_KR;
	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
		return IFM_25G_KR_S;
	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
		return IFM_25G_KR1;
	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
		return IFM_25G_AOC;
	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
		return IFM_25G_AUI;
	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
		return IFM_40G_CR4;
	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
		return IFM_40G_SR4;
	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
		return IFM_40G_LR4;
	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
		return IFM_40G_KR4;
	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
		return IFM_40G_XLAUI_AC;
	case ICE_PHY_TYPE_LOW_40G_XLAUI:
		return IFM_40G_XLAUI;
	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
		return IFM_50G_CR2;
	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
		return IFM_50G_SR2;
	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
		return IFM_50G_LR2;
	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
		return IFM_50G_KR2;
	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
		return IFM_50G_LAUI2_AC;
	case ICE_PHY_TYPE_LOW_50G_LAUI2:
		return IFM_50G_LAUI2;
	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
		return IFM_50G_AUI2_AC;
	case ICE_PHY_TYPE_LOW_50G_AUI2:
		return IFM_50G_AUI2;
	case ICE_PHY_TYPE_LOW_50GBASE_CP:
		return IFM_50G_CP;
	case ICE_PHY_TYPE_LOW_50GBASE_SR:
		return IFM_50G_SR;
	case ICE_PHY_TYPE_LOW_50GBASE_FR:
		return IFM_50G_FR;
	case ICE_PHY_TYPE_LOW_50GBASE_LR:
		return IFM_50G_LR;
	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
		return IFM_50G_KR_PAM4;
	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
		return IFM_50G_AUI1_AC;
	case ICE_PHY_TYPE_LOW_50G_AUI1:
		return IFM_50G_AUI1;
	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
		return IFM_100G_CR4;
	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
		return IFM_100G_SR4;
	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
		return IFM_100G_LR4;
	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
		return IFM_100G_KR4;
	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
		return IFM_100G_CAUI4_AC;
	case ICE_PHY_TYPE_LOW_100G_CAUI4:
		return IFM_100G_CAUI4;
	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
		return IFM_100G_AUI4_AC;
	case ICE_PHY_TYPE_LOW_100G_AUI4:
		return IFM_100G_AUI4;
	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
		return IFM_100G_CR_PAM4;
	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
		return IFM_100G_KR_PAM4;
	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
		return IFM_100G_CP2;
	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
		return IFM_100G_SR2;
	case ICE_PHY_TYPE_LOW_100GBASE_DR:
		return IFM_100G_DR;
	default:
		/* Anything not listed above has no known media type */
		return IFM_UNKNOWN;
	}
}
1095
1096/**
1097 * ice_get_phy_type_high - Get media associated with phy_type_high
1098 * @phy_type_high: the upper 64bits of phy_type from the AdminQ
1099 *
1100 * Given the upper 64bits of the phy_type from the hardware, return the
1101 * ifm_active bit associated. Return IFM_UNKNOWN on an unknown value. Note
1102 * that only one of ice_get_phy_type_low or ice_get_phy_type_high should be
1103 * called. If phy_type_high is zero, call ice_get_phy_type_low.
1104 */
1105int
1106ice_get_phy_type_high(uint64_t phy_type_high)
1107{
1108	switch (phy_type_high) {
1109	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
1110		return IFM_100G_KR2_PAM4;
1111	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
1112		return IFM_100G_CAUI2_AC;
1113	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
1114		return IFM_100G_CAUI2;
1115	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
1116		return IFM_100G_AUI2_AC;
1117	case ICE_PHY_TYPE_HIGH_100G_AUI2:
1118		return IFM_100G_AUI2;
1119	default:
1120		return IFM_UNKNOWN;
1121	}
1122}
1123
1124/**
1125 * ice_phy_types_to_max_rate - Returns port's max supported baudrate
1126 * @pi: port info struct
1127 *
1128 * ice_aq_get_phy_caps() w/ ICE_AQC_REPORT_TOPO_CAP_MEDIA parameter needs
1129 * to have been called before this function for it to work.
1130 */
1131static uint64_t
1132ice_phy_types_to_max_rate(struct ice_port_info *pi)
1133{
1134	uint64_t phy_low = pi->phy.phy_type_low;
1135	uint64_t phy_high = pi->phy.phy_type_high;
1136	uint64_t max_rate = 0;
1137	int bit;
1138
1139	/*
1140	 * These are based on the indices used in the BIT() macros for
1141	 * ICE_PHY_TYPE_LOW_*
1142	 */
1143	static const uint64_t phy_rates[] = {
1144	    IF_Mbps(100),
1145	    IF_Mbps(100),
1146	    IF_Gbps(1ULL),
1147	    IF_Gbps(1ULL),
1148	    IF_Gbps(1ULL),
1149	    IF_Gbps(1ULL),
1150	    IF_Gbps(1ULL),
1151	    IF_Mbps(2500ULL),
1152	    IF_Mbps(2500ULL),
1153	    IF_Mbps(2500ULL),
1154	    IF_Gbps(5ULL),
1155	    IF_Gbps(5ULL),
1156	    IF_Gbps(10ULL),
1157	    IF_Gbps(10ULL),
1158	    IF_Gbps(10ULL),
1159	    IF_Gbps(10ULL),
1160	    IF_Gbps(10ULL),
1161	    IF_Gbps(10ULL),
1162	    IF_Gbps(10ULL),
1163	    IF_Gbps(25ULL),
1164	    IF_Gbps(25ULL),
1165	    IF_Gbps(25ULL),
1166	    IF_Gbps(25ULL),
1167	    IF_Gbps(25ULL),
1168	    IF_Gbps(25ULL),
1169	    IF_Gbps(25ULL),
1170	    IF_Gbps(25ULL),
1171	    IF_Gbps(25ULL),
1172	    IF_Gbps(25ULL),
1173	    IF_Gbps(25ULL),
1174	    IF_Gbps(40ULL),
1175	    IF_Gbps(40ULL),
1176	    IF_Gbps(40ULL),
1177	    IF_Gbps(40ULL),
1178	    IF_Gbps(40ULL),
1179	    IF_Gbps(40ULL),
1180	    IF_Gbps(50ULL),
1181	    IF_Gbps(50ULL),
1182	    IF_Gbps(50ULL),
1183	    IF_Gbps(50ULL),
1184	    IF_Gbps(50ULL),
1185	    IF_Gbps(50ULL),
1186	    IF_Gbps(50ULL),
1187	    IF_Gbps(50ULL),
1188	    IF_Gbps(50ULL),
1189	    IF_Gbps(50ULL),
1190	    IF_Gbps(50ULL),
1191	    IF_Gbps(50ULL),
1192	    IF_Gbps(50ULL),
1193	    IF_Gbps(50ULL),
1194	    IF_Gbps(50ULL),
1195	    IF_Gbps(100ULL),
1196	    IF_Gbps(100ULL),
1197	    IF_Gbps(100ULL),
1198	    IF_Gbps(100ULL),
1199	    IF_Gbps(100ULL),
1200	    IF_Gbps(100ULL),
1201	    IF_Gbps(100ULL),
1202	    IF_Gbps(100ULL),
1203	    IF_Gbps(100ULL),
1204	    IF_Gbps(100ULL),
1205	    IF_Gbps(100ULL),
1206	    IF_Gbps(100ULL),
1207	    IF_Gbps(100ULL),
1208	    /* These rates are for ICE_PHY_TYPE_HIGH_* */
1209	    IF_Gbps(100ULL),
1210	    IF_Gbps(100ULL),
1211	    IF_Gbps(100ULL),
1212	    IF_Gbps(100ULL),
1213	    IF_Gbps(100ULL)
1214	};
1215
1216	/* coverity[address_of] */
1217	for_each_set_bit(bit, &phy_high, 64)
1218		if ((bit + 64) < (int)ARRAY_SIZE(phy_rates))
1219			max_rate = uqmax(max_rate, phy_rates[(bit + 64)]);
1220
1221	/* coverity[address_of] */
1222	for_each_set_bit(bit, &phy_low, 64)
1223		max_rate = uqmax(max_rate, phy_rates[bit]);
1224
1225	return (max_rate);
1226}
1227
/* The if_media type is split over the original 5 bit media variant field,
 * along with extended types using up extra bits in the options section.
 * We want to convert this split number into a bitmap index, so we reverse the
 * calculation of IFM_X here.
 *
 * The result is the IFM_TMASK bits of x OR'd with the IFM_ETH_XTYPE bits of
 * x shifted right by IFM_ETH_XSHIFT. Note that the argument is evaluated
 * twice, so it must not have side effects.
 */
#define IFM_IDX(x) (((x) & IFM_TMASK) | \
		    (((x) & IFM_ETH_XTYPE) >> IFM_ETH_XSHIFT))
1235
1236/**
1237 * ice_add_media_types - Add supported media types to the media structure
1238 * @sc: ice private softc structure
1239 * @media: ifmedia structure to setup
1240 *
1241 * Looks up the supported phy types, and initializes the various media types
1242 * available.
1243 *
1244 * @pre this function must be protected from being called while another thread
1245 * is accessing the ifmedia types.
1246 */
1247enum ice_status
1248ice_add_media_types(struct ice_softc *sc, struct ifmedia *media)
1249{
1250	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
1251	struct ice_port_info *pi = sc->hw.port_info;
1252	enum ice_status status;
1253	uint64_t phy_low, phy_high;
1254	int bit;
1255
1256	ASSERT_CFG_LOCKED(sc);
1257
1258	/* the maximum possible media type index is 511. We probably don't
1259	 * need most of this space, but this ensures future compatibility when
1260	 * additional media types are used.
1261	 */
1262	ice_declare_bitmap(already_added, 511);
1263
1264	/* Remove all previous media types */
1265	ifmedia_removeall(media);
1266
1267	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
1268				     &pcaps, NULL);
1269	if (status != ICE_SUCCESS) {
1270		device_printf(sc->dev,
1271		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
1272		    __func__, ice_status_str(status),
1273		    ice_aq_str(sc->hw.adminq.sq_last_status));
1274		return (status);
1275	}
1276	phy_low = le64toh(pcaps.phy_type_low);
1277	phy_high = le64toh(pcaps.phy_type_high);
1278
1279	/* make sure the added bitmap is zero'd */
1280	memset(already_added, 0, sizeof(already_added));
1281
1282	/* coverity[address_of] */
1283	for_each_set_bit(bit, &phy_low, 64) {
1284		uint64_t type = BIT_ULL(bit);
1285		int ostype;
1286
1287		/* get the OS media type */
1288		ostype = ice_get_phy_type_low(type);
1289
1290		/* don't bother adding the unknown type */
1291		if (ostype == IFM_UNKNOWN)
1292			continue;
1293
1294		/* only add each media type to the list once */
1295		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1296			continue;
1297
1298		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1299		ice_set_bit(IFM_IDX(ostype), already_added);
1300	}
1301
1302	/* coverity[address_of] */
1303	for_each_set_bit(bit, &phy_high, 64) {
1304		uint64_t type = BIT_ULL(bit);
1305		int ostype;
1306
1307		/* get the OS media type */
1308		ostype = ice_get_phy_type_high(type);
1309
1310		/* don't bother adding the unknown type */
1311		if (ostype == IFM_UNKNOWN)
1312			continue;
1313
1314		/* only add each media type to the list once */
1315		if (ice_is_bit_set(already_added, IFM_IDX(ostype)))
1316			continue;
1317
1318		ifmedia_add(media, IFM_ETHER | ostype, 0, NULL);
1319		ice_set_bit(IFM_IDX(ostype), already_added);
1320	}
1321
1322	/* Use autoselect media by default */
1323	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
1324	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
1325
1326	return (ICE_SUCCESS);
1327}
1328
1329/**
1330 * ice_configure_rxq_interrupt - Configure HW Rx queue for an MSI-X interrupt
1331 * @hw: ice hw structure
1332 * @rxqid: Rx queue index in PF space
1333 * @vector: MSI-X vector index in PF/VF space
1334 * @itr_idx: ITR index to use for interrupt
1335 *
1336 * @remark ice_flush() may need to be called after this
1337 */
1338void
1339ice_configure_rxq_interrupt(struct ice_hw *hw, u16 rxqid, u16 vector, u8 itr_idx)
1340{
1341	u32 val;
1342
1343	MPASS(itr_idx <= ICE_ITR_NONE);
1344
1345	val = (QINT_RQCTL_CAUSE_ENA_M |
1346	       (itr_idx << QINT_RQCTL_ITR_INDX_S) |
1347	       (vector << QINT_RQCTL_MSIX_INDX_S));
1348	wr32(hw, QINT_RQCTL(rxqid), val);
1349}
1350
1351/**
1352 * ice_configure_all_rxq_interrupts - Configure HW Rx queues for MSI-X interrupts
1353 * @vsi: the VSI to configure
1354 *
1355 * Called when setting up MSI-X interrupts to configure the Rx hardware queues.
1356 */
1357void
1358ice_configure_all_rxq_interrupts(struct ice_vsi *vsi)
1359{
1360	struct ice_hw *hw = &vsi->sc->hw;
1361	int i;
1362
1363	for (i = 0; i < vsi->num_rx_queues; i++) {
1364		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1365
1366		ice_configure_rxq_interrupt(hw, vsi->rx_qmap[rxq->me],
1367					    rxq->irqv->me, ICE_RX_ITR);
1368
1369		ice_debug(hw, ICE_DBG_INIT,
1370		    "RXQ(%d) intr enable: me %d rxqid %d vector %d\n",
1371		    i, rxq->me, vsi->rx_qmap[rxq->me], rxq->irqv->me);
1372	}
1373
1374	ice_flush(hw);
1375}
1376
1377/**
1378 * ice_configure_txq_interrupt - Configure HW Tx queue for an MSI-X interrupt
1379 * @hw: ice hw structure
1380 * @txqid: Tx queue index in PF space
1381 * @vector: MSI-X vector index in PF/VF space
1382 * @itr_idx: ITR index to use for interrupt
1383 *
1384 * @remark ice_flush() may need to be called after this
1385 */
1386void
1387ice_configure_txq_interrupt(struct ice_hw *hw, u16 txqid, u16 vector, u8 itr_idx)
1388{
1389	u32 val;
1390
1391	MPASS(itr_idx <= ICE_ITR_NONE);
1392
1393	val = (QINT_TQCTL_CAUSE_ENA_M |
1394	       (itr_idx << QINT_TQCTL_ITR_INDX_S) |
1395	       (vector << QINT_TQCTL_MSIX_INDX_S));
1396	wr32(hw, QINT_TQCTL(txqid), val);
1397}
1398
1399/**
1400 * ice_configure_all_txq_interrupts - Configure HW Tx queues for MSI-X interrupts
1401 * @vsi: the VSI to configure
1402 *
1403 * Called when setting up MSI-X interrupts to configure the Tx hardware queues.
1404 */
1405void
1406ice_configure_all_txq_interrupts(struct ice_vsi *vsi)
1407{
1408	struct ice_hw *hw = &vsi->sc->hw;
1409	int i;
1410
1411	for (i = 0; i < vsi->num_tx_queues; i++) {
1412		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1413
1414		ice_configure_txq_interrupt(hw, vsi->tx_qmap[txq->me],
1415					    txq->irqv->me, ICE_TX_ITR);
1416	}
1417
1418	ice_flush(hw);
1419}
1420
1421/**
1422 * ice_flush_rxq_interrupts - Unconfigure Hw Rx queues MSI-X interrupt cause
1423 * @vsi: the VSI to configure
1424 *
1425 * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1426 * a software interrupt on that cause. This is required as part of the Rx
1427 * queue disable logic to dissociate the Rx queue from the interrupt.
1428 *
1429 * Note: this function must be called prior to disabling Rx queues with
1430 * ice_control_all_rx_queues, otherwise the Rx queue may not be disabled properly.
1431 */
1432void
1433ice_flush_rxq_interrupts(struct ice_vsi *vsi)
1434{
1435	struct ice_hw *hw = &vsi->sc->hw;
1436	int i;
1437
1438	for (i = 0; i < vsi->num_rx_queues; i++) {
1439		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1440		u32 reg, val;
1441
1442		/* Clear the CAUSE_ENA flag */
1443		reg = vsi->rx_qmap[rxq->me];
1444		val = rd32(hw, QINT_RQCTL(reg));
1445		val &= ~QINT_RQCTL_CAUSE_ENA_M;
1446		wr32(hw, QINT_RQCTL(reg), val);
1447
1448		ice_flush(hw);
1449
1450		/* Trigger a software interrupt to complete interrupt
1451		 * dissociation.
1452		 */
1453		wr32(hw, GLINT_DYN_CTL(rxq->irqv->me),
1454		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1455	}
1456}
1457
1458/**
1459 * ice_flush_txq_interrupts - Unconfigure Hw Tx queues MSI-X interrupt cause
1460 * @vsi: the VSI to configure
1461 *
1462 * Unset the CAUSE_ENA flag of the TQCTL register for each queue, then trigger
1463 * a software interrupt on that cause. This is required as part of the Tx
1464 * queue disable logic to dissociate the Tx queue from the interrupt.
1465 *
1466 * Note: this function must be called prior to ice_vsi_disable_tx, otherwise
1467 * the Tx queue disable may not complete properly.
1468 */
1469void
1470ice_flush_txq_interrupts(struct ice_vsi *vsi)
1471{
1472	struct ice_hw *hw = &vsi->sc->hw;
1473	int i;
1474
1475	for (i = 0; i < vsi->num_tx_queues; i++) {
1476		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1477		u32 reg, val;
1478
1479		/* Clear the CAUSE_ENA flag */
1480		reg = vsi->tx_qmap[txq->me];
1481		val = rd32(hw, QINT_TQCTL(reg));
1482		val &= ~QINT_TQCTL_CAUSE_ENA_M;
1483		wr32(hw, QINT_TQCTL(reg), val);
1484
1485		ice_flush(hw);
1486
1487		/* Trigger a software interrupt to complete interrupt
1488		 * dissociation.
1489		 */
1490		wr32(hw, GLINT_DYN_CTL(txq->irqv->me),
1491		     GLINT_DYN_CTL_SWINT_TRIG_M | GLINT_DYN_CTL_INTENA_MSK_M);
1492	}
1493}
1494
1495/**
1496 * ice_configure_rx_itr - Configure the Rx ITR settings for this VSI
1497 * @vsi: the VSI to configure
1498 *
1499 * Program the hardware ITR registers with the settings for this VSI.
1500 */
1501void
1502ice_configure_rx_itr(struct ice_vsi *vsi)
1503{
1504	struct ice_hw *hw = &vsi->sc->hw;
1505	int i;
1506
1507	/* TODO: Handle per-queue/per-vector ITR? */
1508
1509	for (i = 0; i < vsi->num_rx_queues; i++) {
1510		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1511
1512		wr32(hw, GLINT_ITR(ICE_RX_ITR, rxq->irqv->me),
1513		     ice_itr_to_reg(hw, vsi->rx_itr));
1514	}
1515
1516	ice_flush(hw);
1517}
1518
1519/**
1520 * ice_configure_tx_itr - Configure the Tx ITR settings for this VSI
1521 * @vsi: the VSI to configure
1522 *
1523 * Program the hardware ITR registers with the settings for this VSI.
1524 */
1525void
1526ice_configure_tx_itr(struct ice_vsi *vsi)
1527{
1528	struct ice_hw *hw = &vsi->sc->hw;
1529	int i;
1530
1531	/* TODO: Handle per-queue/per-vector ITR? */
1532
1533	for (i = 0; i < vsi->num_tx_queues; i++) {
1534		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1535
1536		wr32(hw, GLINT_ITR(ICE_TX_ITR, txq->irqv->me),
1537		     ice_itr_to_reg(hw, vsi->tx_itr));
1538	}
1539
1540	ice_flush(hw);
1541}
1542
1543/**
1544 * ice_setup_tx_ctx - Setup an ice_tlan_ctx structure for a queue
1545 * @txq: the Tx queue to configure
1546 * @tlan_ctx: the Tx LAN queue context structure to initialize
1547 * @pf_q: real queue number
1548 */
1549static int
1550ice_setup_tx_ctx(struct ice_tx_queue *txq, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
1551{
1552	struct ice_vsi *vsi = txq->vsi;
1553	struct ice_softc *sc = vsi->sc;
1554	struct ice_hw *hw = &sc->hw;
1555
1556	tlan_ctx->port_num = hw->port_info->lport;
1557
1558	/* number of descriptors in the queue */
1559	tlan_ctx->qlen = txq->desc_count;
1560
1561	/* set the transmit queue base address, defined in 128 byte units */
1562	tlan_ctx->base = txq->tx_paddr >> 7;
1563
1564	tlan_ctx->pf_num = hw->pf_id;
1565
1566	switch (vsi->type) {
1567	case ICE_VSI_PF:
1568		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
1569		break;
1570	case ICE_VSI_VMDQ2:
1571		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
1572		break;
1573	default:
1574		return (ENODEV);
1575	}
1576
1577	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
1578
1579	/* Enable TSO */
1580	tlan_ctx->tso_ena = 1;
1581	tlan_ctx->internal_usage_flag = 1;
1582
1583	tlan_ctx->tso_qnum = pf_q;
1584
1585	/*
1586	 * Stick with the older legacy Tx queue interface, instead of the new
1587	 * advanced queue interface.
1588	 */
1589	tlan_ctx->legacy_int = 1;
1590
1591	/* Descriptor WB mode */
1592	tlan_ctx->wb_mode = 0;
1593
1594	return (0);
1595}
1596
1597/**
1598 * ice_cfg_vsi_for_tx - Configure the hardware for Tx
1599 * @vsi: the VSI to configure
1600 *
1601 * Configure the device Tx queues through firmware AdminQ commands. After
1602 * this, Tx queues will be ready for transmit.
1603 */
1604int
1605ice_cfg_vsi_for_tx(struct ice_vsi *vsi)
1606{
1607	struct ice_aqc_add_tx_qgrp *qg;
1608	struct ice_hw *hw = &vsi->sc->hw;
1609	device_t dev = vsi->sc->dev;
1610	enum ice_status status;
1611	int i;
1612	int err = 0;
1613	u16 qg_size, pf_q;
1614
1615	qg_size = ice_struct_size(qg, txqs, 1);
1616	qg = (struct ice_aqc_add_tx_qgrp *)malloc(qg_size, M_ICE, M_NOWAIT|M_ZERO);
1617	if (!qg)
1618		return (ENOMEM);
1619
1620	qg->num_txqs = 1;
1621
1622	for (i = 0; i < vsi->num_tx_queues; i++) {
1623		struct ice_tlan_ctx tlan_ctx = { 0 };
1624		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1625
1626		pf_q = vsi->tx_qmap[txq->me];
1627		qg->txqs[0].txq_id = htole16(pf_q);
1628
1629		err = ice_setup_tx_ctx(txq, &tlan_ctx, pf_q);
1630		if (err)
1631			goto free_txqg;
1632
1633		ice_set_ctx(hw, (u8 *)&tlan_ctx, qg->txqs[0].txq_ctx,
1634			    ice_tlan_ctx_info);
1635
1636		status = ice_ena_vsi_txq(hw->port_info, vsi->idx, txq->tc,
1637					 txq->q_handle, 1, qg, qg_size, NULL);
1638		if (status) {
1639			device_printf(dev,
1640				      "Failed to set LAN Tx queue %d (TC %d, handle %d) context, err %s aq_err %s\n",
1641				      i, txq->tc, txq->q_handle,
1642				      ice_status_str(status),
1643				      ice_aq_str(hw->adminq.sq_last_status));
1644			err = ENODEV;
1645			goto free_txqg;
1646		}
1647
1648		/* Keep track of the Tx queue TEID */
1649		if (pf_q == le16toh(qg->txqs[0].txq_id))
1650			txq->q_teid = le32toh(qg->txqs[0].q_teid);
1651	}
1652
1653free_txqg:
1654	free(qg, M_ICE);
1655
1656	return (err);
1657}
1658
1659/**
1660 * ice_setup_rx_ctx - Setup an Rx context structure for a receive queue
1661 * @rxq: the receive queue to program
1662 *
1663 * Setup an Rx queue context structure and program it into the hardware
1664 * registers. This is a necessary step for enabling the Rx queue.
1665 *
1666 * @pre the VSI associated with this queue must have initialized mbuf_sz
1667 */
1668static int
1669ice_setup_rx_ctx(struct ice_rx_queue *rxq)
1670{
1671	struct ice_rlan_ctx rlan_ctx = {0};
1672	struct ice_vsi *vsi = rxq->vsi;
1673	struct ice_softc *sc = vsi->sc;
1674	struct ice_hw *hw = &sc->hw;
1675	enum ice_status status;
1676	u32 rxdid = ICE_RXDID_FLEX_NIC;
1677	u32 regval;
1678	u16 pf_q;
1679
1680	pf_q = vsi->rx_qmap[rxq->me];
1681
1682	/* set the receive queue base address, defined in 128 byte units */
1683	rlan_ctx.base = rxq->rx_paddr >> 7;
1684
1685	rlan_ctx.qlen = rxq->desc_count;
1686
1687	rlan_ctx.dbuf = vsi->mbuf_sz >> ICE_RLAN_CTX_DBUF_S;
1688
1689	/* use 32 byte descriptors */
1690	rlan_ctx.dsize = 1;
1691
1692	/* Strip the Ethernet CRC bytes before the packet is posted to the
1693	 * host memory.
1694	 */
1695	rlan_ctx.crcstrip = 1;
1696
1697	rlan_ctx.l2tsel = 1;
1698
1699	/* don't do header splitting */
1700	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
1701	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
1702	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
1703
1704	/* strip VLAN from inner headers */
1705	rlan_ctx.showiv = 1;
1706
1707	rlan_ctx.rxmax = min(vsi->max_frame_size,
1708			     ICE_MAX_RX_SEGS * vsi->mbuf_sz);
1709
1710	rlan_ctx.lrxqthresh = 1;
1711
1712	if (vsi->type != ICE_VSI_VF) {
1713		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
1714		regval &= ~QRXFLXP_CNTXT_RXDID_IDX_M;
1715		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
1716			QRXFLXP_CNTXT_RXDID_IDX_M;
1717
1718		regval &= ~QRXFLXP_CNTXT_RXDID_PRIO_M;
1719		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
1720			QRXFLXP_CNTXT_RXDID_PRIO_M;
1721
1722		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
1723	}
1724
1725	status = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
1726	if (status) {
1727		device_printf(sc->dev,
1728			      "Failed to set LAN Rx queue context, err %s aq_err %s\n",
1729			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
1730		return (EIO);
1731	}
1732
1733	wr32(hw, rxq->tail, 0);
1734
1735	return 0;
1736}
1737
1738/**
1739 * ice_cfg_vsi_for_rx - Configure the hardware for Rx
1740 * @vsi: the VSI to configure
1741 *
1742 * Prepare an Rx context descriptor and configure the device to receive
1743 * traffic.
1744 *
1745 * @pre the VSI must have initialized mbuf_sz
1746 */
1747int
1748ice_cfg_vsi_for_rx(struct ice_vsi *vsi)
1749{
1750	int i, err;
1751
1752	for (i = 0; i < vsi->num_rx_queues; i++) {
1753		MPASS(vsi->mbuf_sz > 0);
1754		err = ice_setup_rx_ctx(&vsi->rx_queues[i]);
1755		if (err)
1756			return err;
1757	}
1758
1759	return (0);
1760}
1761
1762/**
1763 * ice_is_rxq_ready - Check if an Rx queue is ready
1764 * @hw: ice hw structure
1765 * @pf_q: absolute PF queue index to check
1766 * @reg: on successful return, contains qrx_ctrl contents
1767 *
1768 * Reads the QRX_CTRL register and verifies if the queue is in a consistent
1769 * state. That is, QENA_REQ matches QENA_STAT. Used to check before making
1770 * a request to change the queue, as well as to verify the request has
1771 * finished. The queue should change status within a few microseconds, so we
1772 * use a small delay while polling the register.
1773 *
1774 * Returns an error code if the queue does not update after a few retries.
1775 */
1776static int
1777ice_is_rxq_ready(struct ice_hw *hw, int pf_q, u32 *reg)
1778{
1779	u32 qrx_ctrl, qena_req, qena_stat;
1780	int i;
1781
1782	for (i = 0; i < ICE_Q_WAIT_RETRY_LIMIT; i++) {
1783		qrx_ctrl = rd32(hw, QRX_CTRL(pf_q));
1784		qena_req = (qrx_ctrl >> QRX_CTRL_QENA_REQ_S) & 1;
1785		qena_stat = (qrx_ctrl >> QRX_CTRL_QENA_STAT_S) & 1;
1786
1787		/* if the request and status bits equal, then the queue is
1788		 * fully disabled or enabled.
1789		 */
1790		if (qena_req == qena_stat) {
1791			*reg = qrx_ctrl;
1792			return (0);
1793		}
1794
1795		/* wait a few microseconds before we check again */
1796		DELAY(10);
1797	}
1798
1799	return (ETIMEDOUT);
1800}
1801
1802/**
1803 * ice_control_rx_queue - Configure hardware to start or stop an Rx queue
1804 * @vsi: VSI containing queue to enable/disable
1805 * @qidx: Queue index in VSI space
1806 * @enable: true to enable queue, false to disable
1807 *
1808 * Control the Rx queue through the QRX_CTRL register, enabling or disabling
1809 * it. Wait for the appropriate time to ensure that the queue has actually
1810 * reached the expected state.
1811 */
1812int
1813ice_control_rx_queue(struct ice_vsi *vsi, u16 qidx, bool enable)
1814{
1815	struct ice_hw *hw = &vsi->sc->hw;
1816	device_t dev = vsi->sc->dev;
1817	u32 qrx_ctrl = 0;
1818	int err;
1819
1820	struct ice_rx_queue *rxq = &vsi->rx_queues[qidx];
1821	int pf_q = vsi->rx_qmap[rxq->me];
1822
1823	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1824	if (err) {
1825		device_printf(dev,
1826			      "Rx queue %d is not ready\n",
1827			      pf_q);
1828		return err;
1829	}
1830
1831	/* Skip if the queue is already in correct state */
1832	if (enable == !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M))
1833		return (0);
1834
1835	if (enable)
1836		qrx_ctrl |= QRX_CTRL_QENA_REQ_M;
1837	else
1838		qrx_ctrl &= ~QRX_CTRL_QENA_REQ_M;
1839	wr32(hw, QRX_CTRL(pf_q), qrx_ctrl);
1840
1841	/* wait for the queue to finalize the request */
1842	err = ice_is_rxq_ready(hw, pf_q, &qrx_ctrl);
1843	if (err) {
1844		device_printf(dev,
1845			      "Rx queue %d %sable timeout\n",
1846			      pf_q, (enable ? "en" : "dis"));
1847		return err;
1848	}
1849
1850	/* this should never happen */
1851	if (enable != !!(qrx_ctrl & QRX_CTRL_QENA_STAT_M)) {
1852		device_printf(dev,
1853			      "Rx queue %d invalid state\n",
1854			      pf_q);
1855		return (EDOOFUS);
1856	}
1857
1858	return (0);
1859}
1860
1861/**
1862 * ice_control_all_rx_queues - Configure hardware to start or stop the Rx queues
1863 * @vsi: VSI to enable/disable queues
1864 * @enable: true to enable queues, false to disable
1865 *
1866 * Control the Rx queues through the QRX_CTRL register, enabling or disabling
1867 * them. Wait for the appropriate time to ensure that the queues have actually
1868 * reached the expected state.
1869 */
1870int
1871ice_control_all_rx_queues(struct ice_vsi *vsi, bool enable)
1872{
1873	int i, err;
1874
1875	/* TODO: amortize waits by changing all queues up front and then
1876	 * checking their status afterwards. This will become more necessary
1877	 * when we have a large number of queues.
1878	 */
1879	for (i = 0; i < vsi->num_rx_queues; i++) {
1880		err = ice_control_rx_queue(vsi, i, enable);
1881		if (err)
1882			break;
1883	}
1884
1885	return (0);
1886}
1887
1888/**
1889 * ice_add_mac_to_list - Add MAC filter to a MAC filter list
1890 * @vsi: the VSI to forward to
1891 * @list: list which contains MAC filter entries
1892 * @addr: the MAC address to be added
1893 * @action: filter action to perform on match
1894 *
1895 * Adds a MAC address filter to the list which will be forwarded to firmware
1896 * to add a series of MAC address filters.
1897 *
1898 * Returns 0 on success, and an error code on failure.
1899 *
1900 */
1901static int
1902ice_add_mac_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
1903		    const u8 *addr, enum ice_sw_fwd_act_type action)
1904{
1905	struct ice_fltr_list_entry *entry;
1906
1907	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
1908	if (!entry)
1909		return (ENOMEM);
1910
1911	entry->fltr_info.flag = ICE_FLTR_TX;
1912	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
1913	entry->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
1914	entry->fltr_info.fltr_act = action;
1915	entry->fltr_info.vsi_handle = vsi->idx;
1916	bcopy(addr, entry->fltr_info.l_data.mac.mac_addr, ETHER_ADDR_LEN);
1917
1918	LIST_ADD(&entry->list_entry, list);
1919
1920	return 0;
1921}
1922
1923/**
1924 * ice_free_fltr_list - Free memory associated with a MAC address list
1925 * @list: the list to free
1926 *
1927 * Free the memory of each entry associated with the list.
1928 */
1929static void
1930ice_free_fltr_list(struct ice_list_head *list)
1931{
1932	struct ice_fltr_list_entry *e, *tmp;
1933
1934	LIST_FOR_EACH_ENTRY_SAFE(e, tmp, list, ice_fltr_list_entry, list_entry) {
1935		LIST_DEL(&e->list_entry);
1936		free(e, M_ICE);
1937	}
1938}
1939
1940/**
1941 * ice_add_vsi_mac_filter - Add a MAC address filter for a VSI
1942 * @vsi: the VSI to add the filter for
1943 * @addr: MAC address to add a filter for
1944 *
1945 * Add a MAC address filter for a given VSI. This is a wrapper around
1946 * ice_add_mac to simplify the interface. First, it only accepts a single
1947 * address, so we don't have to mess around with the list setup in other
1948 * functions. Second, it ignores the ICE_ERR_ALREADY_EXISTS error, so that
1949 * callers don't need to worry about attempting to add the same filter twice.
1950 */
1951int
1952ice_add_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
1953{
1954	struct ice_list_head mac_addr_list;
1955	struct ice_hw *hw = &vsi->sc->hw;
1956	device_t dev = vsi->sc->dev;
1957	enum ice_status status;
1958	int err = 0;
1959
1960	INIT_LIST_HEAD(&mac_addr_list);
1961
1962	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
1963	if (err)
1964		goto free_mac_list;
1965
1966	status = ice_add_mac(hw, &mac_addr_list);
1967	if (status == ICE_ERR_ALREADY_EXISTS) {
1968		; /* Don't complain if we try to add a filter that already exists */
1969	} else if (status) {
1970		device_printf(dev,
1971			      "Failed to add a filter for MAC %6D, err %s aq_err %s\n",
1972			      addr, ":",
1973			      ice_status_str(status),
1974			      ice_aq_str(hw->adminq.sq_last_status));
1975		err = (EIO);
1976	}
1977
1978free_mac_list:
1979	ice_free_fltr_list(&mac_addr_list);
1980	return err;
1981}
1982
1983/**
1984 * ice_cfg_pf_default_mac_filters - Setup default unicast and broadcast addrs
1985 * @sc: device softc structure
1986 *
1987 * Program the default unicast and broadcast filters for the PF VSI.
1988 */
1989int
1990ice_cfg_pf_default_mac_filters(struct ice_softc *sc)
1991{
1992	struct ice_vsi *vsi = &sc->pf_vsi;
1993	struct ice_hw *hw = &sc->hw;
1994	int err;
1995
1996	/* Add the LAN MAC address */
1997	err = ice_add_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
1998	if (err)
1999		return err;
2000
2001	/* Add the broadcast address */
2002	err = ice_add_vsi_mac_filter(vsi, broadcastaddr);
2003	if (err)
2004		return err;
2005
2006	return (0);
2007}
2008
2009/**
2010 * ice_remove_vsi_mac_filter - Remove a MAC address filter for a VSI
2011 * @vsi: the VSI to add the filter for
2012 * @addr: MAC address to remove a filter for
2013 *
2014 * Remove a MAC address filter from a given VSI. This is a wrapper around
2015 * ice_remove_mac to simplify the interface. First, it only accepts a single
2016 * address, so we don't have to mess around with the list setup in other
2017 * functions. Second, it ignores the ICE_ERR_DOES_NOT_EXIST error, so that
2018 * callers don't need to worry about attempting to remove filters which
2019 * haven't yet been added.
2020 */
2021int
2022ice_remove_vsi_mac_filter(struct ice_vsi *vsi, const u8 *addr)
2023{
2024	struct ice_list_head mac_addr_list;
2025	struct ice_hw *hw = &vsi->sc->hw;
2026	device_t dev = vsi->sc->dev;
2027	enum ice_status status;
2028	int err = 0;
2029
2030	INIT_LIST_HEAD(&mac_addr_list);
2031
2032	err = ice_add_mac_to_list(vsi, &mac_addr_list, addr, ICE_FWD_TO_VSI);
2033	if (err)
2034		goto free_mac_list;
2035
2036	status = ice_remove_mac(hw, &mac_addr_list);
2037	if (status == ICE_ERR_DOES_NOT_EXIST) {
2038		; /* Don't complain if we try to remove a filter that doesn't exist */
2039	} else if (status) {
2040		device_printf(dev,
2041			      "Failed to remove a filter for MAC %6D, err %s aq_err %s\n",
2042			      addr, ":",
2043			      ice_status_str(status),
2044			      ice_aq_str(hw->adminq.sq_last_status));
2045		err = (EIO);
2046	}
2047
2048free_mac_list:
2049	ice_free_fltr_list(&mac_addr_list);
2050	return err;
2051}
2052
2053/**
2054 * ice_rm_pf_default_mac_filters - Remove default unicast and broadcast addrs
2055 * @sc: device softc structure
2056 *
2057 * Remove the default unicast and broadcast filters from the PF VSI.
2058 */
2059int
2060ice_rm_pf_default_mac_filters(struct ice_softc *sc)
2061{
2062	struct ice_vsi *vsi = &sc->pf_vsi;
2063	struct ice_hw *hw = &sc->hw;
2064	int err;
2065
2066	/* Remove the LAN MAC address */
2067	err = ice_remove_vsi_mac_filter(vsi, hw->port_info->mac.lan_addr);
2068	if (err)
2069		return err;
2070
2071	/* Remove the broadcast address */
2072	err = ice_remove_vsi_mac_filter(vsi, broadcastaddr);
2073	if (err)
2074		return (EIO);
2075
2076	return (0);
2077}
2078
2079/**
2080 * ice_check_ctrlq_errors - Check for and report controlq errors
2081 * @sc: device private structure
2082 * @qname: name of the controlq
2083 * @cq: the controlq to check
2084 *
2085 * Check and report controlq errors. Currently all we do is report them to the
2086 * kernel message log, but we might want to improve this in the future, such
2087 * as to keep track of statistics.
2088 */
2089static void
2090ice_check_ctrlq_errors(struct ice_softc *sc, const char *qname,
2091		       struct ice_ctl_q_info *cq)
2092{
2093	struct ice_hw *hw = &sc->hw;
2094	u32 val;
2095
2096	/* Check for error indications. Note that all the controlqs use the
2097	 * same register layout, so we use the PF_FW_AxQLEN defines only.
2098	 */
2099	val = rd32(hw, cq->rq.len);
2100	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2101		   PF_FW_ARQLEN_ARQCRIT_M)) {
2102		if (val & PF_FW_ARQLEN_ARQVFE_M)
2103			device_printf(sc->dev,
2104				"%s Receive Queue VF Error detected\n", qname);
2105		if (val & PF_FW_ARQLEN_ARQOVFL_M)
2106			device_printf(sc->dev,
2107				"%s Receive Queue Overflow Error detected\n",
2108				qname);
2109		if (val & PF_FW_ARQLEN_ARQCRIT_M)
2110			device_printf(sc->dev,
2111				"%s Receive Queue Critical Error detected\n",
2112				qname);
2113		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
2114			 PF_FW_ARQLEN_ARQCRIT_M);
2115		wr32(hw, cq->rq.len, val);
2116	}
2117
2118	val = rd32(hw, cq->sq.len);
2119	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2120		   PF_FW_ATQLEN_ATQCRIT_M)) {
2121		if (val & PF_FW_ATQLEN_ATQVFE_M)
2122			device_printf(sc->dev,
2123				"%s Send Queue VF Error detected\n", qname);
2124		if (val & PF_FW_ATQLEN_ATQOVFL_M)
2125			device_printf(sc->dev,
2126				"%s Send Queue Overflow Error detected\n",
2127				qname);
2128		if (val & PF_FW_ATQLEN_ATQCRIT_M)
2129			device_printf(sc->dev,
2130				"%s Send Queue Critical Error detected\n",
2131				qname);
2132		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
2133			 PF_FW_ATQLEN_ATQCRIT_M);
2134		wr32(hw, cq->sq.len, val);
2135	}
2136}
2137
2138/**
2139 * ice_process_link_event - Process a link event indication from firmware
2140 * @sc: device softc structure
2141 * @e: the received event data
2142 *
2143 * Gets the current link status from hardware, and may print a message if an
2144 * unqualified is detected.
2145 */
2146static void
2147ice_process_link_event(struct ice_softc *sc,
2148		       struct ice_rq_event_info __invariant_only *e)
2149{
2150	struct ice_port_info *pi = sc->hw.port_info;
2151	struct ice_hw *hw = &sc->hw;
2152	device_t dev = sc->dev;
2153	enum ice_status status;
2154
2155	/* Sanity check that the data length isn't too small */
2156	MPASS(le16toh(e->desc.datalen) >= ICE_GET_LINK_STATUS_DATALEN_V1);
2157
2158	/*
2159	 * Even though the adapter gets link status information inside the
2160	 * event, it needs to send a Get Link Status AQ command in order
2161	 * to re-enable link events.
2162	 */
2163	pi->phy.get_link_info = true;
2164	ice_get_link_status(pi, &sc->link_up);
2165
2166	if (pi->phy.link_info.topo_media_conflict &
2167	   (ICE_AQ_LINK_TOPO_CONFLICT | ICE_AQ_LINK_MEDIA_CONFLICT |
2168	    ICE_AQ_LINK_TOPO_CORRUPT))
2169		device_printf(dev,
2170		    "Possible mis-configuration of the Ethernet port detected; please use the Intel (R) Ethernet Port Configuration Tool utility to address the issue.\n");
2171
2172	if ((pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) &&
2173	    !(pi->phy.link_info.link_info & ICE_AQ_LINK_UP)) {
2174		if (!(pi->phy.link_info.an_info & ICE_AQ_QUALIFIED_MODULE))
2175			device_printf(dev,
2176			    "Link is disabled on this device because an unsupported module type was detected! Refer to the Intel (R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
2177		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED)
2178			device_printf(dev,
2179			    "The module's power requirements exceed the device's power supply. Cannot start link.\n");
2180		if (pi->phy.link_info.link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT)
2181			device_printf(dev,
2182			    "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
2183	}
2184
2185	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
2186		if (!ice_testandset_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2187			status = ice_aq_set_link_restart_an(pi, false, NULL);
2188			if (status != ICE_SUCCESS && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
2189				device_printf(dev,
2190				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
2191				    __func__, ice_status_str(status),
2192				    ice_aq_str(hw->adminq.sq_last_status));
2193		}
2194	}
2195	/* ICE_STATE_NO_MEDIA is cleared when polling task detects media */
2196
2197	/* Indicate that link status must be reported again */
2198	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2199
2200	/* OS link info is updated elsewhere */
2201}
2202
2203/**
2204 * ice_process_ctrlq_event - Respond to a controlq event
2205 * @sc: device private structure
2206 * @qname: the name for this controlq
2207 * @event: the event to process
2208 *
2209 * Perform actions in response to various controlq event notifications.
2210 */
2211static void
2212ice_process_ctrlq_event(struct ice_softc *sc, const char *qname,
2213			struct ice_rq_event_info *event)
2214{
2215	u16 opcode;
2216
2217	opcode = le16toh(event->desc.opcode);
2218
2219	switch (opcode) {
2220	case ice_aqc_opc_get_link_status:
2221		ice_process_link_event(sc, event);
2222		break;
2223	case ice_aqc_opc_fw_logs_event:
2224		ice_handle_fw_log_event(sc, &event->desc, event->msg_buf);
2225		break;
2226	case ice_aqc_opc_lldp_set_mib_change:
2227		ice_handle_mib_change_event(sc, event);
2228		break;
2229	case ice_aqc_opc_event_lan_overflow:
2230		ice_handle_lan_overflow_event(sc, event);
2231		break;
2232	case ice_aqc_opc_get_health_status:
2233		ice_handle_health_status_event(sc, event);
2234		break;
2235	default:
2236		device_printf(sc->dev,
2237			      "%s Receive Queue unhandled event 0x%04x ignored\n",
2238			      qname, opcode);
2239	}
2240}
2241
2242/**
2243 * ice_process_ctrlq - helper function to process controlq rings
2244 * @sc: device private structure
2245 * @q_type: specific control queue type
2246 * @pending: return parameter to track remaining events
2247 *
2248 * Process controlq events for a given control queue type. Returns zero on
2249 * success, and an error code on failure. If successful, pending is the number
2250 * of remaining events left in the queue.
2251 */
2252int
2253ice_process_ctrlq(struct ice_softc *sc, enum ice_ctl_q q_type, u16 *pending)
2254{
2255	struct ice_rq_event_info event = { { 0 } };
2256	struct ice_hw *hw = &sc->hw;
2257	struct ice_ctl_q_info *cq;
2258	enum ice_status status;
2259	const char *qname;
2260	int loop = 0;
2261
2262	switch (q_type) {
2263	case ICE_CTL_Q_ADMIN:
2264		cq = &hw->adminq;
2265		qname = "Admin";
2266		break;
2267	case ICE_CTL_Q_MAILBOX:
2268		cq = &hw->mailboxq;
2269		qname = "Mailbox";
2270		break;
2271	default:
2272		device_printf(sc->dev,
2273			      "Unknown control queue type 0x%x\n",
2274			      q_type);
2275		return 0;
2276	}
2277
2278	ice_check_ctrlq_errors(sc, qname, cq);
2279
2280	/*
2281	 * Control queue processing happens during the admin task which may be
2282	 * holding a non-sleepable lock, so we *must* use M_NOWAIT here.
2283	 */
2284	event.buf_len = cq->rq_buf_size;
2285	event.msg_buf = (u8 *)malloc(event.buf_len, M_ICE, M_ZERO | M_NOWAIT);
2286	if (!event.msg_buf) {
2287		device_printf(sc->dev,
2288			      "Unable to allocate memory for %s Receive Queue event\n",
2289			      qname);
2290		return (ENOMEM);
2291	}
2292
2293	do {
2294		status = ice_clean_rq_elem(hw, cq, &event, pending);
2295		if (status == ICE_ERR_AQ_NO_WORK)
2296			break;
2297		if (status) {
2298			if (q_type == ICE_CTL_Q_ADMIN)
2299				device_printf(sc->dev,
2300					      "%s Receive Queue event error %s\n",
2301					      qname, ice_status_str(status));
2302			else
2303				device_printf(sc->dev,
2304					      "%s Receive Queue event error %s\n",
2305					      qname, ice_status_str(status));
2306			free(event.msg_buf, M_ICE);
2307			return (EIO);
2308		}
2309		/* XXX should we separate this handler by controlq type? */
2310		ice_process_ctrlq_event(sc, qname, &event);
2311	} while (*pending && (++loop < ICE_CTRLQ_WORK_LIMIT));
2312
2313	free(event.msg_buf, M_ICE);
2314
2315	return 0;
2316}
2317
2318/**
2319 * pkg_ver_empty - Check if a package version is empty
2320 * @pkg_ver: the package version to check
2321 * @pkg_name: the package name to check
2322 *
2323 * Checks if the package version structure is empty. We consider a package
2324 * version as empty if none of the versions are non-zero and the name string
2325 * is null as well.
2326 *
2327 * This is used to check if the package version was initialized by the driver,
2328 * as we do not expect an actual DDP package file to have a zero'd version and
2329 * name.
2330 *
2331 * @returns true if the package version is valid, or false otherwise.
2332 */
2333static bool
2334pkg_ver_empty(struct ice_pkg_ver *pkg_ver, u8 *pkg_name)
2335{
2336	return (pkg_name[0] == '\0' &&
2337		pkg_ver->major == 0 &&
2338		pkg_ver->minor == 0 &&
2339		pkg_ver->update == 0 &&
2340		pkg_ver->draft == 0);
2341}
2342
2343/**
2344 * pkg_ver_compatible - Check if the package version is compatible
2345 * @pkg_ver: the package version to check
2346 *
2347 * Compares the package version number to the driver's expected major/minor
2348 * version. Returns an integer indicating whether the version is older, newer,
2349 * or compatible with the driver.
2350 *
2351 * @returns 0 if the package version is compatible, -1 if the package version
2352 * is older, and 1 if the package version is newer than the driver version.
2353 */
2354static int
2355pkg_ver_compatible(struct ice_pkg_ver *pkg_ver)
2356{
2357	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ)
2358		return (1); /* newer */
2359	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2360		 (pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
2361		return (1); /* newer */
2362	else if ((pkg_ver->major == ICE_PKG_SUPP_VER_MAJ) &&
2363		 (pkg_ver->minor == ICE_PKG_SUPP_VER_MNR))
2364		return (0); /* compatible */
2365	else
2366		return (-1); /* older */
2367}
2368
2369/**
2370 * ice_os_pkg_version_str - Format OS package version info into a sbuf
2371 * @hw: device hw structure
2372 * @buf: string buffer to store name/version string
2373 *
2374 * Formats the name and version of the OS DDP package as found in the ice_ddp
2375 * module into a string.
2376 *
2377 * @remark This will almost always be the same as the active package, but
2378 * could be different in some cases. Use ice_active_pkg_version_str to get the
2379 * version of the active DDP package.
2380 */
2381static void
2382ice_os_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2383{
2384	char name_buf[ICE_PKG_NAME_SIZE];
2385
2386	/* If the OS DDP package info is empty, use "None" */
2387	if (pkg_ver_empty(&hw->pkg_ver, hw->pkg_name)) {
2388		sbuf_printf(buf, "None");
2389		return;
2390	}
2391
2392	/*
2393	 * This should already be null-terminated, but since this is a raw
2394	 * value from an external source, strlcpy() into a new buffer to
2395	 * make sure.
2396	 */
2397	bzero(name_buf, sizeof(name_buf));
2398	strlcpy(name_buf, (char *)hw->pkg_name, ICE_PKG_NAME_SIZE);
2399
2400	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2401	    name_buf,
2402	    hw->pkg_ver.major,
2403	    hw->pkg_ver.minor,
2404	    hw->pkg_ver.update,
2405	    hw->pkg_ver.draft);
2406}
2407
2408/**
2409 * ice_active_pkg_version_str - Format active package version info into a sbuf
2410 * @hw: device hw structure
2411 * @buf: string buffer to store name/version string
2412 *
2413 * Formats the name and version of the active DDP package info into a string
2414 * buffer for use.
2415 */
2416static void
2417ice_active_pkg_version_str(struct ice_hw *hw, struct sbuf *buf)
2418{
2419	char name_buf[ICE_PKG_NAME_SIZE];
2420
2421	/* If the active DDP package info is empty, use "None" */
2422	if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
2423		sbuf_printf(buf, "None");
2424		return;
2425	}
2426
2427	/*
2428	 * This should already be null-terminated, but since this is a raw
2429	 * value from an external source, strlcpy() into a new buffer to
2430	 * make sure.
2431	 */
2432	bzero(name_buf, sizeof(name_buf));
2433	strlcpy(name_buf, (char *)hw->active_pkg_name, ICE_PKG_NAME_SIZE);
2434
2435	sbuf_printf(buf, "%s version %u.%u.%u.%u",
2436	    name_buf,
2437	    hw->active_pkg_ver.major,
2438	    hw->active_pkg_ver.minor,
2439	    hw->active_pkg_ver.update,
2440	    hw->active_pkg_ver.draft);
2441
2442	if (hw->active_track_id != 0)
2443		sbuf_printf(buf, ", track id 0x%08x", hw->active_track_id);
2444}
2445
2446/**
2447 * ice_nvm_version_str - Format the NVM version information into a sbuf
2448 * @hw: device hw structure
2449 * @buf: string buffer to store version string
2450 *
2451 * Formats the NVM information including firmware version, API version, NVM
2452 * version, the EETRACK id, and OEM specific version information into a string
2453 * buffer.
2454 */
2455static void
2456ice_nvm_version_str(struct ice_hw *hw, struct sbuf *buf)
2457{
2458	struct ice_nvm_info *nvm = &hw->flash.nvm;
2459	struct ice_orom_info *orom = &hw->flash.orom;
2460	struct ice_netlist_info *netlist = &hw->flash.netlist;
2461
2462	/* Note that the netlist versions are stored in packed Binary Coded
2463	 * Decimal format. The use of '%x' will correctly display these as
2464	 * decimal numbers. This works because every 4 bits will be displayed
2465	 * as a hexadecimal digit, and the BCD format will only use the values
2466	 * 0-9.
2467	 */
2468	sbuf_printf(buf,
2469		    "fw %u.%u.%u api %u.%u nvm %x.%02x etid %08x netlist %x.%x.%x-%x.%x.%x.%04x oem %u.%u.%u",
2470		    hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch,
2471		    hw->api_maj_ver, hw->api_min_ver,
2472		    nvm->major, nvm->minor, nvm->eetrack,
2473		    netlist->major, netlist->minor,
2474		    netlist->type >> 16, netlist->type & 0xFFFF,
2475		    netlist->rev, netlist->cust_ver, netlist->hash,
2476		    orom->major, orom->build, orom->patch);
2477}
2478
2479/**
2480 * ice_print_nvm_version - Print the NVM info to the kernel message log
2481 * @sc: the device softc structure
2482 *
2483 * Format and print an NVM version string using ice_nvm_version_str().
2484 */
2485void
2486ice_print_nvm_version(struct ice_softc *sc)
2487{
2488	struct ice_hw *hw = &sc->hw;
2489	device_t dev = sc->dev;
2490	struct sbuf *sbuf;
2491
2492	sbuf = sbuf_new_auto();
2493	ice_nvm_version_str(hw, sbuf);
2494	sbuf_finish(sbuf);
2495	device_printf(dev, "%s\n", sbuf_data(sbuf));
2496	sbuf_delete(sbuf);
2497}
2498
2499/**
2500 * ice_update_vsi_hw_stats - Update VSI-specific ethernet statistics counters
2501 * @vsi: the VSI to be updated
2502 *
2503 * Reads hardware stats and updates the ice_vsi_hw_stats tracking structure with
2504 * the updated values.
2505 */
2506void
2507ice_update_vsi_hw_stats(struct ice_vsi *vsi)
2508{
2509	struct ice_eth_stats *prev_es, *cur_es;
2510	struct ice_hw *hw = &vsi->sc->hw;
2511	u16 vsi_num;
2512
2513	if (!ice_is_vsi_valid(hw, vsi->idx))
2514		return;
2515
2516	vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); /* HW absolute index of a VSI */
2517	prev_es = &vsi->hw_stats.prev;
2518	cur_es = &vsi->hw_stats.cur;
2519
2520#define ICE_VSI_STAT40(name, location) \
2521	ice_stat_update40(hw, name ## L(vsi_num), \
2522			  vsi->hw_stats.offsets_loaded, \
2523			  &prev_es->location, &cur_es->location)
2524
2525#define ICE_VSI_STAT32(name, location) \
2526	ice_stat_update32(hw, name(vsi_num), \
2527			  vsi->hw_stats.offsets_loaded, \
2528			  &prev_es->location, &cur_es->location)
2529
2530	ICE_VSI_STAT40(GLV_GORC, rx_bytes);
2531	ICE_VSI_STAT40(GLV_UPRC, rx_unicast);
2532	ICE_VSI_STAT40(GLV_MPRC, rx_multicast);
2533	ICE_VSI_STAT40(GLV_BPRC, rx_broadcast);
2534	ICE_VSI_STAT32(GLV_RDPC, rx_discards);
2535	ICE_VSI_STAT40(GLV_GOTC, tx_bytes);
2536	ICE_VSI_STAT40(GLV_UPTC, tx_unicast);
2537	ICE_VSI_STAT40(GLV_MPTC, tx_multicast);
2538	ICE_VSI_STAT40(GLV_BPTC, tx_broadcast);
2539	ICE_VSI_STAT32(GLV_TEPC, tx_errors);
2540
2541	ice_stat_update_repc(hw, vsi->idx, vsi->hw_stats.offsets_loaded,
2542			     cur_es);
2543
2544#undef ICE_VSI_STAT40
2545#undef ICE_VSI_STAT32
2546
2547	vsi->hw_stats.offsets_loaded = true;
2548}
2549
2550/**
2551 * ice_reset_vsi_stats - Reset VSI statistics counters
2552 * @vsi: VSI structure
2553 *
2554 * Resets the software tracking counters for the VSI statistics, and indicate
2555 * that the offsets haven't been loaded. This is intended to be called
2556 * post-reset so that VSI statistics count from zero again.
2557 */
2558void
2559ice_reset_vsi_stats(struct ice_vsi *vsi)
2560{
2561	/* Reset HW stats */
2562	memset(&vsi->hw_stats.prev, 0, sizeof(vsi->hw_stats.prev));
2563	memset(&vsi->hw_stats.cur, 0, sizeof(vsi->hw_stats.cur));
2564	vsi->hw_stats.offsets_loaded = false;
2565}
2566
2567/**
2568 * ice_update_pf_stats - Update port stats counters
2569 * @sc: device private softc structure
2570 *
2571 * Reads hardware statistics registers and updates the software tracking
2572 * structure with new values.
2573 */
2574void
2575ice_update_pf_stats(struct ice_softc *sc)
2576{
2577	struct ice_hw_port_stats *prev_ps, *cur_ps;
2578	struct ice_hw *hw = &sc->hw;
2579	u8 lport;
2580
2581	MPASS(hw->port_info);
2582
2583	prev_ps = &sc->stats.prev;
2584	cur_ps = &sc->stats.cur;
2585	lport = hw->port_info->lport;
2586
2587#define ICE_PF_STAT_PFC(name, location, index) \
2588	ice_stat_update40(hw, name(lport, index), \
2589			  sc->stats.offsets_loaded, \
2590			  &prev_ps->location[index], &cur_ps->location[index])
2591
2592#define ICE_PF_STAT40(name, location) \
2593	ice_stat_update40(hw, name ## L(lport), \
2594			  sc->stats.offsets_loaded, \
2595			  &prev_ps->location, &cur_ps->location)
2596
2597#define ICE_PF_STAT32(name, location) \
2598	ice_stat_update32(hw, name(lport), \
2599			  sc->stats.offsets_loaded, \
2600			  &prev_ps->location, &cur_ps->location)
2601
2602	ICE_PF_STAT40(GLPRT_GORC, eth.rx_bytes);
2603	ICE_PF_STAT40(GLPRT_UPRC, eth.rx_unicast);
2604	ICE_PF_STAT40(GLPRT_MPRC, eth.rx_multicast);
2605	ICE_PF_STAT40(GLPRT_BPRC, eth.rx_broadcast);
2606	ICE_PF_STAT40(GLPRT_GOTC, eth.tx_bytes);
2607	ICE_PF_STAT40(GLPRT_UPTC, eth.tx_unicast);
2608	ICE_PF_STAT40(GLPRT_MPTC, eth.tx_multicast);
2609	ICE_PF_STAT40(GLPRT_BPTC, eth.tx_broadcast);
2610	/* This stat register doesn't have an lport */
2611	ice_stat_update32(hw, PRTRPB_RDPC,
2612			  sc->stats.offsets_loaded,
2613			  &prev_ps->eth.rx_discards, &cur_ps->eth.rx_discards);
2614
2615	ICE_PF_STAT32(GLPRT_TDOLD, tx_dropped_link_down);
2616	ICE_PF_STAT40(GLPRT_PRC64, rx_size_64);
2617	ICE_PF_STAT40(GLPRT_PRC127, rx_size_127);
2618	ICE_PF_STAT40(GLPRT_PRC255, rx_size_255);
2619	ICE_PF_STAT40(GLPRT_PRC511, rx_size_511);
2620	ICE_PF_STAT40(GLPRT_PRC1023, rx_size_1023);
2621	ICE_PF_STAT40(GLPRT_PRC1522, rx_size_1522);
2622	ICE_PF_STAT40(GLPRT_PRC9522, rx_size_big);
2623	ICE_PF_STAT40(GLPRT_PTC64, tx_size_64);
2624	ICE_PF_STAT40(GLPRT_PTC127, tx_size_127);
2625	ICE_PF_STAT40(GLPRT_PTC255, tx_size_255);
2626	ICE_PF_STAT40(GLPRT_PTC511, tx_size_511);
2627	ICE_PF_STAT40(GLPRT_PTC1023, tx_size_1023);
2628	ICE_PF_STAT40(GLPRT_PTC1522, tx_size_1522);
2629	ICE_PF_STAT40(GLPRT_PTC9522, tx_size_big);
2630
2631	/* Update Priority Flow Control Stats */
2632	for (int i = 0; i <= GLPRT_PXOFFRXC_MAX_INDEX; i++) {
2633		ICE_PF_STAT_PFC(GLPRT_PXONRXC, priority_xon_rx, i);
2634		ICE_PF_STAT_PFC(GLPRT_PXOFFRXC, priority_xoff_rx, i);
2635		ICE_PF_STAT_PFC(GLPRT_PXONTXC, priority_xon_tx, i);
2636		ICE_PF_STAT_PFC(GLPRT_PXOFFTXC, priority_xoff_tx, i);
2637		ICE_PF_STAT_PFC(GLPRT_RXON2OFFCNT, priority_xon_2_xoff, i);
2638	}
2639
2640	ICE_PF_STAT32(GLPRT_LXONRXC, link_xon_rx);
2641	ICE_PF_STAT32(GLPRT_LXOFFRXC, link_xoff_rx);
2642	ICE_PF_STAT32(GLPRT_LXONTXC, link_xon_tx);
2643	ICE_PF_STAT32(GLPRT_LXOFFTXC, link_xoff_tx);
2644	ICE_PF_STAT32(GLPRT_CRCERRS, crc_errors);
2645	ICE_PF_STAT32(GLPRT_ILLERRC, illegal_bytes);
2646	ICE_PF_STAT32(GLPRT_MLFC, mac_local_faults);
2647	ICE_PF_STAT32(GLPRT_MRFC, mac_remote_faults);
2648	ICE_PF_STAT32(GLPRT_RLEC, rx_len_errors);
2649	ICE_PF_STAT32(GLPRT_RUC, rx_undersize);
2650	ICE_PF_STAT32(GLPRT_RFC, rx_fragments);
2651	ICE_PF_STAT32(GLPRT_ROC, rx_oversize);
2652	ICE_PF_STAT32(GLPRT_RJC, rx_jabber);
2653
2654#undef ICE_PF_STAT40
2655#undef ICE_PF_STAT32
2656#undef ICE_PF_STAT_PFC
2657
2658	sc->stats.offsets_loaded = true;
2659}
2660
2661/**
2662 * ice_reset_pf_stats - Reset port stats counters
2663 * @sc: Device private softc structure
2664 *
2665 * Reset software tracking values for statistics to zero, and indicate that
2666 * offsets haven't been loaded. Intended to be called after a device reset so
2667 * that statistics count from zero again.
2668 */
2669void
2670ice_reset_pf_stats(struct ice_softc *sc)
2671{
2672	memset(&sc->stats.prev, 0, sizeof(sc->stats.prev));
2673	memset(&sc->stats.cur, 0, sizeof(sc->stats.cur));
2674	sc->stats.offsets_loaded = false;
2675}
2676
2677/**
2678 * ice_sysctl_show_fw - sysctl callback to show firmware information
2679 * @oidp: sysctl oid structure
2680 * @arg1: pointer to private data structure
2681 * @arg2: unused
2682 * @req: sysctl request pointer
2683 *
2684 * Callback for the fw_version sysctl, to display the current firmware
2685 * information found at hardware init time.
2686 */
2687static int
2688ice_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
2689{
2690	struct ice_softc *sc = (struct ice_softc *)arg1;
2691	struct ice_hw *hw = &sc->hw;
2692	struct sbuf *sbuf;
2693
2694	UNREFERENCED_PARAMETER(oidp);
2695	UNREFERENCED_PARAMETER(arg2);
2696
2697	if (ice_driver_is_detaching(sc))
2698		return (ESHUTDOWN);
2699
2700	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2701	ice_nvm_version_str(hw, sbuf);
2702	sbuf_finish(sbuf);
2703	sbuf_delete(sbuf);
2704
2705	return (0);
2706}
2707
2708/**
2709 * ice_sysctl_pba_number - sysctl callback to show PBA number
2710 * @oidp: sysctl oid structure
2711 * @arg1: pointer to private data structure
2712 * @arg2: unused
2713 * @req: sysctl request pointer
2714 *
2715 * Callback for the pba_number sysctl, used to read the Product Board Assembly
2716 * number for this device.
2717 */
2718static int
2719ice_sysctl_pba_number(SYSCTL_HANDLER_ARGS)
2720{
2721	struct ice_softc *sc = (struct ice_softc *)arg1;
2722	struct ice_hw *hw = &sc->hw;
2723	device_t dev = sc->dev;
2724	u8 pba_string[32] = "";
2725	enum ice_status status;
2726
2727	UNREFERENCED_PARAMETER(arg2);
2728
2729	if (ice_driver_is_detaching(sc))
2730		return (ESHUTDOWN);
2731
2732	status = ice_read_pba_string(hw, pba_string, sizeof(pba_string));
2733	if (status) {
2734		device_printf(dev,
2735		    "%s: failed to read PBA string from NVM; status %s, aq_err %s\n",
2736		    __func__, ice_status_str(status),
2737		    ice_aq_str(hw->adminq.sq_last_status));
2738		return (EIO);
2739	}
2740
2741	return sysctl_handle_string(oidp, pba_string, sizeof(pba_string), req);
2742}
2743
2744/**
2745 * ice_sysctl_pkg_version - sysctl to show the active package version info
2746 * @oidp: sysctl oid structure
2747 * @arg1: pointer to private data structure
2748 * @arg2: unused
2749 * @req: sysctl request pointer
2750 *
2751 * Callback for the pkg_version sysctl, to display the active DDP package name
2752 * and version information.
2753 */
2754static int
2755ice_sysctl_pkg_version(SYSCTL_HANDLER_ARGS)
2756{
2757	struct ice_softc *sc = (struct ice_softc *)arg1;
2758	struct ice_hw *hw = &sc->hw;
2759	struct sbuf *sbuf;
2760
2761	UNREFERENCED_PARAMETER(oidp);
2762	UNREFERENCED_PARAMETER(arg2);
2763
2764	if (ice_driver_is_detaching(sc))
2765		return (ESHUTDOWN);
2766
2767	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2768	ice_active_pkg_version_str(hw, sbuf);
2769	sbuf_finish(sbuf);
2770	sbuf_delete(sbuf);
2771
2772	return (0);
2773}
2774
2775/**
2776 * ice_sysctl_os_pkg_version - sysctl to show the OS package version info
2777 * @oidp: sysctl oid structure
2778 * @arg1: pointer to private data structure
2779 * @arg2: unused
2780 * @req: sysctl request pointer
2781 *
2782 * Callback for the pkg_version sysctl, to display the OS DDP package name and
2783 * version info found in the ice_ddp module.
2784 */
2785static int
2786ice_sysctl_os_pkg_version(SYSCTL_HANDLER_ARGS)
2787{
2788	struct ice_softc *sc = (struct ice_softc *)arg1;
2789	struct ice_hw *hw = &sc->hw;
2790	struct sbuf *sbuf;
2791
2792	UNREFERENCED_PARAMETER(oidp);
2793	UNREFERENCED_PARAMETER(arg2);
2794
2795	if (ice_driver_is_detaching(sc))
2796		return (ESHUTDOWN);
2797
2798	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
2799	ice_os_pkg_version_str(hw, sbuf);
2800	sbuf_finish(sbuf);
2801	sbuf_delete(sbuf);
2802
2803	return (0);
2804}
2805
2806/**
2807 * ice_sysctl_current_speed - sysctl callback to show current link speed
2808 * @oidp: sysctl oid structure
2809 * @arg1: pointer to private data structure
2810 * @arg2: unused
2811 * @req: sysctl request pointer
2812 *
2813 * Callback for the current_speed sysctl, to display the string representing
2814 * the current link speed.
2815 */
2816static int
2817ice_sysctl_current_speed(SYSCTL_HANDLER_ARGS)
2818{
2819	struct ice_softc *sc = (struct ice_softc *)arg1;
2820	struct ice_hw *hw = &sc->hw;
2821	struct sbuf *sbuf;
2822
2823	UNREFERENCED_PARAMETER(oidp);
2824	UNREFERENCED_PARAMETER(arg2);
2825
2826	if (ice_driver_is_detaching(sc))
2827		return (ESHUTDOWN);
2828
2829	sbuf = sbuf_new_for_sysctl(NULL, NULL, 10, req);
2830	sbuf_printf(sbuf, "%s", ice_aq_speed_to_str(hw->port_info));
2831	sbuf_finish(sbuf);
2832	sbuf_delete(sbuf);
2833
2834	return (0);
2835}
2836
/**
 * @var phy_link_speeds
 * @brief PHY link speed conversion array
 *
 * Array of link speeds to convert ICE_PHY_TYPE_LOW and ICE_PHY_TYPE_HIGH into
 * link speeds used by the link speed sysctls.
 *
 * The table holds 69 entries: indices 0-63 precede the ICE_PHY_TYPE_HIGH_*
 * marker below and indices 64-68 follow it. The order of the entries is
 * significant, so new entries must be inserted at the matching position.
 *
 * @remark these are based on the indices used in the BIT() macros for the
 * ICE_PHY_TYPE_LOW_* and ICE_PHY_TYPE_HIGH_* definitions.
 */
static const uint16_t phy_link_speeds[] = {
    /* indices 0-1: 100 Mb */
    ICE_AQ_LINK_SPEED_100MB,
    ICE_AQ_LINK_SPEED_100MB,
    /* indices 2-6: 1000 Mb */
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    ICE_AQ_LINK_SPEED_1000MB,
    /* indices 7-9: 2500 Mb */
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    ICE_AQ_LINK_SPEED_2500MB,
    /* indices 10-11: 5 Gb */
    ICE_AQ_LINK_SPEED_5GB,
    ICE_AQ_LINK_SPEED_5GB,
    /* indices 12-18: 10 Gb */
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    ICE_AQ_LINK_SPEED_10GB,
    /* indices 19-29: 25 Gb */
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    ICE_AQ_LINK_SPEED_25GB,
    /* indices 30-35: 40 Gb */
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    ICE_AQ_LINK_SPEED_40GB,
    /* indices 36-50: 50 Gb */
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    ICE_AQ_LINK_SPEED_50GB,
    /* indices 51-63: 100 Gb */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    /* These rates are for ICE_PHY_TYPE_HIGH_* (indices 64-68: 100 Gb) */
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB,
    ICE_AQ_LINK_SPEED_100GB
};
2919
/*
 * Description text listing the value of each link speed flag; see
 * ice_sysctl_advertise_speed().
 */
#define ICE_SYSCTL_HELP_ADVERTISE_SPEED \
	"\nControl advertised link speed." \
	"\nFlags:" \
	"\n\t   0x0 - Auto" \
	"\n\t   0x1 - 10 Mb" \
	"\n\t   0x2 - 100 Mb" \
	"\n\t   0x4 - 1G" \
	"\n\t   0x8 - 2.5G" \
	"\n\t  0x10 - 5G" \
	"\n\t  0x20 - 10G" \
	"\n\t  0x40 - 20G" \
	"\n\t  0x80 - 25G" \
	"\n\t 0x100 - 40G" \
	"\n\t 0x200 - 50G" \
	"\n\t 0x400 - 100G" \
	"\n\t0x8000 - Unknown" \
	"\n\t" \
	"\nUse \"sysctl -x\" to view flags properly."
2938
/*
 * PHY types grouped by link speed. Each macro ORs together the
 * ICE_PHY_TYPE_LOW_* flags (ICE_PHY_TYPE_HIGH_* for the last group) that
 * ice_sysctl_speeds_to_aq_phy_types() enables for the matching
 * ICE_AQ_LINK_SPEED_* flag.
 */

/* 100 Mb */
#define ICE_PHYS_100MB \
	(ICE_PHY_TYPE_LOW_100BASE_TX | \
	 ICE_PHY_TYPE_LOW_100M_SGMII)

/* 1 Gb */
#define ICE_PHYS_1000MB \
	(ICE_PHY_TYPE_LOW_1000BASE_T | \
	 ICE_PHY_TYPE_LOW_1000BASE_SX | \
	 ICE_PHY_TYPE_LOW_1000BASE_LX | \
	 ICE_PHY_TYPE_LOW_1000BASE_KX | \
	 ICE_PHY_TYPE_LOW_1G_SGMII)

/* 2.5 Gb */
#define ICE_PHYS_2500MB \
	(ICE_PHY_TYPE_LOW_2500BASE_T | \
	 ICE_PHY_TYPE_LOW_2500BASE_X | \
	 ICE_PHY_TYPE_LOW_2500BASE_KX)

/* 5 Gb */
#define ICE_PHYS_5GB \
	(ICE_PHY_TYPE_LOW_5GBASE_T | \
	 ICE_PHY_TYPE_LOW_5GBASE_KR)

/* 10 Gb */
#define ICE_PHYS_10GB \
	(ICE_PHY_TYPE_LOW_10GBASE_T | \
	 ICE_PHY_TYPE_LOW_10G_SFI_DA | \
	 ICE_PHY_TYPE_LOW_10GBASE_SR | \
	 ICE_PHY_TYPE_LOW_10GBASE_LR | \
	 ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
	 ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_10G_SFI_C2C)

/* 25 Gb */
#define ICE_PHYS_25GB \
	(ICE_PHY_TYPE_LOW_25GBASE_T | \
	 ICE_PHY_TYPE_LOW_25GBASE_CR | \
	 ICE_PHY_TYPE_LOW_25GBASE_CR_S | \
	 ICE_PHY_TYPE_LOW_25GBASE_CR1 | \
	 ICE_PHY_TYPE_LOW_25GBASE_SR | \
	 ICE_PHY_TYPE_LOW_25GBASE_LR | \
	 ICE_PHY_TYPE_LOW_25GBASE_KR | \
	 ICE_PHY_TYPE_LOW_25GBASE_KR_S | \
	 ICE_PHY_TYPE_LOW_25GBASE_KR1 | \
	 ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_25G_AUI_C2C)

/* 40 Gb */
#define ICE_PHYS_40GB \
	(ICE_PHY_TYPE_LOW_40GBASE_CR4 | \
	 ICE_PHY_TYPE_LOW_40GBASE_SR4 | \
	 ICE_PHY_TYPE_LOW_40GBASE_LR4 | \
	 ICE_PHY_TYPE_LOW_40GBASE_KR4 | \
	 ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_40G_XLAUI)

/* 50 Gb */
#define ICE_PHYS_50GB \
	(ICE_PHY_TYPE_LOW_50GBASE_CR2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_SR2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_LR2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_KR2 | \
	 ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_50G_LAUI2 | \
	 ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_50G_AUI2 | \
	 ICE_PHY_TYPE_LOW_50GBASE_CP | \
	 ICE_PHY_TYPE_LOW_50GBASE_SR | \
	 ICE_PHY_TYPE_LOW_50GBASE_FR | \
	 ICE_PHY_TYPE_LOW_50GBASE_LR | \
	 ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4 | \
	 ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_50G_AUI1)

/* 100 Gb, PHY types carried in the low quad */
#define ICE_PHYS_100GB_LOW \
	(ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
	 ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_100G_CAUI4 | \
	 ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
	 ICE_PHY_TYPE_LOW_100G_AUI4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
	 ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
	 ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
	 ICE_PHY_TYPE_LOW_100GBASE_DR)

/* 100 Gb, PHY types carried in the high quad */
#define ICE_PHYS_100GB_HIGH \
	(ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
	 ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | \
	 ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
	 ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
	 ICE_PHY_TYPE_HIGH_100G_AUI2)
3018
3019/**
3020 * ice_aq_phy_types_to_link_speeds - Convert the PHY Types to speeds
3021 * @phy_type_low: lower 64-bit PHY Type bitmask
3022 * @phy_type_high: upper 64-bit PHY Type bitmask
3023 *
3024 * Convert the PHY Type fields from Get PHY Abilities and Set PHY Config into
3025 * link speed flags. If phy_type_high has an unknown PHY type, then the return
3026 * value will include the "ICE_AQ_LINK_SPEED_UNKNOWN" flag as well.
3027 */
3028static u16
3029ice_aq_phy_types_to_link_speeds(u64 phy_type_low, u64 phy_type_high)
3030{
3031	u16 sysctl_speeds = 0;
3032	int bit;
3033
3034	/* coverity[address_of] */
3035	for_each_set_bit(bit, &phy_type_low, 64)
3036		sysctl_speeds |= phy_link_speeds[bit];
3037
3038	/* coverity[address_of] */
3039	for_each_set_bit(bit, &phy_type_high, 64) {
3040		if ((bit + 64) < (int)ARRAY_SIZE(phy_link_speeds))
3041			sysctl_speeds |= phy_link_speeds[bit + 64];
3042		else
3043			sysctl_speeds |= ICE_AQ_LINK_SPEED_UNKNOWN;
3044	}
3045
3046	return (sysctl_speeds);
3047}
3048
3049/**
3050 * ice_sysctl_speeds_to_aq_phy_types - Convert sysctl speed flags to AQ PHY flags
3051 * @sysctl_speeds: 16-bit sysctl speeds or AQ_LINK_SPEED flags
3052 * @phy_type_low: output parameter for lower AQ PHY flags
3053 * @phy_type_high: output parameter for higher AQ PHY flags
3054 *
3055 * Converts the given link speed flags into AQ PHY type flag sets appropriate
3056 * for use in a Set PHY Config command.
3057 */
3058static void
3059ice_sysctl_speeds_to_aq_phy_types(u16 sysctl_speeds, u64 *phy_type_low,
3060				  u64 *phy_type_high)
3061{
3062	*phy_type_low = 0, *phy_type_high = 0;
3063
3064	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100MB)
3065		*phy_type_low |= ICE_PHYS_100MB;
3066	if (sysctl_speeds & ICE_AQ_LINK_SPEED_1000MB)
3067		*phy_type_low |= ICE_PHYS_1000MB;
3068	if (sysctl_speeds & ICE_AQ_LINK_SPEED_2500MB)
3069		*phy_type_low |= ICE_PHYS_2500MB;
3070	if (sysctl_speeds & ICE_AQ_LINK_SPEED_5GB)
3071		*phy_type_low |= ICE_PHYS_5GB;
3072	if (sysctl_speeds & ICE_AQ_LINK_SPEED_10GB)
3073		*phy_type_low |= ICE_PHYS_10GB;
3074	if (sysctl_speeds & ICE_AQ_LINK_SPEED_25GB)
3075		*phy_type_low |= ICE_PHYS_25GB;
3076	if (sysctl_speeds & ICE_AQ_LINK_SPEED_40GB)
3077		*phy_type_low |= ICE_PHYS_40GB;
3078	if (sysctl_speeds & ICE_AQ_LINK_SPEED_50GB)
3079		*phy_type_low |= ICE_PHYS_50GB;
3080	if (sysctl_speeds & ICE_AQ_LINK_SPEED_100GB) {
3081		*phy_type_low |= ICE_PHYS_100GB_LOW;
3082		*phy_type_high |= ICE_PHYS_100GB_HIGH;
3083	}
3084}
3085
3086/**
3087 * @struct ice_phy_data
3088 * @brief PHY caps and link speeds
3089 *
3090 * Buffer providing report mode and user speeds;
3091 * returning intersection of PHY types and speeds.
3092 */
3093struct ice_phy_data {
3094	u64 phy_low_orig;     /* PHY low quad from report */
3095	u64 phy_high_orig;    /* PHY high quad from report */
3096	u64 phy_low_intr;     /* PHY low quad intersection with user speeds */
3097	u64 phy_high_intr;    /* PHY high quad intersection with user speeds */
3098	u16 user_speeds_orig; /* Input from caller - See ICE_AQ_LINK_SPEED_* */
3099	u16 user_speeds_intr; /* Intersect with report speeds */
3100	u8 report_mode;       /* See ICE_AQC_REPORT_* */
3101};
3102
3103/**
3104 * ice_intersect_phy_types_and_speeds - Return intersection of link speeds
3105 * @sc: device private structure
3106 * @phy_data: device PHY data
3107 *
3108 * On read: Displays the currently supported speeds
3109 * On write: Sets the device's supported speeds
3110 * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3111 */
3112static int
3113ice_intersect_phy_types_and_speeds(struct ice_softc *sc,
3114				   struct ice_phy_data *phy_data)
3115{
3116	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3117	const char *report_types[5] = { "w/o MEDIA",
3118					"w/MEDIA",
3119					"ACTIVE",
3120					"EDOOFUS", /* Not used */
3121					"DFLT" };
3122	struct ice_hw *hw = &sc->hw;
3123	struct ice_port_info *pi = hw->port_info;
3124	enum ice_status status;
3125	u16 report_speeds, temp_speeds;
3126	u8 report_type;
3127	bool apply_speed_filter = false;
3128
3129	switch (phy_data->report_mode) {
3130	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
3131	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
3132	case ICE_AQC_REPORT_ACTIVE_CFG:
3133	case ICE_AQC_REPORT_DFLT_CFG:
3134		report_type = phy_data->report_mode >> 1;
3135		break;
3136	default:
3137		device_printf(sc->dev,
3138		    "%s: phy_data.report_mode \"%u\" doesn't exist\n",
3139		    __func__, phy_data->report_mode);
3140		return (EINVAL);
3141	}
3142
3143	/* 0 is treated as "Auto"; the driver will handle selecting the
3144	 * correct speeds. Including, in some cases, applying an override
3145	 * if provided.
3146	 */
3147	if (phy_data->user_speeds_orig == 0)
3148		phy_data->user_speeds_orig = USHRT_MAX;
3149	else if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE))
3150		apply_speed_filter = true;
3151
3152	status = ice_aq_get_phy_caps(pi, false, phy_data->report_mode, &pcaps, NULL);
3153	if (status != ICE_SUCCESS) {
3154		device_printf(sc->dev,
3155		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
3156		    __func__, report_types[report_type],
3157		    ice_status_str(status),
3158		    ice_aq_str(sc->hw.adminq.sq_last_status));
3159		return (EIO);
3160	}
3161
3162	phy_data->phy_low_orig = le64toh(pcaps.phy_type_low);
3163	phy_data->phy_high_orig = le64toh(pcaps.phy_type_high);
3164	report_speeds = ice_aq_phy_types_to_link_speeds(phy_data->phy_low_orig,
3165	    phy_data->phy_high_orig);
3166	if (apply_speed_filter) {
3167		temp_speeds = ice_apply_supported_speed_filter(report_speeds,
3168		    pcaps.module_type[0]);
3169		if ((phy_data->user_speeds_orig & temp_speeds) == 0) {
3170			device_printf(sc->dev,
3171			    "User-specified speeds (\"0x%04X\") not supported\n",
3172			    phy_data->user_speeds_orig);
3173			return (EINVAL);
3174		}
3175		report_speeds = temp_speeds;
3176	}
3177	ice_sysctl_speeds_to_aq_phy_types(phy_data->user_speeds_orig,
3178	    &phy_data->phy_low_intr, &phy_data->phy_high_intr);
3179	phy_data->user_speeds_intr = phy_data->user_speeds_orig & report_speeds;
3180	phy_data->phy_low_intr &= phy_data->phy_low_orig;
3181	phy_data->phy_high_intr &= phy_data->phy_high_orig;
3182
3183	return (0);
3184 }
3185
3186/**
3187 * ice_sysctl_advertise_speed - Display/change link speeds supported by port
3188 * @oidp: sysctl oid structure
3189 * @arg1: pointer to private data structure
3190 * @arg2: unused
3191 * @req: sysctl request pointer
3192 *
3193 * On read: Displays the currently supported speeds
3194 * On write: Sets the device's supported speeds
3195 * Valid input flags: see ICE_SYSCTL_HELP_ADVERTISE_SPEED
3196 */
3197static int
3198ice_sysctl_advertise_speed(SYSCTL_HANDLER_ARGS)
3199{
3200	struct ice_softc *sc = (struct ice_softc *)arg1;
3201	struct ice_port_info *pi = sc->hw.port_info;
3202	struct ice_phy_data phy_data = { 0 };
3203	device_t dev = sc->dev;
3204	u16 sysctl_speeds;
3205	int ret;
3206
3207	UNREFERENCED_PARAMETER(arg2);
3208
3209	if (ice_driver_is_detaching(sc))
3210		return (ESHUTDOWN);
3211
3212	/* Get the current speeds from the adapter's "active" configuration. */
3213	phy_data.report_mode = ICE_AQC_REPORT_ACTIVE_CFG;
3214	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
3215	if (ret) {
3216		/* Error message already printed within function */
3217		return (ret);
3218	}
3219
3220	sysctl_speeds = phy_data.user_speeds_intr;
3221
3222	ret = sysctl_handle_16(oidp, &sysctl_speeds, 0, req);
3223	if ((ret) || (req->newptr == NULL))
3224		return (ret);
3225
3226	if (sysctl_speeds > 0x7FF) {
3227		device_printf(dev,
3228			      "%s: \"%u\" is outside of the range of acceptable values.\n",
3229			      __func__, sysctl_speeds);
3230		return (EINVAL);
3231	}
3232
3233	pi->phy.curr_user_speed_req = sysctl_speeds;
3234
3235	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3236		return 0;
3237
3238	/* Apply settings requested by user */
3239	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS);
3240}
3241
/*
 * Description text for the requested FEC mode; see ice_sysctl_fec_config().
 * Pairs each short keyword with its ICE_FEC_STRING_* name.
 */
#define ICE_SYSCTL_HELP_FEC_CONFIG \
	"\nDisplay or set the port's requested FEC mode." \
	"\n\tauto - " ICE_FEC_STRING_AUTO \
	"\n\tfc - " ICE_FEC_STRING_BASER \
	"\n\trs - " ICE_FEC_STRING_RS \
	"\n\tnone - " ICE_FEC_STRING_NONE \
	"\nEither of the left or right strings above can be used to set the requested mode."
3249
3250/**
3251 * ice_sysctl_fec_config - Display/change the configured FEC mode
3252 * @oidp: sysctl oid structure
3253 * @arg1: pointer to private data structure
3254 * @arg2: unused
3255 * @req: sysctl request pointer
3256 *
3257 * On read: Displays the configured FEC mode
3258 * On write: Sets the device's FEC mode to the input string, if it's valid.
3259 * Valid input strings: see ICE_SYSCTL_HELP_FEC_CONFIG
3260 */
3261static int
3262ice_sysctl_fec_config(SYSCTL_HANDLER_ARGS)
3263{
3264	struct ice_softc *sc = (struct ice_softc *)arg1;
3265	struct ice_port_info *pi = sc->hw.port_info;
3266	enum ice_fec_mode new_mode;
3267	device_t dev = sc->dev;
3268	char req_fec[32];
3269	int ret;
3270
3271	UNREFERENCED_PARAMETER(arg2);
3272
3273	if (ice_driver_is_detaching(sc))
3274		return (ESHUTDOWN);
3275
3276	bzero(req_fec, sizeof(req_fec));
3277	strlcpy(req_fec, ice_requested_fec_mode(pi), sizeof(req_fec));
3278
3279	ret = sysctl_handle_string(oidp, req_fec, sizeof(req_fec), req);
3280	if ((ret) || (req->newptr == NULL))
3281		return (ret);
3282
3283	if (strcmp(req_fec, "auto") == 0 ||
3284	    strcmp(req_fec, ice_fec_str(ICE_FEC_AUTO)) == 0) {
3285		if (sc->allow_no_fec_mod_in_auto)
3286			new_mode = ICE_FEC_DIS_AUTO;
3287		else
3288			new_mode = ICE_FEC_AUTO;
3289	} else if (strcmp(req_fec, "fc") == 0 ||
3290	    strcmp(req_fec, ice_fec_str(ICE_FEC_BASER)) == 0) {
3291		new_mode = ICE_FEC_BASER;
3292	} else if (strcmp(req_fec, "rs") == 0 ||
3293	    strcmp(req_fec, ice_fec_str(ICE_FEC_RS)) == 0) {
3294		new_mode = ICE_FEC_RS;
3295	} else if (strcmp(req_fec, "none") == 0 ||
3296	    strcmp(req_fec, ice_fec_str(ICE_FEC_NONE)) == 0) {
3297		new_mode = ICE_FEC_NONE;
3298	} else {
3299		device_printf(dev,
3300		    "%s: \"%s\" is not a valid FEC mode\n",
3301		    __func__, req_fec);
3302		return (EINVAL);
3303	}
3304
3305	/* Cache user FEC mode for later link ups */
3306	pi->phy.curr_user_fec_req = new_mode;
3307
3308	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3309		return 0;
3310
3311	/* Apply settings requested by user */
3312	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FEC);
3313}
3314
3315/**
3316 * ice_sysctl_negotiated_fec - Display the negotiated FEC mode on the link
3317 * @oidp: sysctl oid structure
3318 * @arg1: pointer to private data structure
3319 * @arg2: unused
3320 * @req: sysctl request pointer
3321 *
3322 * On read: Displays the negotiated FEC mode, in a string
3323 */
3324static int
3325ice_sysctl_negotiated_fec(SYSCTL_HANDLER_ARGS)
3326{
3327	struct ice_softc *sc = (struct ice_softc *)arg1;
3328	struct ice_hw *hw = &sc->hw;
3329	char neg_fec[32];
3330	int ret;
3331
3332	UNREFERENCED_PARAMETER(arg2);
3333
3334	if (ice_driver_is_detaching(sc))
3335		return (ESHUTDOWN);
3336
3337	/* Copy const string into a buffer to drop const qualifier */
3338	bzero(neg_fec, sizeof(neg_fec));
3339	strlcpy(neg_fec, ice_negotiated_fec_mode(hw->port_info), sizeof(neg_fec));
3340
3341	ret = sysctl_handle_string(oidp, neg_fec, 0, req);
3342	if (req->newptr != NULL)
3343		return (EPERM);
3344
3345	return (ret);
3346}
3347
/*
 * Description text for the advertised flow control mode; see
 * ice_sysctl_fc_config(). Pairs each number with its ICE_FC_STRING_* name.
 */
#define ICE_SYSCTL_HELP_FC_CONFIG \
	"\nDisplay or set the port's advertised flow control mode.\n" \
	"\t0 - " ICE_FC_STRING_NONE \
	"\n\t1 - " ICE_FC_STRING_RX \
	"\n\t2 - " ICE_FC_STRING_TX \
	"\n\t3 - " ICE_FC_STRING_FULL \
	"\nEither the numbers or the strings above can be used to set the advertised mode."
3355
3356/**
3357 * ice_sysctl_fc_config - Display/change the advertised flow control mode
3358 * @oidp: sysctl oid structure
3359 * @arg1: pointer to private data structure
3360 * @arg2: unused
3361 * @req: sysctl request pointer
3362 *
3363 * On read: Displays the configured flow control mode
3364 * On write: Sets the device's flow control mode to the input, if it's valid.
3365 * Valid input strings: see ICE_SYSCTL_HELP_FC_CONFIG
3366 */
3367static int
3368ice_sysctl_fc_config(SYSCTL_HANDLER_ARGS)
3369{
3370	struct ice_softc *sc = (struct ice_softc *)arg1;
3371	struct ice_port_info *pi = sc->hw.port_info;
3372	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3373	enum ice_fc_mode old_mode, new_mode;
3374	struct ice_hw *hw = &sc->hw;
3375	device_t dev = sc->dev;
3376	enum ice_status status;
3377	int ret, fc_num;
3378	bool mode_set = false;
3379	struct sbuf buf;
3380	char *fc_str_end;
3381	char fc_str[32];
3382
3383	UNREFERENCED_PARAMETER(arg2);
3384
3385	if (ice_driver_is_detaching(sc))
3386		return (ESHUTDOWN);
3387
3388	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
3389				     &pcaps, NULL);
3390	if (status != ICE_SUCCESS) {
3391		device_printf(dev,
3392		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3393		    __func__, ice_status_str(status),
3394		    ice_aq_str(hw->adminq.sq_last_status));
3395		return (EIO);
3396	}
3397
3398	/* Convert HW response format to SW enum value */
3399	if ((pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
3400	    (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE))
3401		old_mode = ICE_FC_FULL;
3402	else if (pcaps.caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
3403		old_mode = ICE_FC_TX_PAUSE;
3404	else if (pcaps.caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
3405		old_mode = ICE_FC_RX_PAUSE;
3406	else
3407		old_mode = ICE_FC_NONE;
3408
3409	/* Create "old" string for output */
3410	bzero(fc_str, sizeof(fc_str));
3411	sbuf_new_for_sysctl(&buf, fc_str, sizeof(fc_str), req);
3412	sbuf_printf(&buf, "%d<%s>", old_mode, ice_fc_str(old_mode));
3413	sbuf_finish(&buf);
3414	sbuf_delete(&buf);
3415
3416	ret = sysctl_handle_string(oidp, fc_str, sizeof(fc_str), req);
3417	if ((ret) || (req->newptr == NULL))
3418		return (ret);
3419
3420	/* Try to parse input as a string, first */
3421	if (strcasecmp(ice_fc_str(ICE_FC_FULL), fc_str) == 0) {
3422		new_mode = ICE_FC_FULL;
3423		mode_set = true;
3424	}
3425	else if (strcasecmp(ice_fc_str(ICE_FC_TX_PAUSE), fc_str) == 0) {
3426		new_mode = ICE_FC_TX_PAUSE;
3427		mode_set = true;
3428	}
3429	else if (strcasecmp(ice_fc_str(ICE_FC_RX_PAUSE), fc_str) == 0) {
3430		new_mode = ICE_FC_RX_PAUSE;
3431		mode_set = true;
3432	}
3433	else if (strcasecmp(ice_fc_str(ICE_FC_NONE), fc_str) == 0) {
3434		new_mode = ICE_FC_NONE;
3435		mode_set = true;
3436	}
3437
3438	/*
3439	 * Then check if it's an integer, for compatibility with the method
3440	 * used in older drivers.
3441	 */
3442	if (!mode_set) {
3443		fc_num = strtol(fc_str, &fc_str_end, 0);
3444		if (fc_str_end == fc_str)
3445			fc_num = -1;
3446		switch (fc_num) {
3447		case 3:
3448			new_mode = ICE_FC_FULL;
3449			break;
3450		case 2:
3451			new_mode = ICE_FC_TX_PAUSE;
3452			break;
3453		case 1:
3454			new_mode = ICE_FC_RX_PAUSE;
3455			break;
3456		case 0:
3457			new_mode = ICE_FC_NONE;
3458			break;
3459		default:
3460			device_printf(dev,
3461			    "%s: \"%s\" is not a valid flow control mode\n",
3462			    __func__, fc_str);
3463			return (EINVAL);
3464		}
3465	}
3466
3467	/* Save flow control mode from user */
3468	pi->phy.curr_user_fc_req = new_mode;
3469
3470	/* Turn off Priority Flow Control when Link Flow Control is enabled */
3471	if ((hw->port_info->qos_cfg.is_sw_lldp) &&
3472	    (hw->port_info->qos_cfg.local_dcbx_cfg.pfc.pfcena != 0) &&
3473	    (new_mode != ICE_FC_NONE)) {
3474		ret = ice_config_pfc(sc, 0x0);
3475		if (ret)
3476			return (ret);
3477	}
3478
3479	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) && !sc->link_up)
3480		return 0;
3481
3482	/* Apply settings requested by user */
3483	return ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
3484}
3485
3486/**
3487 * ice_sysctl_negotiated_fc - Display currently negotiated FC mode
3488 * @oidp: sysctl oid structure
3489 * @arg1: pointer to private data structure
3490 * @arg2: unused
3491 * @req: sysctl request pointer
3492 *
3493 * On read: Displays the currently negotiated flow control settings.
3494 *
3495 * If link is not established, this will report ICE_FC_NONE, as no flow
3496 * control is negotiated while link is down.
3497 */
3498static int
3499ice_sysctl_negotiated_fc(SYSCTL_HANDLER_ARGS)
3500{
3501	struct ice_softc *sc = (struct ice_softc *)arg1;
3502	struct ice_port_info *pi = sc->hw.port_info;
3503	const char *negotiated_fc;
3504
3505	UNREFERENCED_PARAMETER(arg2);
3506
3507	if (ice_driver_is_detaching(sc))
3508		return (ESHUTDOWN);
3509
3510	negotiated_fc = ice_flowcontrol_mode(pi);
3511
3512	return sysctl_handle_string(oidp, __DECONST(char *, negotiated_fc), 0, req);
3513}
3514
3515/**
3516 * __ice_sysctl_phy_type_handler - Display/change supported PHY types/speeds
3517 * @oidp: sysctl oid structure
3518 * @arg1: pointer to private data structure
3519 * @arg2: unused
3520 * @req: sysctl request pointer
3521 * @is_phy_type_high: if true, handle the high PHY type instead of the low PHY type
3522 *
3523 * Private handler for phy_type_high and phy_type_low sysctls.
3524 */
3525static int
3526__ice_sysctl_phy_type_handler(SYSCTL_HANDLER_ARGS, bool is_phy_type_high)
3527{
3528	struct ice_softc *sc = (struct ice_softc *)arg1;
3529	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3530	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
3531	struct ice_hw *hw = &sc->hw;
3532	device_t dev = sc->dev;
3533	enum ice_status status;
3534	uint64_t types;
3535	int ret;
3536
3537	UNREFERENCED_PARAMETER(arg2);
3538
3539	if (ice_driver_is_detaching(sc))
3540		return (ESHUTDOWN);
3541
3542	status = ice_aq_get_phy_caps(hw->port_info, false, ICE_AQC_REPORT_ACTIVE_CFG,
3543				     &pcaps, NULL);
3544	if (status != ICE_SUCCESS) {
3545		device_printf(dev,
3546		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3547		    __func__, ice_status_str(status),
3548		    ice_aq_str(hw->adminq.sq_last_status));
3549		return (EIO);
3550	}
3551
3552	if (is_phy_type_high)
3553		types = pcaps.phy_type_high;
3554	else
3555		types = pcaps.phy_type_low;
3556
3557	ret = sysctl_handle_64(oidp, &types, sizeof(types), req);
3558	if ((ret) || (req->newptr == NULL))
3559		return (ret);
3560
3561	ice_copy_phy_caps_to_cfg(hw->port_info, &pcaps, &cfg);
3562
3563	if (is_phy_type_high)
3564		cfg.phy_type_high = types & hw->port_info->phy.phy_type_high;
3565	else
3566		cfg.phy_type_low = types & hw->port_info->phy.phy_type_low;
3567	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
3568
3569	status = ice_aq_set_phy_cfg(hw, hw->port_info, &cfg, NULL);
3570	if (status != ICE_SUCCESS) {
3571		device_printf(dev,
3572		    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
3573		    __func__, ice_status_str(status),
3574		    ice_aq_str(hw->adminq.sq_last_status));
3575		return (EIO);
3576	}
3577
3578	return (0);
3579
3580}
3581
3582/**
3583 * ice_sysctl_phy_type_low - Display/change supported lower PHY types/speeds
3584 * @oidp: sysctl oid structure
3585 * @arg1: pointer to private data structure
3586 * @arg2: unused
3587 * @req: sysctl request pointer
3588 *
3589 * On read: Displays the currently supported lower PHY types
3590 * On write: Sets the device's supported low PHY types
3591 */
3592static int
3593ice_sysctl_phy_type_low(SYSCTL_HANDLER_ARGS)
3594{
3595	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, false);
3596}
3597
3598/**
3599 * ice_sysctl_phy_type_high - Display/change supported higher PHY types/speeds
3600 * @oidp: sysctl oid structure
3601 * @arg1: pointer to private data structure
3602 * @arg2: unused
3603 * @req: sysctl request pointer
3604 *
3605 * On read: Displays the currently supported higher PHY types
3606 * On write: Sets the device's supported high PHY types
3607 */
3608static int
3609ice_sysctl_phy_type_high(SYSCTL_HANDLER_ARGS)
3610{
3611	return __ice_sysctl_phy_type_handler(oidp, arg1, arg2, req, true);
3612}
3613
3614/**
3615 * ice_sysctl_phy_caps - Display response from Get PHY abililties
3616 * @oidp: sysctl oid structure
3617 * @arg1: pointer to private data structure
3618 * @arg2: unused
3619 * @req: sysctl request pointer
3620 * @report_mode: the mode to report
3621 *
3622 * On read: Display the response from Get PHY abillities with the given report
3623 * mode.
3624 */
3625static int
3626ice_sysctl_phy_caps(SYSCTL_HANDLER_ARGS, u8 report_mode)
3627{
3628	struct ice_softc *sc = (struct ice_softc *)arg1;
3629	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
3630	struct ice_hw *hw = &sc->hw;
3631	struct ice_port_info *pi = hw->port_info;
3632	device_t dev = sc->dev;
3633	enum ice_status status;
3634	int ret;
3635
3636	UNREFERENCED_PARAMETER(arg2);
3637
3638	ret = priv_check(curthread, PRIV_DRIVER);
3639	if (ret)
3640		return (ret);
3641
3642	if (ice_driver_is_detaching(sc))
3643		return (ESHUTDOWN);
3644
3645	status = ice_aq_get_phy_caps(pi, true, report_mode, &pcaps, NULL);
3646	if (status != ICE_SUCCESS) {
3647		device_printf(dev,
3648		    "%s: ice_aq_get_phy_caps failed; status %s, aq_err %s\n",
3649		    __func__, ice_status_str(status),
3650		    ice_aq_str(hw->adminq.sq_last_status));
3651		return (EIO);
3652	}
3653
3654	ret = sysctl_handle_opaque(oidp, &pcaps, sizeof(pcaps), req);
3655	if (req->newptr != NULL)
3656		return (EPERM);
3657
3658	return (ret);
3659}
3660
3661/**
3662 * ice_sysctl_phy_sw_caps - Display response from Get PHY abililties
3663 * @oidp: sysctl oid structure
3664 * @arg1: pointer to private data structure
3665 * @arg2: unused
3666 * @req: sysctl request pointer
3667 *
3668 * On read: Display the response from Get PHY abillities reporting the last
3669 * software configuration.
3670 */
3671static int
3672ice_sysctl_phy_sw_caps(SYSCTL_HANDLER_ARGS)
3673{
3674	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3675				   ICE_AQC_REPORT_ACTIVE_CFG);
3676}
3677
3678/**
3679 * ice_sysctl_phy_nvm_caps - Display response from Get PHY abililties
3680 * @oidp: sysctl oid structure
3681 * @arg1: pointer to private data structure
3682 * @arg2: unused
3683 * @req: sysctl request pointer
3684 *
3685 * On read: Display the response from Get PHY abillities reporting the NVM
3686 * configuration.
3687 */
3688static int
3689ice_sysctl_phy_nvm_caps(SYSCTL_HANDLER_ARGS)
3690{
3691	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3692				   ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA);
3693}
3694
3695/**
3696 * ice_sysctl_phy_topo_caps - Display response from Get PHY abililties
3697 * @oidp: sysctl oid structure
3698 * @arg1: pointer to private data structure
3699 * @arg2: unused
3700 * @req: sysctl request pointer
3701 *
3702 * On read: Display the response from Get PHY abillities reporting the
3703 * topology configuration.
3704 */
3705static int
3706ice_sysctl_phy_topo_caps(SYSCTL_HANDLER_ARGS)
3707{
3708	return ice_sysctl_phy_caps(oidp, arg1, arg2, req,
3709				   ICE_AQC_REPORT_TOPO_CAP_MEDIA);
3710}
3711
3712/**
3713 * ice_sysctl_phy_link_status - Display response from Get Link Status
3714 * @oidp: sysctl oid structure
3715 * @arg1: pointer to private data structure
3716 * @arg2: unused
3717 * @req: sysctl request pointer
3718 *
3719 * On read: Display the response from firmware for the Get Link Status
3720 * request.
3721 */
3722static int
3723ice_sysctl_phy_link_status(SYSCTL_HANDLER_ARGS)
3724{
3725	struct ice_aqc_get_link_status_data link_data = { 0 };
3726	struct ice_softc *sc = (struct ice_softc *)arg1;
3727	struct ice_hw *hw = &sc->hw;
3728	struct ice_port_info *pi = hw->port_info;
3729	struct ice_aqc_get_link_status *resp;
3730	struct ice_aq_desc desc;
3731	device_t dev = sc->dev;
3732	enum ice_status status;
3733	int ret;
3734
3735	UNREFERENCED_PARAMETER(arg2);
3736
3737	/*
3738	 * Ensure that only contexts with driver privilege are allowed to
3739	 * access this information
3740	 */
3741	ret = priv_check(curthread, PRIV_DRIVER);
3742	if (ret)
3743		return (ret);
3744
3745	if (ice_driver_is_detaching(sc))
3746		return (ESHUTDOWN);
3747
3748	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
3749	resp = &desc.params.get_link_status;
3750	resp->lport_num = pi->lport;
3751
3752	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), NULL);
3753	if (status != ICE_SUCCESS) {
3754		device_printf(dev,
3755		    "%s: ice_aq_send_cmd failed; status %s, aq_err %s\n",
3756		    __func__, ice_status_str(status),
3757		    ice_aq_str(hw->adminq.sq_last_status));
3758		return (EIO);
3759	}
3760
3761	ret = sysctl_handle_opaque(oidp, &link_data, sizeof(link_data), req);
3762	if (req->newptr != NULL)
3763		return (EPERM);
3764
3765	return (ret);
3766}
3767
3768/**
3769 * ice_sysctl_fw_cur_lldp_persist_status - Display current FW LLDP status
3770 * @oidp: sysctl oid structure
3771 * @arg1: pointer to private softc structure
3772 * @arg2: unused
3773 * @req: sysctl request pointer
3774 *
3775 * On read: Displays current persistent LLDP status.
3776 */
3777static int
3778ice_sysctl_fw_cur_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3779{
3780	struct ice_softc *sc = (struct ice_softc *)arg1;
3781	struct ice_hw *hw = &sc->hw;
3782	device_t dev = sc->dev;
3783	enum ice_status status;
3784	struct sbuf *sbuf;
3785	u32 lldp_state;
3786
3787	UNREFERENCED_PARAMETER(arg2);
3788	UNREFERENCED_PARAMETER(oidp);
3789
3790	if (ice_driver_is_detaching(sc))
3791		return (ESHUTDOWN);
3792
3793	status = ice_get_cur_lldp_persist_status(hw, &lldp_state);
3794	if (status) {
3795		device_printf(dev,
3796		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3797		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3798		return (EIO);
3799	}
3800
3801	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3802	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3803	sbuf_finish(sbuf);
3804	sbuf_delete(sbuf);
3805
3806	return (0);
3807}
3808
3809/**
3810 * ice_sysctl_fw_dflt_lldp_persist_status - Display default FW LLDP status
3811 * @oidp: sysctl oid structure
3812 * @arg1: pointer to private softc structure
3813 * @arg2: unused
3814 * @req: sysctl request pointer
3815 *
3816 * On read: Displays default persistent LLDP status.
3817 */
3818static int
3819ice_sysctl_fw_dflt_lldp_persist_status(SYSCTL_HANDLER_ARGS)
3820{
3821	struct ice_softc *sc = (struct ice_softc *)arg1;
3822	struct ice_hw *hw = &sc->hw;
3823	device_t dev = sc->dev;
3824	enum ice_status status;
3825	struct sbuf *sbuf;
3826	u32 lldp_state;
3827
3828	UNREFERENCED_PARAMETER(arg2);
3829	UNREFERENCED_PARAMETER(oidp);
3830
3831	if (ice_driver_is_detaching(sc))
3832		return (ESHUTDOWN);
3833
3834	status = ice_get_dflt_lldp_persist_status(hw, &lldp_state);
3835	if (status) {
3836		device_printf(dev,
3837		    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3838		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3839		return (EIO);
3840	}
3841
3842	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
3843	sbuf_printf(sbuf, "%s", ice_fw_lldp_status(lldp_state));
3844	sbuf_finish(sbuf);
3845	sbuf_delete(sbuf);
3846
3847	return (0);
3848}
3849
3850/**
3851 * ice_dscp_is_mapped - Check for non-zero DSCP to TC mappings
3852 * @dcbcfg: Configuration struct to check for mappings in
3853 *
3854 * @return true if there exists a non-zero DSCP to TC mapping
3855 * inside the input DCB configuration struct.
3856 */
3857static bool
3858ice_dscp_is_mapped(struct ice_dcbx_cfg *dcbcfg)
3859{
3860	for (int i = 0; i < ICE_DSCP_NUM_VAL; i++)
3861		if (dcbcfg->dscp_map[i] != 0)
3862			return (true);
3863
3864	return (false);
3865}
3866
/* Description text listing the two FW LLDP agent states. */
#define ICE_SYSCTL_HELP_FW_LLDP_AGENT \
	"\nDisplay or change FW LLDP agent state:" \
	"\n\t0 - disabled" \
	"\n\t1 - enabled"
3871
3872/**
3873 * ice_sysctl_fw_lldp_agent - Display or change the FW LLDP agent status
3874 * @oidp: sysctl oid structure
3875 * @arg1: pointer to private softc structure
3876 * @arg2: unused
3877 * @req: sysctl request pointer
3878 *
3879 * On read: Displays whether the FW LLDP agent is running
3880 * On write: Persistently enables or disables the FW LLDP agent
3881 */
3882static int
3883ice_sysctl_fw_lldp_agent(SYSCTL_HANDLER_ARGS)
3884{
3885	struct ice_softc *sc = (struct ice_softc *)arg1;
3886	struct ice_dcbx_cfg *local_dcbx_cfg;
3887	struct ice_hw *hw = &sc->hw;
3888	device_t dev = sc->dev;
3889	enum ice_status status;
3890	int ret;
3891	u32 old_state;
3892	u8 fw_lldp_enabled;
3893	bool retried_start_lldp = false;
3894
3895	UNREFERENCED_PARAMETER(arg2);
3896
3897	if (ice_driver_is_detaching(sc))
3898		return (ESHUTDOWN);
3899
3900	status = ice_get_cur_lldp_persist_status(hw, &old_state);
3901	if (status) {
3902		device_printf(dev,
3903		    "Could not acquire current LLDP persistence status, err %s aq_err %s\n",
3904		    ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
3905		return (EIO);
3906	}
3907
3908	if (old_state > ICE_LLDP_ADMINSTATUS_ENA_RXTX) {
3909		status = ice_get_dflt_lldp_persist_status(hw, &old_state);
3910		if (status) {
3911			device_printf(dev,
3912			    "Could not acquire default LLDP persistence status, err %s aq_err %s\n",
3913			    ice_status_str(status),
3914			    ice_aq_str(hw->adminq.sq_last_status));
3915			return (EIO);
3916		}
3917	}
3918	if (old_state == 0)
3919		fw_lldp_enabled = false;
3920	else
3921		fw_lldp_enabled = true;
3922
3923	ret = sysctl_handle_bool(oidp, &fw_lldp_enabled, 0, req);
3924	if ((ret) || (req->newptr == NULL))
3925		return (ret);
3926
3927	if (old_state == 0 && fw_lldp_enabled == false)
3928		return (0);
3929
3930	if (old_state != 0 && fw_lldp_enabled == true)
3931		return (0);
3932
3933	/* Block transition to FW LLDP if DSCP mode is enabled */
3934	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
3935	if ((local_dcbx_cfg->pfc_mode == ICE_QOS_MODE_DSCP) ||
3936	    ice_dscp_is_mapped(local_dcbx_cfg)) {
3937		device_printf(dev,
3938			      "Cannot enable FW-LLDP agent while DSCP QoS is active.\n");
3939		return (EOPNOTSUPP);
3940	}
3941
3942	if (fw_lldp_enabled == false) {
3943		status = ice_aq_stop_lldp(hw, true, true, NULL);
3944		/* EPERM is returned if the LLDP agent is already shutdown */
3945		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM) {
3946			device_printf(dev,
3947			    "%s: ice_aq_stop_lldp failed; status %s, aq_err %s\n",
3948			    __func__, ice_status_str(status),
3949			    ice_aq_str(hw->adminq.sq_last_status));
3950			return (EIO);
3951		}
3952		ice_aq_set_dcb_parameters(hw, true, NULL);
3953		hw->port_info->qos_cfg.is_sw_lldp = true;
3954		ice_add_rx_lldp_filter(sc);
3955	} else {
3956		ice_del_rx_lldp_filter(sc);
3957retry_start_lldp:
3958		status = ice_aq_start_lldp(hw, true, NULL);
3959		if (status) {
3960			switch (hw->adminq.sq_last_status) {
3961			/* EEXIST is returned if the LLDP agent is already started */
3962			case ICE_AQ_RC_EEXIST:
3963				break;
3964			case ICE_AQ_RC_EAGAIN:
3965				/* Retry command after a 2 second wait */
3966				if (retried_start_lldp == false) {
3967					retried_start_lldp = true;
3968					pause("slldp", ICE_START_LLDP_RETRY_WAIT);
3969					goto retry_start_lldp;
3970				}
3971				/* Fallthrough */
3972			default:
3973				device_printf(dev,
3974				    "%s: ice_aq_start_lldp failed; status %s, aq_err %s\n",
3975				    __func__, ice_status_str(status),
3976				    ice_aq_str(hw->adminq.sq_last_status));
3977				return (EIO);
3978			}
3979		}
3980		ice_start_dcbx_agent(sc);
3981
3982		/* Init DCB needs to be done during enabling LLDP to properly
3983		 * propagate the configuration.
3984		 */
3985		status = ice_init_dcb(hw, true);
3986		if (status) {
3987			device_printf(dev,
3988			    "%s: ice_init_dcb failed; status %s, aq_err %s\n",
3989			    __func__, ice_status_str(status),
3990			    ice_aq_str(hw->adminq.sq_last_status));
3991			hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
3992		}
3993	}
3994
3995	return (ret);
3996}
3997
/* Description string registered with the "ets_min_rate" sysctl in
 * ice_add_device_sysctls().
 */
#define ICE_SYSCTL_HELP_ETS_MIN_RATE \
"\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS bandwidth table." \
"\nIn SW DCB mode, displays and allows setting the table." \
"\nInput must be in the format e.g. 30,10,10,10,10,10,10,10" \
"\nWhere the bandwidth total must add up to 100"
4003
4004/**
4005 * ice_sysctl_ets_min_rate - Report/configure ETS bandwidth
4006 * @oidp: sysctl oid structure
4007 * @arg1: pointer to private data structure
4008 * @arg2: unused
4009 * @req: sysctl request pointer
4010 *
4011 * Returns the current ETS TC bandwidth table
4012 * cached by the driver.
4013 *
4014 * In SW DCB mode this sysctl also accepts a value that will
4015 * be sent to the firmware for configuration.
4016 */
4017static int
4018ice_sysctl_ets_min_rate(SYSCTL_HANDLER_ARGS)
4019{
4020	struct ice_softc *sc = (struct ice_softc *)arg1;
4021	struct ice_dcbx_cfg *local_dcbx_cfg;
4022	struct ice_port_info *pi;
4023	struct ice_hw *hw = &sc->hw;
4024	device_t dev = sc->dev;
4025	enum ice_status status;
4026	struct sbuf *sbuf;
4027	int ret;
4028
4029	/* Store input rates from user */
4030	char ets_user_buf[128] = "";
4031	u8 new_ets_table[ICE_MAX_TRAFFIC_CLASS] = {};
4032
4033	UNREFERENCED_PARAMETER(arg2);
4034
4035	if (ice_driver_is_detaching(sc))
4036		return (ESHUTDOWN);
4037
4038	if (req->oldptr == NULL && req->newptr == NULL) {
4039		ret = SYSCTL_OUT(req, 0, 128);
4040		return (ret);
4041	}
4042
4043	pi = hw->port_info;
4044	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4045
4046	sbuf = sbuf_new(NULL, ets_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4047
4048	/* Format ETS BW data for output */
4049	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4050		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.tcbwtable[i]);
4051		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4052			sbuf_printf(sbuf, ",");
4053	}
4054
4055	sbuf_finish(sbuf);
4056	sbuf_delete(sbuf);
4057
4058	/* Read in the new ETS values */
4059	ret = sysctl_handle_string(oidp, ets_user_buf, sizeof(ets_user_buf), req);
4060	if ((ret) || (req->newptr == NULL))
4061		return (ret);
4062
4063	/* Don't allow setting changes in FW DCB mode */
4064	if (!hw->port_info->qos_cfg.is_sw_lldp)
4065		return (EPERM);
4066
4067	ret = ice_ets_str_to_tbl(ets_user_buf, new_ets_table, 100);
4068	if (ret) {
4069		device_printf(dev, "%s: Could not parse input BW table: %s\n",
4070		    __func__, ets_user_buf);
4071		return (ret);
4072	}
4073
4074	if (!ice_check_ets_bw(new_ets_table)) {
4075		device_printf(dev, "%s: Bandwidth sum does not equal 100: %s\n",
4076		    __func__, ets_user_buf);
4077		return (EINVAL);
4078	}
4079
4080	memcpy(local_dcbx_cfg->etscfg.tcbwtable, new_ets_table,
4081	    sizeof(new_ets_table));
4082
4083	/* If BW > 0, then set TSA entry to 2 */
4084	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4085		if (new_ets_table[i] > 0)
4086			local_dcbx_cfg->etscfg.tsatable[i] = 2;
4087		else
4088			local_dcbx_cfg->etscfg.tsatable[i] = 0;
4089	}
4090	local_dcbx_cfg->etscfg.willing = 0;
4091	local_dcbx_cfg->etsrec = local_dcbx_cfg->etscfg;
4092	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
4093
4094	status = ice_set_dcb_cfg(pi);
4095	if (status) {
4096		device_printf(dev,
4097		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4098		    __func__, ice_status_str(status),
4099		    ice_aq_str(hw->adminq.sq_last_status));
4100		return (EIO);
4101	}
4102
4103	ice_do_dcb_reconfig(sc, false);
4104
4105	return (0);
4106}
4107
/* Description string registered with the "up2tc_map" sysctl in
 * ice_add_device_sysctls().
 */
#define ICE_SYSCTL_HELP_UP2TC_MAP \
"\nIn FW DCB mode (fw_lldp_agent=1), displays the current ETS priority assignment table." \
"\nIn SW DCB mode, displays and allows setting the table." \
"\nInput must be in this format: 0,1,2,3,4,5,6,7" \
"\nWhere the 1st number is the TC for UP0, 2nd number is the TC for UP1, etc"
4113
4114/**
4115 * ice_sysctl_up2tc_map - Report or configure UP2TC mapping
4116 * @oidp: sysctl oid structure
4117 * @arg1: pointer to private data structure
4118 * @arg2: unused
4119 * @req: sysctl request pointer
4120 *
4121 * In FW DCB mode, returns the current ETS prio table /
4122 * UP2TC mapping from the local MIB.
4123 *
4124 * In SW DCB mode this sysctl also accepts a value that will
4125 * be sent to the firmware for configuration.
4126 */
4127static int
4128ice_sysctl_up2tc_map(SYSCTL_HANDLER_ARGS)
4129{
4130	struct ice_softc *sc = (struct ice_softc *)arg1;
4131	struct ice_dcbx_cfg *local_dcbx_cfg;
4132	struct ice_port_info *pi;
4133	struct ice_hw *hw = &sc->hw;
4134	device_t dev = sc->dev;
4135	enum ice_status status;
4136	struct sbuf *sbuf;
4137	int ret;
4138
4139	/* Store input rates from user */
4140	char up2tc_user_buf[128] = "";
4141	/* This array is indexed by UP, not TC */
4142	u8 new_up2tc[ICE_MAX_TRAFFIC_CLASS] = {};
4143
4144	UNREFERENCED_PARAMETER(arg2);
4145
4146	if (ice_driver_is_detaching(sc))
4147		return (ESHUTDOWN);
4148
4149	if (req->oldptr == NULL && req->newptr == NULL) {
4150		ret = SYSCTL_OUT(req, 0, 128);
4151		return (ret);
4152	}
4153
4154	pi = hw->port_info;
4155	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4156
4157	sbuf = sbuf_new(NULL, up2tc_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
4158
4159	/* Format ETS Priority Mapping Table for output */
4160	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
4161		sbuf_printf(sbuf, "%d", local_dcbx_cfg->etscfg.prio_table[i]);
4162		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
4163			sbuf_printf(sbuf, ",");
4164	}
4165
4166	sbuf_finish(sbuf);
4167	sbuf_delete(sbuf);
4168
4169	/* Read in the new ETS priority mapping */
4170	ret = sysctl_handle_string(oidp, up2tc_user_buf, sizeof(up2tc_user_buf), req);
4171	if ((ret) || (req->newptr == NULL))
4172		return (ret);
4173
4174	/* Don't allow setting changes in FW DCB mode */
4175	if (!hw->port_info->qos_cfg.is_sw_lldp)
4176		return (EPERM);
4177
4178	ret = ice_ets_str_to_tbl(up2tc_user_buf, new_up2tc, 7);
4179	if (ret) {
4180		device_printf(dev, "%s: Could not parse input priority assignment table: %s\n",
4181		    __func__, up2tc_user_buf);
4182		return (ret);
4183	}
4184
4185	/* Prepare updated ETS CFG/REC TLVs */
4186	memcpy(local_dcbx_cfg->etscfg.prio_table, new_up2tc,
4187	    sizeof(new_up2tc));
4188	memcpy(local_dcbx_cfg->etsrec.prio_table, new_up2tc,
4189	    sizeof(new_up2tc));
4190
4191	status = ice_set_dcb_cfg(pi);
4192	if (status) {
4193		device_printf(dev,
4194		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4195		    __func__, ice_status_str(status),
4196		    ice_aq_str(hw->adminq.sq_last_status));
4197		return (EIO);
4198	}
4199
4200	ice_do_dcb_reconfig(sc, false);
4201
4202	return (0);
4203}
4204
4205/**
4206 * ice_config_pfc - helper function to set PFC config in FW
4207 * @sc: device private structure
4208 * @new_mode: bit flags indicating PFC status for TCs
4209 *
4210 * @pre must be in SW DCB mode
4211 *
4212 * Configures the driver's local PFC TLV and sends it to the
4213 * FW for configuration, then reconfigures the driver/VSI
4214 * for DCB if needed.
4215 */
4216static int
4217ice_config_pfc(struct ice_softc *sc, u8 new_mode)
4218{
4219	struct ice_dcbx_cfg *local_dcbx_cfg;
4220	struct ice_hw *hw = &sc->hw;
4221	struct ice_port_info *pi;
4222	device_t dev = sc->dev;
4223	enum ice_status status;
4224
4225	pi = hw->port_info;
4226	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4227
4228	/* Prepare updated PFC TLV */
4229	local_dcbx_cfg->pfc.pfcena = new_mode;
4230	local_dcbx_cfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
4231	local_dcbx_cfg->pfc.willing = 0;
4232	local_dcbx_cfg->pfc.mbc = 0;
4233
4234	/* Warn if PFC is being disabled with RoCE v2 in use */
4235	if (new_mode == 0 && sc->rdma_entry.attached)
4236		device_printf(dev,
4237		    "WARNING: Recommended that Priority Flow Control is enabled when RoCEv2 is in use\n");
4238
4239	status = ice_set_dcb_cfg(pi);
4240	if (status) {
4241		device_printf(dev,
4242		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
4243		    __func__, ice_status_str(status),
4244		    ice_aq_str(hw->adminq.sq_last_status));
4245		return (EIO);
4246	}
4247
4248	ice_do_dcb_reconfig(sc, false);
4249
4250	return (0);
4251}
4252
/* Description string registered with the "pfc" sysctl in
 * ice_add_device_sysctls().
 */
#define ICE_SYSCTL_HELP_PFC_CONFIG \
"\nIn FW DCB mode (fw_lldp_agent=1), displays the current Priority Flow Control configuration" \
"\nIn SW DCB mode, displays and allows setting the configuration" \
"\nInput/Output is in this format: 0xff" \
"\nWhere bit position # enables/disables PFC for that Traffic Class #"
4258
4259/**
4260 * ice_sysctl_pfc_config - Report or configure enabled PFC TCs
4261 * @oidp: sysctl oid structure
4262 * @arg1: pointer to private data structure
4263 * @arg2: unused
4264 * @req: sysctl request pointer
4265 *
4266 * In FW DCB mode, returns a bitmap containing the current TCs
4267 * that have PFC enabled on them.
4268 *
4269 * In SW DCB mode this sysctl also accepts a value that will
4270 * be sent to the firmware for configuration.
4271 */
4272static int
4273ice_sysctl_pfc_config(SYSCTL_HANDLER_ARGS)
4274{
4275	struct ice_softc *sc = (struct ice_softc *)arg1;
4276	struct ice_dcbx_cfg *local_dcbx_cfg;
4277	struct ice_port_info *pi;
4278	struct ice_hw *hw = &sc->hw;
4279	int ret;
4280
4281	/* Store input flags from user */
4282	u8 user_pfc;
4283
4284	UNREFERENCED_PARAMETER(arg2);
4285
4286	if (ice_driver_is_detaching(sc))
4287		return (ESHUTDOWN);
4288
4289	if (req->oldptr == NULL && req->newptr == NULL) {
4290		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4291		return (ret);
4292	}
4293
4294	pi = hw->port_info;
4295	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4296
4297	/* Format current PFC enable setting for output */
4298	user_pfc = local_dcbx_cfg->pfc.pfcena;
4299
4300	/* Read in the new PFC config */
4301	ret = sysctl_handle_8(oidp, &user_pfc, 0, req);
4302	if ((ret) || (req->newptr == NULL))
4303		return (ret);
4304
4305	/* Don't allow setting changes in FW DCB mode */
4306	if (!hw->port_info->qos_cfg.is_sw_lldp)
4307		return (EPERM);
4308
4309	/* If LFC is active and PFC is going to be turned on, turn LFC off */
4310	if (user_pfc != 0 && pi->phy.curr_user_fc_req != ICE_FC_NONE) {
4311		pi->phy.curr_user_fc_req = ICE_FC_NONE;
4312		if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
4313			 sc->link_up) {
4314			ret = ice_apply_saved_phy_cfg(sc, ICE_APPLY_FC);
4315			if (ret)
4316				return (ret);
4317		}
4318	}
4319
4320	return ice_config_pfc(sc, user_pfc);
4321}
4322
/* Description string registered with the "pfc_mode" sysctl in
 * ice_add_device_sysctls().
 */
#define ICE_SYSCTL_HELP_PFC_MODE \
"\nDisplay and set the current QoS mode for the firmware" \
"\n\t0: VLAN UP mode" \
"\n\t1: DSCP mode"
4327
4328/**
4329 * ice_sysctl_pfc_mode
4330 * @oidp: sysctl oid structure
4331 * @arg1: pointer to private data structure
4332 * @arg2: unused
4333 * @req: sysctl request pointer
4334 *
4335 * Gets and sets whether the port is in DSCP or VLAN PCP-based
4336 * PFC mode. This is also used to set whether DSCP or VLAN PCP
4337 * -based settings are configured for DCB.
4338 */
4339static int
4340ice_sysctl_pfc_mode(SYSCTL_HANDLER_ARGS)
4341{
4342	struct ice_softc *sc = (struct ice_softc *)arg1;
4343	struct ice_dcbx_cfg *local_dcbx_cfg;
4344	struct ice_port_info *pi;
4345	struct ice_hw *hw = &sc->hw;
4346	device_t dev = sc->dev;
4347	enum ice_status status;
4348	u8 user_pfc_mode, aq_pfc_mode;
4349	int ret;
4350
4351	UNREFERENCED_PARAMETER(arg2);
4352
4353	if (ice_driver_is_detaching(sc))
4354		return (ESHUTDOWN);
4355
4356	if (req->oldptr == NULL && req->newptr == NULL) {
4357		ret = SYSCTL_OUT(req, 0, sizeof(u8));
4358		return (ret);
4359	}
4360
4361	pi = hw->port_info;
4362	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
4363
4364	user_pfc_mode = local_dcbx_cfg->pfc_mode;
4365
4366	/* Read in the new mode */
4367	ret = sysctl_handle_8(oidp, &user_pfc_mode, 0, req);
4368	if ((ret) || (req->newptr == NULL))
4369		return (ret);
4370
4371	/* Don't allow setting changes in FW DCB mode */
4372	if (!hw->port_info->qos_cfg.is_sw_lldp)
4373		return (EPERM);
4374
4375	/* Currently, there are only two modes */
4376	switch (user_pfc_mode) {
4377	case 0:
4378		aq_pfc_mode = ICE_AQC_PFC_VLAN_BASED_PFC;
4379		break;
4380	case 1:
4381		aq_pfc_mode = ICE_AQC_PFC_DSCP_BASED_PFC;
4382		break;
4383	default:
4384		device_printf(dev,
4385		    "%s: Valid input range is 0-1 (input %d)\n",
4386		    __func__, user_pfc_mode);
4387		return (EINVAL);
4388	}
4389
4390	status = ice_aq_set_pfc_mode(hw, aq_pfc_mode, NULL);
4391	if (status == ICE_ERR_NOT_SUPPORTED) {
4392		device_printf(dev,
4393		    "%s: Failed to set PFC mode; DCB not supported\n",
4394		    __func__);
4395		return (ENODEV);
4396	}
4397	if (status) {
4398		device_printf(dev,
4399		    "%s: Failed to set PFC mode; status %s, aq_err %s\n",
4400		    __func__, ice_status_str(status),
4401		    ice_aq_str(hw->adminq.sq_last_status));
4402		return (EIO);
4403	}
4404
4405	/* Reset settings to default when mode is changed */
4406	ice_set_default_local_mib_settings(sc);
4407	/* Cache current settings and reconfigure */
4408	local_dcbx_cfg->pfc_mode = user_pfc_mode;
4409	ice_do_dcb_reconfig(sc, false);
4410
4411	return (0);
4412}
4413
/* Description string registered with the "link_active_on_if_down" sysctl in
 * ice_add_device_sysctls().
 */
#define ICE_SYSCTL_HELP_SET_LINK_ACTIVE \
"\nKeep link active after setting interface down:" \
"\n\t0 - disable" \
"\n\t1 - enable"
4418
4419/**
4420 * ice_sysctl_set_link_active
4421 * @oidp: sysctl oid structure
4422 * @arg1: pointer to private data structure
4423 * @arg2: unused
4424 * @req: sysctl request pointer
4425 *
4426 * Set the link_active_on_if_down sysctl flag.
4427 */
4428static int
4429ice_sysctl_set_link_active(SYSCTL_HANDLER_ARGS)
4430{
4431	struct ice_softc *sc = (struct ice_softc *)arg1;
4432	bool mode;
4433	int ret;
4434
4435	UNREFERENCED_PARAMETER(arg2);
4436
4437	mode = ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4438
4439	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4440	if ((ret) || (req->newptr == NULL))
4441		return (ret);
4442
4443	if (mode)
4444		ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4445	else
4446		ice_clear_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
4447
4448	return (0);
4449}
4450
4451/**
4452 * ice_sysctl_debug_set_link
4453 * @oidp: sysctl oid structure
4454 * @arg1: pointer to private data structure
4455 * @arg2: unused
4456 * @req: sysctl request pointer
4457 *
4458 * Set link up/down in debug session.
4459 */
4460static int
4461ice_sysctl_debug_set_link(SYSCTL_HANDLER_ARGS)
4462{
4463	struct ice_softc *sc = (struct ice_softc *)arg1;
4464	bool mode;
4465	int ret;
4466
4467	UNREFERENCED_PARAMETER(arg2);
4468
4469	ret = sysctl_handle_bool(oidp, &mode, 0, req);
4470	if ((ret) || (req->newptr == NULL))
4471		return (ret);
4472
4473	ice_set_link(sc, mode != 0);
4474
4475	return (0);
4476}
4477
4478/**
4479 * ice_add_device_sysctls - add device specific dynamic sysctls
4480 * @sc: device private structure
4481 *
4482 * Add per-device dynamic sysctls which show device configuration or enable
4483 * configuring device functionality. For tunable values which can be set prior
4484 * to load, see ice_add_device_tunables.
4485 *
4486 * This function depends on the sysctl layout setup by ice_add_device_tunables,
4487 * and likely should be called near the end of the attach process.
4488 */
4489void
4490ice_add_device_sysctls(struct ice_softc *sc)
4491{
4492	struct sysctl_oid *hw_node;
4493	device_t dev = sc->dev;
4494
4495	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4496	struct sysctl_oid_list *ctx_list =
4497	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
4498
4499	SYSCTL_ADD_PROC(ctx, ctx_list,
4500	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
4501	    sc, 0, ice_sysctl_show_fw, "A", "Firmware version");
4502
4503	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_HAS_PBA)) {
4504		SYSCTL_ADD_PROC(ctx, ctx_list,
4505		    OID_AUTO, "pba_number", CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4506		    ice_sysctl_pba_number, "A", "Product Board Assembly Number");
4507	}
4508	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_TEMP_SENSOR)) {
4509		SYSCTL_ADD_PROC(ctx, ctx_list,
4510		    OID_AUTO, "temp", CTLTYPE_S8 | CTLFLAG_RD,
4511		    sc, 0, ice_sysctl_temperature, "CU",
4512		    "Device temperature in degrees Celcius (C)");
4513	}
4514
4515	SYSCTL_ADD_PROC(ctx, ctx_list,
4516	    OID_AUTO, "ddp_version", CTLTYPE_STRING | CTLFLAG_RD,
4517	    sc, 0, ice_sysctl_pkg_version, "A", "Active DDP package name and version");
4518
4519	SYSCTL_ADD_PROC(ctx, ctx_list,
4520	    OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD,
4521	    sc, 0, ice_sysctl_current_speed, "A", "Current Port Link Speed");
4522
4523	SYSCTL_ADD_PROC(ctx, ctx_list,
4524	    OID_AUTO, "requested_fec", CTLTYPE_STRING | CTLFLAG_RW,
4525	    sc, 0, ice_sysctl_fec_config, "A", ICE_SYSCTL_HELP_FEC_CONFIG);
4526
4527	SYSCTL_ADD_PROC(ctx, ctx_list,
4528	    OID_AUTO, "negotiated_fec", CTLTYPE_STRING | CTLFLAG_RD,
4529	    sc, 0, ice_sysctl_negotiated_fec, "A", "Current Negotiated FEC mode");
4530
4531	SYSCTL_ADD_PROC(ctx, ctx_list,
4532	    OID_AUTO, "fc", CTLTYPE_STRING | CTLFLAG_RW,
4533	    sc, 0, ice_sysctl_fc_config, "A", ICE_SYSCTL_HELP_FC_CONFIG);
4534
4535	SYSCTL_ADD_PROC(ctx, ctx_list,
4536	    OID_AUTO, "advertise_speed", CTLTYPE_U16 | CTLFLAG_RW,
4537	    sc, 0, ice_sysctl_advertise_speed, "SU", ICE_SYSCTL_HELP_ADVERTISE_SPEED);
4538
4539	SYSCTL_ADD_PROC(ctx, ctx_list,
4540	    OID_AUTO, "fw_lldp_agent", CTLTYPE_U8 | CTLFLAG_RWTUN,
4541	    sc, 0, ice_sysctl_fw_lldp_agent, "CU", ICE_SYSCTL_HELP_FW_LLDP_AGENT);
4542
4543	SYSCTL_ADD_PROC(ctx, ctx_list,
4544	    OID_AUTO, "ets_min_rate", CTLTYPE_STRING | CTLFLAG_RW,
4545	    sc, 0, ice_sysctl_ets_min_rate, "A", ICE_SYSCTL_HELP_ETS_MIN_RATE);
4546
4547	SYSCTL_ADD_PROC(ctx, ctx_list,
4548	    OID_AUTO, "up2tc_map", CTLTYPE_STRING | CTLFLAG_RW,
4549	    sc, 0, ice_sysctl_up2tc_map, "A", ICE_SYSCTL_HELP_UP2TC_MAP);
4550
4551	SYSCTL_ADD_PROC(ctx, ctx_list,
4552	    OID_AUTO, "pfc", CTLTYPE_U8 | CTLFLAG_RW,
4553	    sc, 0, ice_sysctl_pfc_config, "CU", ICE_SYSCTL_HELP_PFC_CONFIG);
4554
4555	SYSCTL_ADD_PROC(ctx, ctx_list,
4556	    OID_AUTO, "pfc_mode", CTLTYPE_U8 | CTLFLAG_RWTUN,
4557	    sc, 0, ice_sysctl_pfc_mode, "CU", ICE_SYSCTL_HELP_PFC_MODE);
4558
4559	SYSCTL_ADD_PROC(ctx, ctx_list,
4560	    OID_AUTO, "allow_no_fec_modules_in_auto",
4561	    CTLTYPE_U8 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
4562	    sc, 0, ice_sysctl_allow_no_fec_mod_in_auto, "CU",
4563	    "Allow \"No FEC\" mode in FEC auto-negotiation");
4564
4565	SYSCTL_ADD_PROC(ctx, ctx_list,
4566	    OID_AUTO, "link_active_on_if_down", CTLTYPE_U8 | CTLFLAG_RWTUN,
4567	    sc, 0, ice_sysctl_set_link_active, "CU", ICE_SYSCTL_HELP_SET_LINK_ACTIVE);
4568
4569	SYSCTL_ADD_PROC(ctx, ctx_list,
4570	    OID_AUTO, "create_mirror_interface", CTLTYPE_STRING | CTLFLAG_RW,
4571	    sc, 0, ice_sysctl_create_mirror_interface, "A", "");
4572
4573	SYSCTL_ADD_PROC(ctx, ctx_list,
4574	    OID_AUTO, "destroy_mirror_interface", CTLTYPE_STRING | CTLFLAG_RW,
4575	    sc, 0, ice_sysctl_destroy_mirror_interface, "A", "");
4576
4577	ice_add_dscp2tc_map_sysctls(sc, ctx, ctx_list);
4578
4579	/* Differentiate software and hardware statistics, by keeping hw stats
4580	 * in their own node. This isn't in ice_add_device_tunables, because
4581	 * we won't have any CTLFLAG_TUN sysctls under this node.
4582	 */
4583	hw_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "hw", CTLFLAG_RD,
4584				  NULL, "Port Hardware Statistics");
4585
4586	ice_add_sysctls_mac_stats(ctx, hw_node, &sc->stats.cur);
4587
4588	/* Add the main PF VSI stats now. Other VSIs will add their own stats
4589	 * during creation
4590	 */
4591	ice_add_vsi_sysctls(&sc->pf_vsi);
4592
4593	/* Add sysctls related to debugging the device driver. This includes
4594	 * sysctls which display additional internal driver state for use in
4595	 * understanding what is happening within the driver.
4596	 */
4597	ice_add_debug_sysctls(sc);
4598}
4599
4600/**
4601 * @enum hmc_error_type
4602 * @brief enumeration of HMC errors
4603 *
4604 * Enumeration defining the possible HMC errors that might occur.
4605 */
4606enum hmc_error_type {
4607	HMC_ERR_PMF_INVALID = 0,
4608	HMC_ERR_VF_IDX_INVALID = 1,
4609	HMC_ERR_VF_PARENT_PF_INVALID = 2,
4610	/* 3 is reserved */
4611	HMC_ERR_INDEX_TOO_BIG = 4,
4612	HMC_ERR_ADDRESS_TOO_LARGE = 5,
4613	HMC_ERR_SEGMENT_DESC_INVALID = 6,
4614	HMC_ERR_SEGMENT_DESC_TOO_SMALL = 7,
4615	HMC_ERR_PAGE_DESC_INVALID = 8,
4616	HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION = 9,
4617	/* 10 is reserved */
4618	HMC_ERR_INVALID_OBJECT_TYPE = 11,
4619	/* 12 is reserved */
4620};
4621
4622/**
4623 * ice_log_hmc_error - Log an HMC error message
4624 * @hw: device hw structure
4625 * @dev: the device to pass to device_printf()
4626 *
4627 * Log a message when an HMC error interrupt is triggered.
4628 */
4629void
4630ice_log_hmc_error(struct ice_hw *hw, device_t dev)
4631{
4632	u32 info, data;
4633	u8 index, errtype, objtype;
4634	bool isvf;
4635
4636	info = rd32(hw, PFHMC_ERRORINFO);
4637	data = rd32(hw, PFHMC_ERRORDATA);
4638
4639	index = (u8)(info & PFHMC_ERRORINFO_PMF_INDEX_M);
4640	errtype = (u8)((info & PFHMC_ERRORINFO_HMC_ERROR_TYPE_M) >>
4641		       PFHMC_ERRORINFO_HMC_ERROR_TYPE_S);
4642	objtype = (u8)((info & PFHMC_ERRORINFO_HMC_OBJECT_TYPE_M) >>
4643		       PFHMC_ERRORINFO_HMC_OBJECT_TYPE_S);
4644
4645	isvf = info & PFHMC_ERRORINFO_PMF_ISVF_M;
4646
4647	device_printf(dev, "%s HMC Error detected on PMF index %d:\n",
4648		      isvf ? "VF" : "PF", index);
4649
4650	device_printf(dev, "error type %d, object type %d, data 0x%08x\n",
4651		      errtype, objtype, data);
4652
4653	switch (errtype) {
4654	case HMC_ERR_PMF_INVALID:
4655		device_printf(dev, "Private Memory Function is not valid\n");
4656		break;
4657	case HMC_ERR_VF_IDX_INVALID:
4658		device_printf(dev, "Invalid Private Memory Function index for PE enabled VF\n");
4659		break;
4660	case HMC_ERR_VF_PARENT_PF_INVALID:
4661		device_printf(dev, "Invalid parent PF for PE enabled VF\n");
4662		break;
4663	case HMC_ERR_INDEX_TOO_BIG:
4664		device_printf(dev, "Object index too big\n");
4665		break;
4666	case HMC_ERR_ADDRESS_TOO_LARGE:
4667		device_printf(dev, "Address extends beyond segment descriptor limit\n");
4668		break;
4669	case HMC_ERR_SEGMENT_DESC_INVALID:
4670		device_printf(dev, "Segment descriptor is invalid\n");
4671		break;
4672	case HMC_ERR_SEGMENT_DESC_TOO_SMALL:
4673		device_printf(dev, "Segment descriptor is too small\n");
4674		break;
4675	case HMC_ERR_PAGE_DESC_INVALID:
4676		device_printf(dev, "Page descriptor is invalid\n");
4677		break;
4678	case HMC_ERR_UNSUPPORTED_REQUEST_COMPLETION:
4679		device_printf(dev, "Unsupported Request completion received from PCIe\n");
4680		break;
4681	case HMC_ERR_INVALID_OBJECT_TYPE:
4682		device_printf(dev, "Invalid object type\n");
4683		break;
4684	default:
4685		device_printf(dev, "Unknown HMC error\n");
4686	}
4687
4688	/* Clear the error indication */
4689	wr32(hw, PFHMC_ERRORINFO, 0);
4690}
4691
4692/**
4693 * @struct ice_sysctl_info
4694 * @brief sysctl information
4695 *
4696 * Structure used to simplify the process of defining the many similar
4697 * statistics sysctls.
4698 */
4699struct ice_sysctl_info {
4700	u64		*stat;
4701	const char	*name;
4702	const char	*description;
4703};
4704
4705/**
4706 * ice_add_sysctls_eth_stats - Add sysctls for ethernet statistics
4707 * @ctx: sysctl ctx to use
4708 * @parent: the parent node to add sysctls under
4709 * @stats: the ethernet stats structure to source values from
4710 *
4711 * Adds statistics sysctls for the ethernet statistics of the MAC or a VSI.
4712 * Will add them under the parent node specified.
4713 *
4714 * Note that tx_errors is only meaningful for VSIs and not the global MAC/PF
4715 * statistics, so it is not included here. Similarly, rx_discards has different
4716 * descriptions for VSIs and MAC/PF stats, so it is also not included here.
4717 */
4718void
4719ice_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx,
4720			  struct sysctl_oid *parent,
4721			  struct ice_eth_stats *stats)
4722{
4723	const struct ice_sysctl_info ctls[] = {
4724		/* Rx Stats */
4725		{ &stats->rx_bytes, "good_octets_rcvd", "Good Octets Received" },
4726		{ &stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received" },
4727		{ &stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received" },
4728		{ &stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received" },
4729		/* Tx Stats */
4730		{ &stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted" },
4731		{ &stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted" },
4732		{ &stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted" },
4733		{ &stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted" },
4734		/* End */
4735		{ 0, 0, 0 }
4736	};
4737
4738	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4739
4740	const struct ice_sysctl_info *entry = ctls;
4741	while (entry->stat != 0) {
4742		SYSCTL_ADD_U64(ctx, parent_list, OID_AUTO, entry->name,
4743			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
4744			       entry->description);
4745		entry++;
4746	}
4747}
4748
4749/**
4750 * ice_sysctl_tx_cso_stat - Display Tx checksum offload statistic
4751 * @oidp: sysctl oid structure
4752 * @arg1: pointer to private data structure
4753 * @arg2: Tx CSO stat to read
4754 * @req: sysctl request pointer
4755 *
4756 * On read: Sums the per-queue Tx CSO stat and displays it.
4757 */
4758static int
4759ice_sysctl_tx_cso_stat(SYSCTL_HANDLER_ARGS)
4760{
4761	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4762	enum ice_tx_cso_stat type = (enum ice_tx_cso_stat)arg2;
4763	u64 stat = 0;
4764	int i;
4765
4766	if (ice_driver_is_detaching(vsi->sc))
4767		return (ESHUTDOWN);
4768
4769	/* Check that the type is valid */
4770	if (type >= ICE_CSO_STAT_TX_COUNT)
4771		return (EDOOFUS);
4772
4773	/* Sum the stat for each of the Tx queues */
4774	for (i = 0; i < vsi->num_tx_queues; i++)
4775		stat += vsi->tx_queues[i].stats.cso[type];
4776
4777	return sysctl_handle_64(oidp, NULL, stat, req);
4778}
4779
4780/**
4781 * ice_sysctl_rx_cso_stat - Display Rx checksum offload statistic
4782 * @oidp: sysctl oid structure
4783 * @arg1: pointer to private data structure
4784 * @arg2: Rx CSO stat to read
4785 * @req: sysctl request pointer
4786 *
4787 * On read: Sums the per-queue Rx CSO stat and displays it.
4788 */
4789static int
4790ice_sysctl_rx_cso_stat(SYSCTL_HANDLER_ARGS)
4791{
4792	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4793	enum ice_rx_cso_stat type = (enum ice_rx_cso_stat)arg2;
4794	u64 stat = 0;
4795	int i;
4796
4797	if (ice_driver_is_detaching(vsi->sc))
4798		return (ESHUTDOWN);
4799
4800	/* Check that the type is valid */
4801	if (type >= ICE_CSO_STAT_RX_COUNT)
4802		return (EDOOFUS);
4803
4804	/* Sum the stat for each of the Rx queues */
4805	for (i = 0; i < vsi->num_rx_queues; i++)
4806		stat += vsi->rx_queues[i].stats.cso[type];
4807
4808	return sysctl_handle_64(oidp, NULL, stat, req);
4809}
4810
4811/**
4812 * ice_sysctl_rx_errors_stat - Display aggregate of Rx errors
4813 * @oidp: sysctl oid structure
4814 * @arg1: pointer to private data structure
4815 * @arg2: unused
4816 * @req: sysctl request pointer
4817 *
4818 * On read: Sums current values of Rx error statistics and
4819 * displays it.
4820 */
4821static int
4822ice_sysctl_rx_errors_stat(SYSCTL_HANDLER_ARGS)
4823{
4824	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
4825	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
4826	u64 stat = 0;
4827	int i, type;
4828
4829	UNREFERENCED_PARAMETER(arg2);
4830
4831	if (ice_driver_is_detaching(vsi->sc))
4832		return (ESHUTDOWN);
4833
4834	stat += hs->rx_undersize;
4835	stat += hs->rx_fragments;
4836	stat += hs->rx_oversize;
4837	stat += hs->rx_jabber;
4838	stat += hs->rx_len_errors;
4839	stat += hs->crc_errors;
4840	stat += hs->illegal_bytes;
4841
4842	/* Checksum error stats */
4843	for (i = 0; i < vsi->num_rx_queues; i++)
4844		for (type = ICE_CSO_STAT_RX_IP4_ERR;
4845		     type < ICE_CSO_STAT_RX_COUNT;
4846		     type++)
4847			stat += vsi->rx_queues[i].stats.cso[type];
4848
4849	return sysctl_handle_64(oidp, NULL, stat, req);
4850}
4851
4852/**
4853 * @struct ice_rx_cso_stat_info
4854 * @brief sysctl information for an Rx checksum offload statistic
4855 *
4856 * Structure used to simplify the process of defining the checksum offload
4857 * statistics.
4858 */
4859struct ice_rx_cso_stat_info {
4860	enum ice_rx_cso_stat	type;
4861	const char		*name;
4862	const char		*description;
4863};
4864
4865/**
4866 * @struct ice_tx_cso_stat_info
4867 * @brief sysctl information for a Tx checksum offload statistic
4868 *
4869 * Structure used to simplify the process of defining the checksum offload
4870 * statistics.
4871 */
4872struct ice_tx_cso_stat_info {
4873	enum ice_tx_cso_stat	type;
4874	const char		*name;
4875	const char		*description;
4876};
4877
4878/**
4879 * ice_add_sysctls_sw_stats - Add sysctls for software statistics
4880 * @vsi: pointer to the VSI to add sysctls for
4881 * @ctx: sysctl ctx to use
4882 * @parent: the parent node to add sysctls under
4883 *
4884 * Add statistics sysctls for software tracked statistics of a VSI.
4885 *
4886 * Currently this only adds checksum offload statistics, but more counters may
4887 * be added in the future.
4888 */
4889static void
4890ice_add_sysctls_sw_stats(struct ice_vsi *vsi,
4891			 struct sysctl_ctx_list *ctx,
4892			 struct sysctl_oid *parent)
4893{
4894	struct sysctl_oid *cso_node;
4895	struct sysctl_oid_list *cso_list;
4896
4897	/* Tx CSO Stats */
4898	const struct ice_tx_cso_stat_info tx_ctls[] = {
4899		{ ICE_CSO_STAT_TX_TCP, "tx_tcp", "Transmit TCP Packets marked for HW checksum" },
4900		{ ICE_CSO_STAT_TX_UDP, "tx_udp", "Transmit UDP Packets marked for HW checksum" },
4901		{ ICE_CSO_STAT_TX_SCTP, "tx_sctp", "Transmit SCTP Packets marked for HW checksum" },
4902		{ ICE_CSO_STAT_TX_IP4, "tx_ip4", "Transmit IPv4 Packets marked for HW checksum" },
4903		{ ICE_CSO_STAT_TX_IP6, "tx_ip6", "Transmit IPv6 Packets marked for HW checksum" },
4904		{ ICE_CSO_STAT_TX_L3_ERR, "tx_l3_err", "Transmit packets that driver failed to set L3 HW CSO bits for" },
4905		{ ICE_CSO_STAT_TX_L4_ERR, "tx_l4_err", "Transmit packets that driver failed to set L4 HW CSO bits for" },
4906		/* End */
4907		{ ICE_CSO_STAT_TX_COUNT, 0, 0 }
4908	};
4909
4910	/* Rx CSO Stats */
4911	const struct ice_rx_cso_stat_info rx_ctls[] = {
4912		{ ICE_CSO_STAT_RX_IP4_ERR, "rx_ip4_err", "Received packets with invalid IPv4 checksum indicated by HW" },
4913		{ ICE_CSO_STAT_RX_IP6_ERR, "rx_ip6_err", "Received IPv6 packets with extension headers" },
4914		{ ICE_CSO_STAT_RX_L3_ERR, "rx_l3_err", "Received packets with an unexpected invalid L3 checksum indicated by HW" },
4915		{ ICE_CSO_STAT_RX_TCP_ERR, "rx_tcp_err", "Received packets with invalid TCP checksum indicated by HW" },
4916		{ ICE_CSO_STAT_RX_UDP_ERR, "rx_udp_err", "Received packets with invalid UDP checksum indicated by HW" },
4917		{ ICE_CSO_STAT_RX_SCTP_ERR, "rx_sctp_err", "Received packets with invalid SCTP checksum indicated by HW" },
4918		{ ICE_CSO_STAT_RX_L4_ERR, "rx_l4_err", "Received packets with an unexpected invalid L4 checksum indicated by HW" },
4919		/* End */
4920		{ ICE_CSO_STAT_RX_COUNT, 0, 0 }
4921	};
4922
4923	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
4924
4925	/* Add a node for statistics tracked by software. */
4926	cso_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "cso", CTLFLAG_RD,
4927				  NULL, "Checksum offload Statistics");
4928	cso_list = SYSCTL_CHILDREN(cso_node);
4929
4930	const struct ice_tx_cso_stat_info *tx_entry = tx_ctls;
4931	while (tx_entry->name && tx_entry->description) {
4932		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, tx_entry->name,
4933				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4934				vsi, tx_entry->type, ice_sysctl_tx_cso_stat, "QU",
4935				tx_entry->description);
4936		tx_entry++;
4937	}
4938
4939	const struct ice_rx_cso_stat_info *rx_entry = rx_ctls;
4940	while (rx_entry->name && rx_entry->description) {
4941		SYSCTL_ADD_PROC(ctx, cso_list, OID_AUTO, rx_entry->name,
4942				CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4943				vsi, rx_entry->type, ice_sysctl_rx_cso_stat, "QU",
4944				rx_entry->description);
4945		rx_entry++;
4946	}
4947}
4948
4949/**
4950 * ice_add_vsi_sysctls - Add sysctls for a VSI
4951 * @vsi: pointer to VSI structure
4952 *
4953 * Add various sysctls for a given VSI.
4954 */
4955void
4956ice_add_vsi_sysctls(struct ice_vsi *vsi)
4957{
4958	struct sysctl_ctx_list *ctx = &vsi->ctx;
4959	struct sysctl_oid *hw_node, *sw_node;
4960	struct sysctl_oid_list *vsi_list, *hw_list;
4961
4962	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
4963
4964	/* Keep hw stats in their own node. */
4965	hw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "hw", CTLFLAG_RD,
4966				  NULL, "VSI Hardware Statistics");
4967	hw_list = SYSCTL_CHILDREN(hw_node);
4968
4969	/* Add the ethernet statistics for this VSI */
4970	ice_add_sysctls_eth_stats(ctx, hw_node, &vsi->hw_stats.cur);
4971
4972	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_discards",
4973			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_discards,
4974			0, "Discarded Rx Packets (see rx_errors or rx_no_desc)");
4975
4976	SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "rx_errors",
4977			CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4978			vsi, 0, ice_sysctl_rx_errors_stat, "QU",
4979			"Aggregate of all Rx errors");
4980
4981	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_no_desc",
4982		       CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.rx_no_desc,
4983		       0, "Rx Packets Discarded Due To Lack Of Descriptors");
4984
4985	SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_errors",
4986			CTLFLAG_RD | CTLFLAG_STATS, &vsi->hw_stats.cur.tx_errors,
4987			0, "Tx Packets Discarded Due To Error");
4988
4989	/* Add a node for statistics tracked by software. */
4990	sw_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, "sw", CTLFLAG_RD,
4991				  NULL, "VSI Software Statistics");
4992
4993	ice_add_sysctls_sw_stats(vsi, ctx, sw_node);
4994}
4995
4996/**
4997 * ice_add_sysctls_mac_pfc_one_stat - Add sysctl node for a PFC statistic
4998 * @ctx: sysctl ctx to use
4999 * @parent_list: parent sysctl list to add sysctls under
5000 * @pfc_stat_location: address of statistic for sysctl to display
5001 * @node_name: Name for statistic node
5002 * @descr: Description used for nodes added in this function
5003 *
5004 * A helper function for ice_add_sysctls_mac_pfc_stats that adds a node
5005 * for a stat and leaves for each traffic class for that stat.
5006 */
5007static void
5008ice_add_sysctls_mac_pfc_one_stat(struct sysctl_ctx_list *ctx,
5009				 struct sysctl_oid_list *parent_list,
5010				 u64* pfc_stat_location,
5011				 const char *node_name,
5012				 const char *descr)
5013{
5014	struct sysctl_oid_list *node_list;
5015	struct sysctl_oid *node;
5016	struct sbuf *namebuf, *descbuf;
5017
5018	node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, node_name, CTLFLAG_RD,
5019				   NULL, descr);
5020	node_list = SYSCTL_CHILDREN(node);
5021
5022	namebuf = sbuf_new_auto();
5023	descbuf = sbuf_new_auto();
5024	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5025		sbuf_clear(namebuf);
5026		sbuf_clear(descbuf);
5027
5028		sbuf_printf(namebuf, "%d", i);
5029		sbuf_printf(descbuf, "%s for TC %d", descr, i);
5030
5031		sbuf_finish(namebuf);
5032		sbuf_finish(descbuf);
5033
5034		SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, sbuf_data(namebuf),
5035			CTLFLAG_RD | CTLFLAG_STATS, &pfc_stat_location[i], 0,
5036			sbuf_data(descbuf));
5037	}
5038
5039	sbuf_delete(namebuf);
5040	sbuf_delete(descbuf);
5041}
5042
5043/**
5044 * ice_add_sysctls_mac_pfc_stats - Add sysctls for MAC PFC statistics
5045 * @ctx: the sysctl ctx to use
5046 * @parent: parent node to add the sysctls under
5047 * @stats: the hw ports stat structure to pull values from
5048 *
5049 * Add global Priority Flow Control MAC statistics sysctls. These are
5050 * structured as a node with the PFC statistic, where there are eight
5051 * nodes for each traffic class.
5052 */
5053static void
5054ice_add_sysctls_mac_pfc_stats(struct sysctl_ctx_list *ctx,
5055			      struct sysctl_oid *parent,
5056			      struct ice_hw_port_stats *stats)
5057{
5058	struct sysctl_oid_list *parent_list;
5059
5060	parent_list = SYSCTL_CHILDREN(parent);
5061
5062	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_rx,
5063	    "p_xon_recvd", "PFC XON received");
5064	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_rx,
5065	    "p_xoff_recvd", "PFC XOFF received");
5066	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_tx,
5067	    "p_xon_txd", "PFC XON transmitted");
5068	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xoff_tx,
5069	    "p_xoff_txd", "PFC XOFF transmitted");
5070	ice_add_sysctls_mac_pfc_one_stat(ctx, parent_list, stats->priority_xon_2_xoff,
5071	    "p_xon2xoff", "PFC XON to XOFF transitions");
5072}
5073
5074/**
5075 * ice_add_sysctls_mac_stats - Add sysctls for global MAC statistics
5076 * @ctx: the sysctl ctx to use
5077 * @parent: parent node to add the sysctls under
5078 * @stats: the hw ports stat structure to pull values from
5079 *
5080 * Add global MAC statistics sysctls.
5081 */
5082void
5083ice_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx,
5084			  struct sysctl_oid *parent,
5085			  struct ice_hw_port_stats *stats)
5086{
5087	struct sysctl_oid *mac_node;
5088	struct sysctl_oid_list *parent_list, *mac_list;
5089
5090	parent_list = SYSCTL_CHILDREN(parent);
5091
5092	mac_node = SYSCTL_ADD_NODE(ctx, parent_list, OID_AUTO, "mac", CTLFLAG_RD,
5093				   NULL, "Mac Hardware Statistics");
5094	mac_list = SYSCTL_CHILDREN(mac_node);
5095
5096	/* Add the ethernet statistics common to VSI and MAC */
5097	ice_add_sysctls_eth_stats(ctx, mac_node, &stats->eth);
5098
5099	/* Add PFC stats that add per-TC counters */
5100	ice_add_sysctls_mac_pfc_stats(ctx, mac_node, stats);
5101
5102	const struct ice_sysctl_info ctls[] = {
5103		/* Packet Reception Stats */
5104		{&stats->rx_size_64, "rx_frames_64", "64 byte frames received"},
5105		{&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"},
5106		{&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"},
5107		{&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"},
5108		{&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"},
5109		{&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"},
5110		{&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"},
5111		{&stats->rx_undersize, "rx_undersize", "Undersized packets received"},
5112		{&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"},
5113		{&stats->rx_oversize, "rx_oversized", "Oversized packets received"},
5114		{&stats->rx_jabber, "rx_jabber", "Received Jabber"},
5115		{&stats->rx_len_errors, "rx_length_errors", "Receive Length Errors"},
5116		{&stats->eth.rx_discards, "rx_discards",
5117		    "Discarded Rx Packets by Port (shortage of storage space)"},
5118		/* Packet Transmission Stats */
5119		{&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"},
5120		{&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"},
5121		{&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"},
5122		{&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"},
5123		{&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"},
5124		{&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"},
5125		{&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"},
5126		{&stats->tx_dropped_link_down, "tx_dropped", "Tx Dropped Due To Link Down"},
5127		/* Flow control */
5128		{&stats->link_xon_tx, "xon_txd", "Link XON transmitted"},
5129		{&stats->link_xon_rx, "xon_recvd", "Link XON received"},
5130		{&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"},
5131		{&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"},
5132		/* Other */
5133		{&stats->crc_errors, "crc_errors", "CRC Errors"},
5134		{&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"},
5135		{&stats->mac_local_faults, "local_faults", "MAC Local Faults"},
5136		{&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"},
5137		/* End */
5138		{ 0, 0, 0 }
5139	};
5140
5141	const struct ice_sysctl_info *entry = ctls;
5142	while (entry->stat != 0) {
5143		SYSCTL_ADD_U64(ctx, mac_list, OID_AUTO, entry->name,
5144			CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
5145			entry->description);
5146		entry++;
5147	}
5148}
5149
5150/**
5151 * ice_configure_misc_interrupts - enable 'other' interrupt causes
5152 * @sc: pointer to device private softc
5153 *
5154 * Enable various "other" interrupt causes, and associate them to interrupt 0,
5155 * which is our administrative interrupt.
5156 */
5157void
5158ice_configure_misc_interrupts(struct ice_softc *sc)
5159{
5160	struct ice_hw *hw = &sc->hw;
5161	u32 val;
5162
5163	/* Read the OICR register to clear it */
5164	rd32(hw, PFINT_OICR);
5165
5166	/* Enable useful "other" interrupt causes */
5167	val = (PFINT_OICR_ECC_ERR_M |
5168	       PFINT_OICR_MAL_DETECT_M |
5169	       PFINT_OICR_GRST_M |
5170	       PFINT_OICR_PCI_EXCEPTION_M |
5171	       PFINT_OICR_VFLR_M |
5172	       PFINT_OICR_HMC_ERR_M |
5173	       PFINT_OICR_PE_CRITERR_M);
5174
5175	wr32(hw, PFINT_OICR_ENA, val);
5176
5177	/* Note that since we're using MSI-X index 0, and ITR index 0, we do
5178	 * not explicitly program them when writing to the PFINT_*_CTL
5179	 * registers. Nevertheless, these writes are associating the
5180	 * interrupts with the ITR 0 vector
5181	 */
5182
5183	/* Associate the OICR interrupt with ITR 0, and enable it */
5184	wr32(hw, PFINT_OICR_CTL, PFINT_OICR_CTL_CAUSE_ENA_M);
5185
5186	/* Associate the Mailbox interrupt with ITR 0, and enable it */
5187	wr32(hw, PFINT_MBX_CTL, PFINT_MBX_CTL_CAUSE_ENA_M);
5188
5189	/* Associate the AdminQ interrupt with ITR 0, and enable it */
5190	wr32(hw, PFINT_FW_CTL, PFINT_FW_CTL_CAUSE_ENA_M);
5191}
5192
5193/**
5194 * ice_filter_is_mcast - Check if info is a multicast filter
5195 * @vsi: vsi structure addresses are targeted towards
5196 * @info: filter info
5197 *
5198 * @returns true if the provided info is a multicast filter, and false
5199 * otherwise.
5200 */
5201static bool
5202ice_filter_is_mcast(struct ice_vsi *vsi, struct ice_fltr_info *info)
5203{
5204	const u8 *addr = info->l_data.mac.mac_addr;
5205
5206	/*
5207	 * Check if this info matches a multicast filter added by
5208	 * ice_add_mac_to_list
5209	 */
5210	if ((info->flag == ICE_FLTR_TX) &&
5211	    (info->src_id == ICE_SRC_ID_VSI) &&
5212	    (info->lkup_type == ICE_SW_LKUP_MAC) &&
5213	    (info->vsi_handle == vsi->idx) &&
5214	    ETHER_IS_MULTICAST(addr) && !ETHER_IS_BROADCAST(addr))
5215		return true;
5216
5217	return false;
5218}
5219
5220/**
5221 * @struct ice_mcast_sync_data
5222 * @brief data used by ice_sync_one_mcast_filter function
5223 *
5224 * Structure used to store data needed for processing by the
5225 * ice_sync_one_mcast_filter. This structure contains a linked list of filters
5226 * to be added, an error indication, and a pointer to the device softc.
5227 */
5228struct ice_mcast_sync_data {
5229	struct ice_list_head add_list;
5230	struct ice_softc *sc;
5231	int err;
5232};
5233
5234/**
5235 * ice_sync_one_mcast_filter - Check if we need to program the filter
5236 * @p: void pointer to algorithm data
5237 * @sdl: link level socket address
5238 * @count: unused count value
5239 *
5240 * Called by if_foreach_llmaddr to operate on each filter in the ifp filter
5241 * list. For the given address, search our internal list to see if we have
5242 * found the filter. If not, add it to our list of filters that need to be
5243 * programmed.
5244 *
5245 * @returns (1) if we've actually setup the filter to be added
5246 */
5247static u_int
5248ice_sync_one_mcast_filter(void *p, struct sockaddr_dl *sdl,
5249			  u_int __unused count)
5250{
5251	struct ice_mcast_sync_data *data = (struct ice_mcast_sync_data *)p;
5252	struct ice_softc *sc = data->sc;
5253	struct ice_hw *hw = &sc->hw;
5254	struct ice_switch_info *sw = hw->switch_info;
5255	const u8 *sdl_addr = (const u8 *)LLADDR(sdl);
5256	struct ice_fltr_mgmt_list_entry *itr;
5257	struct ice_list_head *rules;
5258	int err;
5259
5260	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5261
5262	/*
5263	 * If a previous filter already indicated an error, there is no need
5264	 * for us to finish processing the rest of the filters.
5265	 */
5266	if (data->err)
5267		return (0);
5268
5269	/* See if this filter has already been programmed */
5270	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5271		struct ice_fltr_info *info = &itr->fltr_info;
5272		const u8 *addr = info->l_data.mac.mac_addr;
5273
5274		/* Only check multicast filters */
5275		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5276			continue;
5277
5278		/*
5279		 * If this filter matches, mark the internal filter as
5280		 * "found", and exit.
5281		 */
5282		if (bcmp(addr, sdl_addr, ETHER_ADDR_LEN) == 0) {
5283			itr->marker = ICE_FLTR_FOUND;
5284			return (1);
5285		}
5286	}
5287
5288	/*
5289	 * If we failed to locate the filter in our internal list, we need to
5290	 * place it into our add list.
5291	 */
5292	err = ice_add_mac_to_list(&sc->pf_vsi, &data->add_list, sdl_addr,
5293				  ICE_FWD_TO_VSI);
5294	if (err) {
5295		device_printf(sc->dev,
5296			      "Failed to place MAC %6D onto add list, err %s\n",
5297			      sdl_addr, ":", ice_err_str(err));
5298		data->err = err;
5299
5300		return (0);
5301	}
5302
5303	return (1);
5304}
5305
5306/**
5307 * ice_sync_multicast_filters - Synchronize OS and internal filter list
5308 * @sc: device private structure
5309 *
5310 * Called in response to SIOCDELMULTI to synchronize the operating system
5311 * multicast address list with the internal list of filters programmed to
5312 * firmware.
5313 *
5314 * Works in one phase to find added and deleted filters using a marker bit on
5315 * the internal list.
5316 *
5317 * First, a loop over the internal list clears the marker bit. Second, for
5318 * each filter in the ifp list is checked. If we find it in the internal list,
5319 * the marker bit is set. Otherwise, the filter is added to the add list.
5320 * Third, a loop over the internal list determines if any filters have not
5321 * been found. Each of these is added to the delete list. Finally, the add and
5322 * delete lists are programmed to firmware to update the filters.
5323 *
5324 * @returns zero on success or an integer error code on failure.
5325 */
5326int
5327ice_sync_multicast_filters(struct ice_softc *sc)
5328{
5329	struct ice_hw *hw = &sc->hw;
5330	struct ice_switch_info *sw = hw->switch_info;
5331	struct ice_fltr_mgmt_list_entry *itr;
5332	struct ice_mcast_sync_data data = {};
5333	struct ice_list_head *rules, remove_list;
5334	enum ice_status status;
5335	int err = 0;
5336
5337	INIT_LIST_HEAD(&data.add_list);
5338	INIT_LIST_HEAD(&remove_list);
5339	data.sc = sc;
5340	data.err = 0;
5341
5342	rules = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5343
5344	/* Acquire the lock for the entire duration */
5345	ice_acquire_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5346
5347	/* (1) Reset the marker state for all filters */
5348	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry)
5349		itr->marker = ICE_FLTR_NOT_FOUND;
5350
5351	/* (2) determine which filters need to be added and removed */
5352	if_foreach_llmaddr(sc->ifp, ice_sync_one_mcast_filter, (void *)&data);
5353	if (data.err) {
5354		/* ice_sync_one_mcast_filter already prints an error */
5355		err = data.err;
5356		ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5357		goto free_filter_lists;
5358	}
5359
5360	LIST_FOR_EACH_ENTRY(itr, rules, ice_fltr_mgmt_list_entry, list_entry) {
5361		struct ice_fltr_info *info = &itr->fltr_info;
5362		const u8 *addr = info->l_data.mac.mac_addr;
5363
5364		/* Only check multicast filters */
5365		if (!ice_filter_is_mcast(&sc->pf_vsi, info))
5366			continue;
5367
5368		/*
5369		 * If the filter is not marked as found, then it must no
5370		 * longer be in the ifp address list, so we need to remove it.
5371		 */
5372		if (itr->marker == ICE_FLTR_NOT_FOUND) {
5373			err = ice_add_mac_to_list(&sc->pf_vsi, &remove_list,
5374						  addr, ICE_FWD_TO_VSI);
5375			if (err) {
5376				device_printf(sc->dev,
5377					      "Failed to place MAC %6D onto remove list, err %s\n",
5378					      addr, ":", ice_err_str(err));
5379				ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5380				goto free_filter_lists;
5381			}
5382		}
5383	}
5384
5385	ice_release_lock(&sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock);
5386
5387	status = ice_add_mac(hw, &data.add_list);
5388	if (status) {
5389		device_printf(sc->dev,
5390			      "Could not add new MAC filters, err %s aq_err %s\n",
5391			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5392		err = (EIO);
5393		goto free_filter_lists;
5394	}
5395
5396	status = ice_remove_mac(hw, &remove_list);
5397	if (status) {
5398		device_printf(sc->dev,
5399			      "Could not remove old MAC filters, err %s aq_err %s\n",
5400			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
5401		err = (EIO);
5402		goto free_filter_lists;
5403	}
5404
5405free_filter_lists:
5406	ice_free_fltr_list(&data.add_list);
5407	ice_free_fltr_list(&remove_list);
5408
5409	return (err);
5410}
5411
5412/**
5413 * ice_add_vlan_hw_filters - Add multiple VLAN filters for a given VSI
5414 * @vsi: The VSI to add the filter for
5415 * @vid: array of VLAN ids to add
5416 * @length: length of vid array
5417 *
5418 * Programs HW filters so that the given VSI will receive the specified VLANs.
5419 */
5420enum ice_status
5421ice_add_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5422{
5423	struct ice_hw *hw = &vsi->sc->hw;
5424	struct ice_list_head vlan_list;
5425	struct ice_fltr_list_entry *vlan_entries;
5426	enum ice_status status;
5427
5428	MPASS(length > 0);
5429
5430	INIT_LIST_HEAD(&vlan_list);
5431
5432	vlan_entries = (struct ice_fltr_list_entry *)
5433	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5434	if (!vlan_entries)
5435		return (ICE_ERR_NO_MEMORY);
5436
5437	for (u16 i = 0; i < length; i++) {
5438		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5439		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5440		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5441		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5442		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5443		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5444
5445		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5446	}
5447
5448	status = ice_add_vlan(hw, &vlan_list);
5449	if (!status)
5450		goto done;
5451
5452	device_printf(vsi->sc->dev, "Failed to add VLAN filters:\n");
5453	for (u16 i = 0; i < length; i++) {
5454		device_printf(vsi->sc->dev,
5455		    "- vlan %d, status %d\n",
5456		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5457		    vlan_entries[i].status);
5458	}
5459done:
5460	free(vlan_entries, M_ICE);
5461	return (status);
5462}
5463
5464/**
5465 * ice_add_vlan_hw_filter - Add a VLAN filter for a given VSI
5466 * @vsi: The VSI to add the filter for
5467 * @vid: VLAN to add
5468 *
5469 * Programs a HW filter so that the given VSI will receive the specified VLAN.
5470 */
5471enum ice_status
5472ice_add_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5473{
5474	return ice_add_vlan_hw_filters(vsi, &vid, 1);
5475}
5476
5477/**
5478 * ice_remove_vlan_hw_filters - Remove multiple VLAN filters for a given VSI
5479 * @vsi: The VSI to remove the filters from
5480 * @vid: array of VLAN ids to remove
5481 * @length: length of vid array
5482 *
5483 * Removes previously programmed HW filters for the specified VSI.
5484 */
5485enum ice_status
5486ice_remove_vlan_hw_filters(struct ice_vsi *vsi, u16 *vid, u16 length)
5487{
5488	struct ice_hw *hw = &vsi->sc->hw;
5489	struct ice_list_head vlan_list;
5490	struct ice_fltr_list_entry *vlan_entries;
5491	enum ice_status status;
5492
5493	MPASS(length > 0);
5494
5495	INIT_LIST_HEAD(&vlan_list);
5496
5497	vlan_entries = (struct ice_fltr_list_entry *)
5498	    malloc(sizeof(*vlan_entries) * length, M_ICE, M_NOWAIT | M_ZERO);
5499	if (!vlan_entries)
5500		return (ICE_ERR_NO_MEMORY);
5501
5502	for (u16 i = 0; i < length; i++) {
5503		vlan_entries[i].fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
5504		vlan_entries[i].fltr_info.fltr_act = ICE_FWD_TO_VSI;
5505		vlan_entries[i].fltr_info.flag = ICE_FLTR_TX;
5506		vlan_entries[i].fltr_info.src_id = ICE_SRC_ID_VSI;
5507		vlan_entries[i].fltr_info.vsi_handle = vsi->idx;
5508		vlan_entries[i].fltr_info.l_data.vlan.vlan_id = vid[i];
5509
5510		LIST_ADD(&vlan_entries[i].list_entry, &vlan_list);
5511	}
5512
5513	status = ice_remove_vlan(hw, &vlan_list);
5514	if (!status)
5515		goto done;
5516
5517	device_printf(vsi->sc->dev, "Failed to remove VLAN filters:\n");
5518	for (u16 i = 0; i < length; i++) {
5519		device_printf(vsi->sc->dev,
5520		    "- vlan %d, status %d\n",
5521		    vlan_entries[i].fltr_info.l_data.vlan.vlan_id,
5522		    vlan_entries[i].status);
5523	}
5524done:
5525	free(vlan_entries, M_ICE);
5526	return (status);
5527}
5528
5529/**
5530 * ice_remove_vlan_hw_filter - Remove a VLAN filter for a given VSI
5531 * @vsi: The VSI to remove the filter from
5532 * @vid: VLAN to remove
5533 *
5534 * Removes a previously programmed HW filter for the specified VSI.
5535 */
5536enum ice_status
5537ice_remove_vlan_hw_filter(struct ice_vsi *vsi, u16 vid)
5538{
5539	return ice_remove_vlan_hw_filters(vsi, &vid, 1);
5540}
5541
5542#define ICE_SYSCTL_HELP_RX_ITR			\
5543"\nControl Rx interrupt throttle rate."		\
5544"\n\t0-8160 - sets interrupt rate in usecs"	\
5545"\n\t    -1 - reset the Rx itr to default"
5546
5547/**
5548 * ice_sysctl_rx_itr - Display or change the Rx ITR for a VSI
5549 * @oidp: sysctl oid structure
5550 * @arg1: pointer to private data structure
5551 * @arg2: unused
5552 * @req: sysctl request pointer
5553 *
5554 * On read: Displays the current Rx ITR value
5555 * on write: Sets the Rx ITR value, reconfiguring device if it is up
5556 */
5557static int
5558ice_sysctl_rx_itr(SYSCTL_HANDLER_ARGS)
5559{
5560	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5561	struct ice_softc *sc = vsi->sc;
5562	int increment, ret;
5563
5564	UNREFERENCED_PARAMETER(arg2);
5565
5566	if (ice_driver_is_detaching(sc))
5567		return (ESHUTDOWN);
5568
5569	ret = sysctl_handle_16(oidp, &vsi->rx_itr, 0, req);
5570	if ((ret) || (req->newptr == NULL))
5571		return (ret);
5572
5573	if (vsi->rx_itr < 0)
5574		vsi->rx_itr = ICE_DFLT_RX_ITR;
5575	if (vsi->rx_itr > ICE_ITR_MAX)
5576		vsi->rx_itr = ICE_ITR_MAX;
5577
5578	/* Assume 2usec increment if it hasn't been loaded yet */
5579	increment = sc->hw.itr_gran ? : 2;
5580
5581	/* We need to round the value to the hardware's ITR granularity */
5582	vsi->rx_itr = (vsi->rx_itr / increment ) * increment;
5583
5584	/* If the driver has finished initializing, then we need to reprogram
5585	 * the ITR registers now. Otherwise, they will be programmed during
5586	 * driver initialization.
5587	 */
5588	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5589		ice_configure_rx_itr(vsi);
5590
5591	return (0);
5592}
5593
5594#define ICE_SYSCTL_HELP_TX_ITR			\
5595"\nControl Tx interrupt throttle rate."		\
5596"\n\t0-8160 - sets interrupt rate in usecs"	\
5597"\n\t    -1 - reset the Tx itr to default"
5598
5599/**
5600 * ice_sysctl_tx_itr - Display or change the Tx ITR for a VSI
5601 * @oidp: sysctl oid structure
5602 * @arg1: pointer to private data structure
5603 * @arg2: unused
5604 * @req: sysctl request pointer
5605 *
5606 * On read: Displays the current Tx ITR value
5607 * on write: Sets the Tx ITR value, reconfiguring device if it is up
5608 */
5609static int
5610ice_sysctl_tx_itr(SYSCTL_HANDLER_ARGS)
5611{
5612	struct ice_vsi *vsi = (struct ice_vsi *)arg1;
5613	struct ice_softc *sc = vsi->sc;
5614	int increment, ret;
5615
5616	UNREFERENCED_PARAMETER(arg2);
5617
5618	if (ice_driver_is_detaching(sc))
5619		return (ESHUTDOWN);
5620
5621	ret = sysctl_handle_16(oidp, &vsi->tx_itr, 0, req);
5622	if ((ret) || (req->newptr == NULL))
5623		return (ret);
5624
5625	/* Allow configuring a negative value to reset to the default */
5626	if (vsi->tx_itr < 0)
5627		vsi->tx_itr = ICE_DFLT_TX_ITR;
5628	if (vsi->tx_itr > ICE_ITR_MAX)
5629		vsi->tx_itr = ICE_ITR_MAX;
5630
5631	/* Assume 2usec increment if it hasn't been loaded yet */
5632	increment = sc->hw.itr_gran ? : 2;
5633
5634	/* We need to round the value to the hardware's ITR granularity */
5635	vsi->tx_itr = (vsi->tx_itr / increment ) * increment;
5636
5637	/* If the driver has finished initializing, then we need to reprogram
5638	 * the ITR registers now. Otherwise, they will be programmed during
5639	 * driver initialization.
5640	 */
5641	if (ice_test_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
5642		ice_configure_tx_itr(vsi);
5643
5644	return (0);
5645}
5646
5647/**
5648 * ice_add_vsi_tunables - Add tunables and nodes for a VSI
5649 * @vsi: pointer to VSI structure
5650 * @parent: parent node to add the tunables under
5651 *
5652 * Create a sysctl context for the VSI, so that sysctls for the VSI can be
5653 * dynamically removed upon VSI removal.
5654 *
5655 * Add various tunables and set up the basic node structure for the VSI. Must
5656 * be called *prior* to ice_add_vsi_sysctls. It should be called as soon as
5657 * possible after the VSI memory is initialized.
5658 *
5659 * VSI specific sysctls with CTLFLAG_TUN should be initialized here so that
5660 * their values can be read from loader.conf prior to their first use in the
5661 * driver.
5662 */
5663void
5664ice_add_vsi_tunables(struct ice_vsi *vsi, struct sysctl_oid *parent)
5665{
5666	struct sysctl_oid_list *vsi_list;
5667	char vsi_name[32], vsi_desc[32];
5668
5669	struct sysctl_oid_list *parent_list = SYSCTL_CHILDREN(parent);
5670
5671	/* Initialize the sysctl context for this VSI */
5672	sysctl_ctx_init(&vsi->ctx);
5673
5674	/* Add a node to collect this VSI's statistics together */
5675	snprintf(vsi_name, sizeof(vsi_name), "%u", vsi->idx);
5676	snprintf(vsi_desc, sizeof(vsi_desc), "VSI %u", vsi->idx);
5677	vsi->vsi_node = SYSCTL_ADD_NODE(&vsi->ctx, parent_list, OID_AUTO, vsi_name,
5678					CTLFLAG_RD, NULL, vsi_desc);
5679	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
5680
5681	vsi->rx_itr = ICE_DFLT_TX_ITR;
5682	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "rx_itr",
5683			CTLTYPE_S16 | CTLFLAG_RWTUN,
5684			vsi, 0, ice_sysctl_rx_itr, "S",
5685			ICE_SYSCTL_HELP_RX_ITR);
5686
5687	vsi->tx_itr = ICE_DFLT_TX_ITR;
5688	SYSCTL_ADD_PROC(&vsi->ctx, vsi_list, OID_AUTO, "tx_itr",
5689			CTLTYPE_S16 | CTLFLAG_RWTUN,
5690			vsi, 0, ice_sysctl_tx_itr, "S",
5691			ICE_SYSCTL_HELP_TX_ITR);
5692}
5693
5694/**
5695 * ice_del_vsi_sysctl_ctx - Delete the sysctl context(s) of a VSI
5696 * @vsi: the VSI to remove contexts for
5697 *
5698 * Free the context for the VSI sysctls. This includes the main context, as
5699 * well as the per-queue sysctls.
5700 */
5701void
5702ice_del_vsi_sysctl_ctx(struct ice_vsi *vsi)
5703{
5704	device_t dev = vsi->sc->dev;
5705	int err;
5706
5707	if (vsi->vsi_node) {
5708		err = sysctl_ctx_free(&vsi->ctx);
5709		if (err)
5710			device_printf(dev, "failed to free VSI %d sysctl context, err %s\n",
5711				      vsi->idx, ice_err_str(err));
5712		vsi->vsi_node = NULL;
5713	}
5714}
5715
5716/**
5717 * ice_add_dscp2tc_map_sysctls - Add sysctl tree for DSCP to TC mapping
5718 * @sc: pointer to device private softc
5719 * @ctx: the sysctl ctx to use
5720 * @ctx_list: list of sysctl children for device (to add sysctl tree to)
5721 *
5722 * Add a sysctl tree for individual dscp2tc_map sysctls. Each child of this
5723 * node can map 8 DSCPs to TC values; there are 8 of these in turn for a total
5724 * of 64 DSCP to TC map values that the user can configure.
5725 */
5726void
5727ice_add_dscp2tc_map_sysctls(struct ice_softc *sc,
5728			    struct sysctl_ctx_list *ctx,
5729			    struct sysctl_oid_list *ctx_list)
5730{
5731	struct sysctl_oid_list *node_list;
5732	struct sysctl_oid *node;
5733	struct sbuf *namebuf, *descbuf;
5734	int first_dscp_val, last_dscp_val;
5735
5736	node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "dscp2tc_map", CTLFLAG_RD,
5737			       NULL, "Map of DSCP values to DCB TCs");
5738	node_list = SYSCTL_CHILDREN(node);
5739
5740	namebuf = sbuf_new_auto();
5741	descbuf = sbuf_new_auto();
5742	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
5743		sbuf_clear(namebuf);
5744		sbuf_clear(descbuf);
5745
5746		first_dscp_val = i * 8;
5747		last_dscp_val = first_dscp_val + 7;
5748
5749		sbuf_printf(namebuf, "%d-%d", first_dscp_val, last_dscp_val);
5750		sbuf_printf(descbuf, "Map DSCP values %d to %d to TCs",
5751			    first_dscp_val, last_dscp_val);
5752
5753		sbuf_finish(namebuf);
5754		sbuf_finish(descbuf);
5755
5756		SYSCTL_ADD_PROC(ctx, node_list,
5757		    OID_AUTO, sbuf_data(namebuf), CTLTYPE_STRING | CTLFLAG_RW,
5758		    sc, i, ice_sysctl_dscp2tc_map, "A", sbuf_data(descbuf));
5759	}
5760
5761	sbuf_delete(namebuf);
5762	sbuf_delete(descbuf);
5763}
5764
5765/**
5766 * ice_add_device_tunables - Add early tunable sysctls and sysctl nodes
5767 * @sc: device private structure
5768 *
5769 * Add per-device dynamic tunable sysctls, and setup the general sysctl trees
5770 * for re-use by ice_add_device_sysctls.
5771 *
5772 * In order for the sysctl fields to be initialized before use, this function
5773 * should be called as early as possible during attach activities.
5774 *
5775 * Any non-global sysctl marked as CTLFLAG_TUN should likely be initialized
5776 * here in this function, rather than later in ice_add_device_sysctls.
5777 *
5778 * To make things easier, this function is also expected to setup the various
5779 * sysctl nodes in addition to tunables so that other sysctls which can't be
5780 * initialized early can hook into the same nodes.
5781 */
5782void
5783ice_add_device_tunables(struct ice_softc *sc)
5784{
5785	device_t dev = sc->dev;
5786
5787	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5788	struct sysctl_oid_list *ctx_list =
5789		SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
5790
5791	sc->enable_health_events = ice_enable_health_events;
5792
5793	SYSCTL_ADD_BOOL(ctx, ctx_list, OID_AUTO, "enable_health_events",
5794			CTLFLAG_RDTUN, &sc->enable_health_events, 0,
5795			"Enable FW health event reporting for this PF");
5796
5797	/* Add a node to track VSI sysctls. Keep track of the node in the
5798	 * softc so that we can hook other sysctls into it later. This
5799	 * includes both the VSI statistics, as well as potentially dynamic
5800	 * VSIs in the future.
5801	 */
5802
5803	sc->vsi_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "vsi",
5804					  CTLFLAG_RD, NULL, "VSI Configuration and Statistics");
5805
5806	/* Add debug tunables */
5807	ice_add_debug_tunables(sc);
5808}
5809
5810/**
5811 * ice_sysctl_dump_mac_filters - Dump a list of all HW MAC Filters
5812 * @oidp: sysctl oid structure
5813 * @arg1: pointer to private data structure
5814 * @arg2: unused
5815 * @req: sysctl request pointer
5816 *
5817 * Callback for "mac_filters" sysctl to dump the programmed MAC filters.
5818 */
5819static int
5820ice_sysctl_dump_mac_filters(SYSCTL_HANDLER_ARGS)
5821{
5822	struct ice_softc *sc = (struct ice_softc *)arg1;
5823	struct ice_hw *hw = &sc->hw;
5824	struct ice_switch_info *sw = hw->switch_info;
5825	struct ice_fltr_mgmt_list_entry *fm_entry;
5826	struct ice_list_head *rule_head;
5827	struct ice_lock *rule_lock;
5828	struct ice_fltr_info *fi;
5829	struct sbuf *sbuf;
5830	int ret;
5831
5832	UNREFERENCED_PARAMETER(oidp);
5833	UNREFERENCED_PARAMETER(arg2);
5834
5835	if (ice_driver_is_detaching(sc))
5836		return (ESHUTDOWN);
5837
5838	/* Wire the old buffer so we can take a non-sleepable lock */
5839	ret = sysctl_wire_old_buffer(req, 0);
5840	if (ret)
5841		return (ret);
5842
5843	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5844
5845	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
5846	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
5847
5848	sbuf_printf(sbuf, "MAC Filter List");
5849
5850	ice_acquire_lock(rule_lock);
5851
5852	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5853		fi = &fm_entry->fltr_info;
5854
5855		sbuf_printf(sbuf,
5856			    "\nmac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %d",
5857			    fi->l_data.mac.mac_addr, ":", fi->vsi_handle,
5858			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5859			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5860
5861		/* if we have a vsi_list_info, print some information about that */
5862		if (fm_entry->vsi_list_info) {
5863			sbuf_printf(sbuf,
5864				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5865				    fm_entry->vsi_count,
5866				    fm_entry->vsi_list_info->vsi_list_id,
5867				    fm_entry->vsi_list_info->ref_cnt);
5868		}
5869	}
5870
5871	ice_release_lock(rule_lock);
5872
5873	sbuf_finish(sbuf);
5874	sbuf_delete(sbuf);
5875
5876	return (0);
5877}
5878
5879/**
5880 * ice_sysctl_dump_vlan_filters - Dump a list of all HW VLAN Filters
5881 * @oidp: sysctl oid structure
5882 * @arg1: pointer to private data structure
5883 * @arg2: unused
5884 * @req: sysctl request pointer
5885 *
5886 * Callback for "vlan_filters" sysctl to dump the programmed VLAN filters.
5887 */
5888static int
5889ice_sysctl_dump_vlan_filters(SYSCTL_HANDLER_ARGS)
5890{
5891	struct ice_softc *sc = (struct ice_softc *)arg1;
5892	struct ice_hw *hw = &sc->hw;
5893	struct ice_switch_info *sw = hw->switch_info;
5894	struct ice_fltr_mgmt_list_entry *fm_entry;
5895	struct ice_list_head *rule_head;
5896	struct ice_lock *rule_lock;
5897	struct ice_fltr_info *fi;
5898	struct sbuf *sbuf;
5899	int ret;
5900
5901	UNREFERENCED_PARAMETER(oidp);
5902	UNREFERENCED_PARAMETER(arg2);
5903
5904	if (ice_driver_is_detaching(sc))
5905		return (ESHUTDOWN);
5906
5907	/* Wire the old buffer so we can take a non-sleepable lock */
5908	ret = sysctl_wire_old_buffer(req, 0);
5909	if (ret)
5910		return (ret);
5911
5912	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5913
5914	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
5915	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
5916
5917	sbuf_printf(sbuf, "VLAN Filter List");
5918
5919	ice_acquire_lock(rule_lock);
5920
5921	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5922		fi = &fm_entry->fltr_info;
5923
5924		sbuf_printf(sbuf,
5925			    "\nvlan_id = %4d, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5926			    fi->l_data.vlan.vlan_id, fi->vsi_handle,
5927			    ice_fltr_flag_str(fi->flag), fi->lb_en, fi->lan_en,
5928			    ice_fwd_act_str(fi->fltr_act), fi->fltr_rule_id);
5929
5930		/* if we have a vsi_list_info, print some information about that */
5931		if (fm_entry->vsi_list_info) {
5932			sbuf_printf(sbuf,
5933				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
5934				    fm_entry->vsi_count,
5935				    fm_entry->vsi_list_info->vsi_list_id,
5936				    fm_entry->vsi_list_info->ref_cnt);
5937		}
5938	}
5939
5940	ice_release_lock(rule_lock);
5941
5942	sbuf_finish(sbuf);
5943	sbuf_delete(sbuf);
5944
5945	return (0);
5946}
5947
5948/**
5949 * ice_sysctl_dump_ethertype_filters - Dump a list of all HW Ethertype filters
5950 * @oidp: sysctl oid structure
5951 * @arg1: pointer to private data structure
5952 * @arg2: unused
5953 * @req: sysctl request pointer
5954 *
5955 * Callback for "ethertype_filters" sysctl to dump the programmed Ethertype
5956 * filters.
5957 */
5958static int
5959ice_sysctl_dump_ethertype_filters(SYSCTL_HANDLER_ARGS)
5960{
5961	struct ice_softc *sc = (struct ice_softc *)arg1;
5962	struct ice_hw *hw = &sc->hw;
5963	struct ice_switch_info *sw = hw->switch_info;
5964	struct ice_fltr_mgmt_list_entry *fm_entry;
5965	struct ice_list_head *rule_head;
5966	struct ice_lock *rule_lock;
5967	struct ice_fltr_info *fi;
5968	struct sbuf *sbuf;
5969	int ret;
5970
5971	UNREFERENCED_PARAMETER(oidp);
5972	UNREFERENCED_PARAMETER(arg2);
5973
5974	if (ice_driver_is_detaching(sc))
5975		return (ESHUTDOWN);
5976
5977	/* Wire the old buffer so we can take a non-sleepable lock */
5978	ret = sysctl_wire_old_buffer(req, 0);
5979	if (ret)
5980		return (ret);
5981
5982	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5983
5984	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rule_lock;
5985	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE].filt_rules;
5986
5987	sbuf_printf(sbuf, "Ethertype Filter List");
5988
5989	ice_acquire_lock(rule_lock);
5990
5991	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
5992		fi = &fm_entry->fltr_info;
5993
5994		sbuf_printf(sbuf,
5995			    "\nethertype = 0x%04x, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
5996			fi->l_data.ethertype_mac.ethertype,
5997			fi->vsi_handle, ice_fltr_flag_str(fi->flag),
5998			fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
5999			fi->fltr_rule_id);
6000
6001		/* if we have a vsi_list_info, print some information about that */
6002		if (fm_entry->vsi_list_info) {
6003			sbuf_printf(sbuf,
6004				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6005				    fm_entry->vsi_count,
6006				    fm_entry->vsi_list_info->vsi_list_id,
6007				    fm_entry->vsi_list_info->ref_cnt);
6008		}
6009	}
6010
6011	ice_release_lock(rule_lock);
6012
6013	sbuf_finish(sbuf);
6014	sbuf_delete(sbuf);
6015
6016	return (0);
6017}
6018
6019/**
6020 * ice_sysctl_dump_ethertype_mac_filters - Dump a list of all HW Ethertype/MAC filters
6021 * @oidp: sysctl oid structure
6022 * @arg1: pointer to private data structure
6023 * @arg2: unused
6024 * @req: sysctl request pointer
6025 *
6026 * Callback for "ethertype_mac_filters" sysctl to dump the programmed
6027 * Ethertype/MAC filters.
6028 */
6029static int
6030ice_sysctl_dump_ethertype_mac_filters(SYSCTL_HANDLER_ARGS)
6031{
6032	struct ice_softc *sc = (struct ice_softc *)arg1;
6033	struct ice_hw *hw = &sc->hw;
6034	struct ice_switch_info *sw = hw->switch_info;
6035	struct ice_fltr_mgmt_list_entry *fm_entry;
6036	struct ice_list_head *rule_head;
6037	struct ice_lock *rule_lock;
6038	struct ice_fltr_info *fi;
6039	struct sbuf *sbuf;
6040	int ret;
6041
6042	UNREFERENCED_PARAMETER(oidp);
6043	UNREFERENCED_PARAMETER(arg2);
6044
6045	if (ice_driver_is_detaching(sc))
6046		return (ESHUTDOWN);
6047
6048	/* Wire the old buffer so we can take a non-sleepable lock */
6049	ret = sysctl_wire_old_buffer(req, 0);
6050	if (ret)
6051		return (ret);
6052
6053	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6054
6055	rule_lock = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rule_lock;
6056	rule_head = &sw->recp_list[ICE_SW_LKUP_ETHERTYPE_MAC].filt_rules;
6057
6058	sbuf_printf(sbuf, "Ethertype/MAC Filter List");
6059
6060	ice_acquire_lock(rule_lock);
6061
6062	LIST_FOR_EACH_ENTRY(fm_entry, rule_head, ice_fltr_mgmt_list_entry, list_entry) {
6063		fi = &fm_entry->fltr_info;
6064
6065		sbuf_printf(sbuf,
6066			    "\nethertype = 0x%04x, mac = %6D, vsi_handle = %3d, fw_act_flag = %5s, lb_en = %1d, lan_en = %1d, fltr_act = %15s, fltr_rule_id = %4d",
6067			    fi->l_data.ethertype_mac.ethertype,
6068			    fi->l_data.ethertype_mac.mac_addr, ":",
6069			    fi->vsi_handle, ice_fltr_flag_str(fi->flag),
6070			    fi->lb_en, fi->lan_en, ice_fwd_act_str(fi->fltr_act),
6071			    fi->fltr_rule_id);
6072
6073		/* if we have a vsi_list_info, print some information about that */
6074		if (fm_entry->vsi_list_info) {
6075			sbuf_printf(sbuf,
6076				    ", vsi_count = %3d, vsi_list_id = %3d, ref_cnt = %3d",
6077				    fm_entry->vsi_count,
6078				    fm_entry->vsi_list_info->vsi_list_id,
6079				    fm_entry->vsi_list_info->ref_cnt);
6080		}
6081	}
6082
6083	ice_release_lock(rule_lock);
6084
6085	sbuf_finish(sbuf);
6086	sbuf_delete(sbuf);
6087
6088	return (0);
6089}
6090
6091/**
6092 * ice_sysctl_dump_state_flags - Dump device driver state flags
6093 * @oidp: sysctl oid structure
6094 * @arg1: pointer to private data structure
6095 * @arg2: unused
6096 * @req: sysctl request pointer
6097 *
6098 * Callback for "state" sysctl to display currently set driver state flags.
6099 */
6100static int
6101ice_sysctl_dump_state_flags(SYSCTL_HANDLER_ARGS)
6102{
6103	struct ice_softc *sc = (struct ice_softc *)arg1;
6104	struct sbuf *sbuf;
6105	u32 copied_state;
6106	unsigned int i;
6107	bool at_least_one = false;
6108
6109	UNREFERENCED_PARAMETER(oidp);
6110	UNREFERENCED_PARAMETER(arg2);
6111
6112	if (ice_driver_is_detaching(sc))
6113		return (ESHUTDOWN);
6114
6115	/* Make a copy of the state to ensure we display coherent values */
6116	copied_state = atomic_load_acq_32(&sc->state);
6117
6118	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6119
6120	/* Add the string for each set state to the sbuf */
6121	for (i = 0; i < 32; i++) {
6122		if (copied_state & BIT(i)) {
6123			const char *str = ice_state_to_str((enum ice_state)i);
6124
6125			at_least_one = true;
6126
6127			if (str)
6128				sbuf_printf(sbuf, "\n%s", str);
6129			else
6130				sbuf_printf(sbuf, "\nBIT(%u)", i);
6131		}
6132	}
6133
6134	if (!at_least_one)
6135		sbuf_printf(sbuf, "Nothing set");
6136
6137	sbuf_finish(sbuf);
6138	sbuf_delete(sbuf);
6139
6140	return (0);
6141}
6142
/*
 * Description string for the "debug_mask" sysctl registered in
 * ice_add_debug_tunables. Lists the individual flag bits a user can set in
 * the mask, one per line.
 */
#define ICE_SYSCTL_DEBUG_MASK_HELP \
"\nSelect debug statements to print to kernel messages"		\
"\nFlags:"							\
"\n\t        0x1 - Function Tracing"				\
"\n\t        0x2 - Driver Initialization"			\
"\n\t        0x4 - Release"					\
"\n\t        0x8 - FW Logging"					\
"\n\t       0x10 - Link"					\
"\n\t       0x20 - PHY"						\
"\n\t       0x40 - Queue Context"				\
"\n\t       0x80 - NVM"						\
"\n\t      0x100 - LAN"						\
"\n\t      0x200 - Flow"					\
"\n\t      0x400 - DCB"						\
"\n\t      0x800 - Diagnostics"					\
"\n\t     0x1000 - Flow Director"				\
"\n\t     0x2000 - Switch"					\
"\n\t     0x4000 - Scheduler"					\
"\n\t     0x8000 - RDMA"					\
"\n\t    0x10000 - DDP Package"					\
"\n\t    0x20000 - Resources"					\
"\n\t    0x40000 - ACL"						\
"\n\t    0x80000 - PTP"						\
"\n\t   0x100000 - Admin Queue messages"			\
"\n\t   0x200000 - Admin Queue descriptors"			\
"\n\t   0x400000 - Admin Queue descriptor buffers"		\
"\n\t   0x800000 - Admin Queue commands"			\
"\n\t  0x1000000 - Parser"					\
"\n\t  ..."							\
"\n\t  0x8000000 - (Reserved for user)"				\
"\n\t"								\
"\nUse \"sysctl -x\" to view flags properly."
6175
6176/**
6177 * ice_add_debug_tunables - Add tunables helpful for debugging the device driver
6178 * @sc: device private structure
6179 *
6180 * Add sysctl tunable values related to debugging the device driver. For now,
6181 * this means a tunable to set the debug mask early during driver load.
6182 *
6183 * The debug node will be marked CTLFLAG_SKIP unless INVARIANTS is defined, so
6184 * that in normal kernel builds, these will all be hidden, but on a debug
6185 * kernel they will be more easily visible.
6186 */
6187static void
6188ice_add_debug_tunables(struct ice_softc *sc)
6189{
6190	struct sysctl_oid_list *debug_list;
6191	device_t dev = sc->dev;
6192
6193	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6194	struct sysctl_oid_list *ctx_list =
6195	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
6196
6197	sc->debug_sysctls = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug",
6198					    ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6199					    NULL, "Debug Sysctls");
6200	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6201
6202	SYSCTL_ADD_U64(ctx, debug_list, OID_AUTO, "debug_mask",
6203		       ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6204		       &sc->hw.debug_mask, 0,
6205		       ICE_SYSCTL_DEBUG_MASK_HELP);
6206
6207	/* Load the default value from the global sysctl first */
6208	sc->enable_tx_fc_filter = ice_enable_tx_fc_filter;
6209
6210	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_fc_filter",
6211			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6212			&sc->enable_tx_fc_filter, 0,
6213			"Drop Ethertype 0x8808 control frames originating from software on this PF");
6214
6215	sc->tx_balance_en = ice_tx_balance_en;
6216	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "tx_balance",
6217			ICE_CTLFLAG_DEBUG | CTLFLAG_RWTUN,
6218			&sc->tx_balance_en, 0,
6219			"Enable 5-layer scheduler topology");
6220
6221	/* Load the default value from the global sysctl first */
6222	sc->enable_tx_lldp_filter = ice_enable_tx_lldp_filter;
6223
6224	SYSCTL_ADD_BOOL(ctx, debug_list, OID_AUTO, "enable_tx_lldp_filter",
6225			ICE_CTLFLAG_DEBUG | CTLFLAG_RDTUN,
6226			&sc->enable_tx_lldp_filter, 0,
6227			"Drop Ethertype 0x88cc LLDP frames originating from software on this PF");
6228
6229	ice_add_fw_logging_tunables(sc, sc->debug_sysctls);
6230}
6231
/*
 * Description string for the "request_reset" sysctl. Lists the reset names
 * that ice_sysctl_request_reset acts on. That handler also recognizes
 * "empr", but only to reject it with EOPNOTSUPP, so it is not listed here.
 */
#define ICE_SYSCTL_HELP_REQUEST_RESET		\
"\nRequest the driver to initiate a reset."	\
"\n\tpfr - Initiate a PF reset"			\
"\n\tcorer - Initiate a CORE reset"		\
"\n\tglobr - Initiate a GLOBAL reset"
6237
6238/**
6239 * @var rl_sysctl_ticks
6240 * @brief timestamp for latest reset request sysctl call
6241 *
6242 * Helps rate-limit the call to the sysctl which resets the device
6243 */
6244int rl_sysctl_ticks = 0;
6245
6246/**
6247 * ice_sysctl_request_reset - Request that the driver initiate a reset
6248 * @oidp: sysctl oid structure
6249 * @arg1: pointer to private data structure
6250 * @arg2: unused
6251 * @req: sysctl request pointer
6252 *
6253 * Callback for "request_reset" sysctl to request that the driver initiate
6254 * a reset. Expects to be passed one of the following strings
6255 *
6256 * "pfr" - Initiate a PF reset
6257 * "corer" - Initiate a CORE reset
6258 * "globr" - Initiate a Global reset
6259 */
6260static int
6261ice_sysctl_request_reset(SYSCTL_HANDLER_ARGS)
6262{
6263	struct ice_softc *sc = (struct ice_softc *)arg1;
6264	struct ice_hw *hw = &sc->hw;
6265	enum ice_status status;
6266	enum ice_reset_req reset_type = ICE_RESET_INVAL;
6267	const char *reset_message;
6268	int ret;
6269
6270	/* Buffer to store the requested reset string. Must contain enough
6271	 * space to store the largest expected reset string, which currently
6272	 * means 6 bytes of space.
6273	 */
6274	char reset[6] = "";
6275
6276	UNREFERENCED_PARAMETER(arg2);
6277
6278	ret = priv_check(curthread, PRIV_DRIVER);
6279	if (ret)
6280		return (ret);
6281
6282	if (ice_driver_is_detaching(sc))
6283		return (ESHUTDOWN);
6284
6285	/* Read in the requested reset type. */
6286	ret = sysctl_handle_string(oidp, reset, sizeof(reset), req);
6287	if ((ret) || (req->newptr == NULL))
6288		return (ret);
6289
6290	if (strcmp(reset, "pfr") == 0) {
6291		reset_message = "Requesting a PF reset";
6292		reset_type = ICE_RESET_PFR;
6293	} else if (strcmp(reset, "corer") == 0) {
6294		reset_message = "Initiating a CORE reset";
6295		reset_type = ICE_RESET_CORER;
6296	} else if (strcmp(reset, "globr") == 0) {
6297		reset_message = "Initiating a GLOBAL reset";
6298		reset_type = ICE_RESET_GLOBR;
6299	} else if (strcmp(reset, "empr") == 0) {
6300		device_printf(sc->dev, "Triggering an EMP reset via software is not currently supported\n");
6301		return (EOPNOTSUPP);
6302	}
6303
6304	if (reset_type == ICE_RESET_INVAL) {
6305		device_printf(sc->dev, "%s is not a valid reset request\n", reset);
6306		return (EINVAL);
6307	}
6308
6309	/*
6310	 * Rate-limit the frequency at which this function is called.
6311	 * Assuming this is called successfully once, typically,
6312	 * everything should be handled within the allotted time frame.
6313	 * However, in the odd setup situations, we've also put in
6314	 * guards for when the reset has finished, but we're in the
6315	 * process of rebuilding. And instead of queueing an intent,
6316	 * simply error out and let the caller retry, if so desired.
6317	 */
6318	if (TICKS_2_MSEC(ticks - rl_sysctl_ticks) < 500) {
6319		device_printf(sc->dev,
6320		    "Call frequency too high. Operation aborted.\n");
6321		return (EBUSY);
6322	}
6323	rl_sysctl_ticks = ticks;
6324
6325	if (TICKS_2_MSEC(ticks - sc->rebuild_ticks) < 100) {
6326		device_printf(sc->dev, "Device rebuilding. Operation aborted.\n");
6327		return (EBUSY);
6328	}
6329
6330	if (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) {
6331		device_printf(sc->dev, "Device in reset. Operation aborted.\n");
6332		return (EBUSY);
6333	}
6334
6335	device_printf(sc->dev, "%s\n", reset_message);
6336
6337	/* Initiate the PF reset during the admin status task */
6338	if (reset_type == ICE_RESET_PFR) {
6339		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
6340		return (0);
6341	}
6342
6343	/*
6344	 * Other types of resets including CORE and GLOBAL resets trigger an
6345	 * interrupt on all PFs. Initiate the reset now. Preparation and
6346	 * rebuild logic will be handled by the admin status task.
6347	 */
6348	status = ice_reset(hw, reset_type);
6349
6350	/*
6351	 * Resets can take a long time and we still don't want another call
6352	 * to this function before we settle down.
6353	 */
6354	rl_sysctl_ticks = ticks;
6355
6356	if (status) {
6357		device_printf(sc->dev, "failed to initiate device reset, err %s\n",
6358			      ice_status_str(status));
6359		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
6360		return (EFAULT);
6361	}
6362
6363	return (0);
6364}
6365
/*
 * Sentinel stored in the softc's fw_debug_dump_cluster_mask by
 * ice_sysctl_fw_debug_dump_cluster_setting when an unsupported mask is
 * requested. ice_sysctl_fw_debug_dump_do_dump refuses to arm a dump while the
 * mask holds this value.
 */
#define ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID	(0xFFFFFF)
/*
 * Description string for the sysctl that selects which FW clusters are
 * dumped; lists the cluster flag bits, one per line.
 */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING		\
"\nSelect clusters to dump with \"dump\" sysctl"		\
"\nFlags:"							\
"\n\t      0x1 - Switch"					\
"\n\t      0x2 - ACL"						\
"\n\t      0x4 - Tx Scheduler"					\
"\n\t      0x8 - Profile Configuration"			\
"\n\t     0x20 - Link"						\
"\n\t     0x80 - DCB"						\
"\n\t    0x100 - L2P"						\
"\n\t 0x400000 - Manageability Transactions"			\
"\n\t"								\
"\nUse \"sysctl -x\" to view flags properly."
6380
6381/**
6382 * ice_sysctl_fw_debug_dump_cluster_setting - Set which clusters to dump
6383 *     from FW when FW debug dump occurs
6384 * @oidp: sysctl oid structure
6385 * @arg1: pointer to private data structure
6386 * @arg2: unused
6387 * @req: sysctl request pointer
6388 */
6389static int
6390ice_sysctl_fw_debug_dump_cluster_setting(SYSCTL_HANDLER_ARGS)
6391{
6392	struct ice_softc *sc = (struct ice_softc *)arg1;
6393	device_t dev = sc->dev;
6394	u32 clusters;
6395	int ret;
6396
6397	UNREFERENCED_PARAMETER(arg2);
6398
6399	ret = priv_check(curthread, PRIV_DRIVER);
6400	if (ret)
6401		return (ret);
6402
6403	if (ice_driver_is_detaching(sc))
6404		return (ESHUTDOWN);
6405
6406	clusters = sc->fw_debug_dump_cluster_mask;
6407
6408	ret = sysctl_handle_32(oidp, &clusters, 0, req);
6409	if ((ret) || (req->newptr == NULL))
6410		return (ret);
6411
6412	if (clusters & ~(ICE_FW_DEBUG_DUMP_VALID_CLUSTER_MASK)) {
6413		device_printf(dev,
6414		    "%s: ERROR: Incorrect settings requested\n",
6415		    __func__);
6416		sc->fw_debug_dump_cluster_mask = ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID;
6417		return (EINVAL);
6418	}
6419
6420	sc->fw_debug_dump_cluster_mask = clusters;
6421
6422	return (0);
6423}
6424
6425#define ICE_FW_DUMP_AQ_COUNT_LIMIT	(10000)
6426
6427/**
6428 * ice_fw_debug_dump_print_cluster - Print formatted cluster data from FW
6429 * @sc: the device softc
6430 * @sbuf: initialized sbuf to print data to
6431 * @cluster_id: FW cluster ID to print data from
6432 *
6433 * Reads debug data from the specified cluster id in the FW and prints it to
6434 * the input sbuf. This function issues multiple AQ commands to the FW in
6435 * order to get all of the data in the cluster.
6436 *
6437 * @remark Only intended to be used by the sysctl handler
6438 * ice_sysctl_fw_debug_dump_do_dump
6439 */
6440static u16
6441ice_fw_debug_dump_print_cluster(struct ice_softc *sc, struct sbuf *sbuf, u16 cluster_id)
6442{
6443	struct ice_hw *hw = &sc->hw;
6444	device_t dev = sc->dev;
6445	u16 data_buf_size = ICE_AQ_MAX_BUF_LEN;
6446	const u8 reserved_buf[8] = {};
6447	enum ice_status status;
6448	int counter = 0;
6449	u8 *data_buf;
6450
6451	/* Input parameters / loop variables */
6452	u16 table_id = 0;
6453	u32 offset = 0;
6454
6455	/* Output from the Get Internal Data AQ command */
6456	u16 ret_buf_size = 0;
6457	u16 ret_next_cluster = 0;
6458	u16 ret_next_table = 0;
6459	u32 ret_next_index = 0;
6460
6461	/* Other setup */
6462	data_buf = (u8 *)malloc(data_buf_size, M_ICE, M_NOWAIT | M_ZERO);
6463	if (!data_buf)
6464		return ret_next_cluster;
6465
6466	ice_debug(hw, ICE_DBG_DIAG, "%s: dumping cluster id %d\n", __func__,
6467	    cluster_id);
6468
6469	for (;;) {
6470		/* Do not trust the FW behavior to be completely correct */
6471		if (counter++ >= ICE_FW_DUMP_AQ_COUNT_LIMIT) {
6472			device_printf(dev,
6473			    "%s: Exceeded counter limit for cluster %d\n",
6474			    __func__, cluster_id);
6475			break;
6476		}
6477
6478		ice_debug(hw, ICE_DBG_DIAG, "---\n");
6479		ice_debug(hw, ICE_DBG_DIAG,
6480		    "table_id 0x%04x offset 0x%08x buf_size %d\n",
6481		    table_id, offset, data_buf_size);
6482
6483		status = ice_aq_get_internal_data(hw, cluster_id, table_id,
6484		    offset, data_buf, data_buf_size, &ret_buf_size,
6485		    &ret_next_cluster, &ret_next_table, &ret_next_index, NULL);
6486		if (status) {
6487			device_printf(dev,
6488			    "%s: ice_aq_get_internal_data in cluster %d: err %s aq_err %s\n",
6489			    __func__, cluster_id, ice_status_str(status),
6490			    ice_aq_str(hw->adminq.sq_last_status));
6491			break;
6492		}
6493
6494		ice_debug(hw, ICE_DBG_DIAG,
6495		    "ret_table_id 0x%04x ret_offset 0x%08x ret_buf_size %d\n",
6496		    ret_next_table, ret_next_index, ret_buf_size);
6497
6498		/* Print cluster id */
6499		u32 print_cluster_id = (u32)cluster_id;
6500		sbuf_bcat(sbuf, &print_cluster_id, sizeof(print_cluster_id));
6501		/* Print table id */
6502		u32 print_table_id = (u32)table_id;
6503		sbuf_bcat(sbuf, &print_table_id, sizeof(print_table_id));
6504		/* Print table length */
6505		u32 print_table_length = (u32)ret_buf_size;
6506		sbuf_bcat(sbuf, &print_table_length, sizeof(print_table_length));
6507		/* Print current offset */
6508		u32 print_curr_offset = offset;
6509		sbuf_bcat(sbuf, &print_curr_offset, sizeof(print_curr_offset));
6510		/* Print reserved bytes */
6511		sbuf_bcat(sbuf, reserved_buf, sizeof(reserved_buf));
6512		/* Print data */
6513		sbuf_bcat(sbuf, data_buf, ret_buf_size);
6514
6515		/* Adjust loop variables */
6516		memset(data_buf, 0, data_buf_size);
6517		bool same_table_next = (table_id == ret_next_table);
6518		bool last_table_next = (ret_next_table == 0xff || ret_next_table == 0xffff);
6519		bool last_offset_next = (ret_next_index == 0xffffffff || ret_next_index == 0);
6520
6521		if ((!same_table_next && !last_offset_next) ||
6522		    (same_table_next && last_table_next)) {
6523			device_printf(dev,
6524			    "%s: Unexpected conditions for same_table_next(%d) last_table_next(%d) last_offset_next(%d), ending cluster (%d)\n",
6525			    __func__, same_table_next, last_table_next, last_offset_next, cluster_id);
6526			break;
6527		}
6528
6529		if (!same_table_next && !last_table_next && last_offset_next) {
6530			/* We've hit the end of the table */
6531			table_id = ret_next_table;
6532			offset = 0;
6533		}
6534		else if (!same_table_next && last_table_next && last_offset_next) {
6535			/* We've hit the end of the cluster */
6536			break;
6537		}
6538		else if (same_table_next && !last_table_next && last_offset_next) {
6539			if (cluster_id == 0x1 && table_id < 39)
6540				table_id += 1;
6541			else
6542				break;
6543		}
6544		else { /* if (same_table_next && !last_table_next && !last_offset_next) */
6545			/* More data left in the table */
6546			offset = ret_next_index;
6547		}
6548	}
6549
6550	free(data_buf, M_ICE);
6551	return ret_next_cluster;
6552}
6553
/* Description string for the sysctl that arms and outputs the FW debug dump */
#define ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP \
"\nWrite 1 to output a FW debug dump containing the clusters specified by the \"clusters\" sysctl" \
"\nThe \"-b\" flag must be used in order to dump this data as binary data because" \
"\nthis data is opaque and not a string."

/*
 * Output size estimates that ice_sysctl_fw_debug_dump_do_dump reports when
 * the caller only asks for an upper bound on the output length. BASE is
 * always included. ALL is added when the cluster mask is 0. Otherwise
 * CLUST0, CLUST1 and CLUST2 are added for mask bits 0x1, 0x2 and 0x4
 * respectively.
 */
#define ICE_FW_DUMP_BASE_TEXT_SIZE	(1024 * 1024)
#define ICE_FW_DUMP_ALL_TEXT_SIZE	(10 * 1024 * 1024)
#define ICE_FW_DUMP_CLUST0_TEXT_SIZE	(2 * 1024 * 1024)
#define ICE_FW_DUMP_CLUST1_TEXT_SIZE	(128 * 1024)
#define ICE_FW_DUMP_CLUST2_TEXT_SIZE	(2 * 1024 * 1024)
6564
6565/**
6566 * ice_sysctl_fw_debug_dump_do_dump - Dump data from FW to sysctl output
6567 * @oidp: sysctl oid structure
6568 * @arg1: pointer to private data structure
6569 * @arg2: unused
6570 * @req: sysctl request pointer
6571 *
6572 * Sysctl handler for the debug.dump.dump sysctl. Prints out a specially-
6573 * formatted dump of some debug FW data intended to be processed by a special
6574 * Intel tool. Prints out the cluster data specified by the "clusters"
6575 * sysctl.
6576 *
6577 * @remark The actual AQ calls and printing are handled by a helper
6578 * function above.
6579 */
6580static int
6581ice_sysctl_fw_debug_dump_do_dump(SYSCTL_HANDLER_ARGS)
6582{
6583	struct ice_softc *sc = (struct ice_softc *)arg1;
6584	device_t dev = sc->dev;
6585	struct sbuf *sbuf;
6586	int bit, ret;
6587
6588	UNREFERENCED_PARAMETER(arg2);
6589
6590	ret = priv_check(curthread, PRIV_DRIVER);
6591	if (ret)
6592		return (ret);
6593
6594	if (ice_driver_is_detaching(sc))
6595		return (ESHUTDOWN);
6596
6597	/* If the user hasn't written "1" to this sysctl yet: */
6598	if (!ice_test_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP)) {
6599		/* Avoid output on the first set of reads to this sysctl in
6600		 * order to prevent a null byte from being written to the
6601		 * end result when called via sysctl(8).
6602		 */
6603		if (req->oldptr == NULL && req->newptr == NULL) {
6604			ret = SYSCTL_OUT(req, 0, 0);
6605			return (ret);
6606		}
6607
6608		char input_buf[2] = "";
6609		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
6610		if ((ret) || (req->newptr == NULL))
6611			return (ret);
6612
6613		/* If we get '1', then indicate we'll do a dump in the next
6614		 * sysctl read call.
6615		 */
6616		if (input_buf[0] == '1') {
6617			if (sc->fw_debug_dump_cluster_mask == ICE_AQC_DBG_DUMP_CLUSTER_ID_INVALID) {
6618				device_printf(dev,
6619				    "%s: Debug Dump failed because an invalid cluster was specified.\n",
6620				    __func__);
6621				return (EINVAL);
6622			}
6623
6624			ice_set_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6625			return (0);
6626		}
6627
6628		return (EINVAL);
6629	}
6630
6631	/* --- FW debug dump state is set --- */
6632
6633
6634	/* Caller just wants the upper bound for size */
6635	if (req->oldptr == NULL && req->newptr == NULL) {
6636		size_t est_output_len = ICE_FW_DUMP_BASE_TEXT_SIZE;
6637		if (sc->fw_debug_dump_cluster_mask == 0)
6638			est_output_len += ICE_FW_DUMP_ALL_TEXT_SIZE;
6639		else {
6640			if (sc->fw_debug_dump_cluster_mask & 0x1)
6641				est_output_len += ICE_FW_DUMP_CLUST0_TEXT_SIZE;
6642			if (sc->fw_debug_dump_cluster_mask & 0x2)
6643				est_output_len += ICE_FW_DUMP_CLUST1_TEXT_SIZE;
6644			if (sc->fw_debug_dump_cluster_mask & 0x4)
6645				est_output_len += ICE_FW_DUMP_CLUST2_TEXT_SIZE;
6646		}
6647
6648		ret = SYSCTL_OUT(req, 0, est_output_len);
6649		return (ret);
6650	}
6651
6652	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6653	sbuf_clear_flags(sbuf, SBUF_INCLUDENUL);
6654
6655	ice_debug(&sc->hw, ICE_DBG_DIAG, "%s: Debug Dump running...\n", __func__);
6656
6657	if (sc->fw_debug_dump_cluster_mask) {
6658		for_each_set_bit(bit, &sc->fw_debug_dump_cluster_mask,
6659		    sizeof(sc->fw_debug_dump_cluster_mask) * 8)
6660			ice_fw_debug_dump_print_cluster(sc, sbuf, bit);
6661	} else {
6662		u16 next_cluster_id = 0;
6663		/* We don't support QUEUE_MNG and FULL_CSR_SPACE */
6664		do {
6665			next_cluster_id = ice_fw_debug_dump_print_cluster(sc, sbuf, next_cluster_id);
6666		} while (next_cluster_id != 0 && next_cluster_id < ICE_AQC_DBG_DUMP_CLUSTER_ID_QUEUE_MNG);
6667	}
6668
6669	sbuf_finish(sbuf);
6670	sbuf_delete(sbuf);
6671
6672	ice_clear_state(&sc->state, ICE_STATE_DO_FW_DEBUG_DUMP);
6673	return (ret);
6674}
6675
6676/**
6677 * ice_add_debug_sysctls - Add sysctls helpful for debugging the device driver
6678 * @sc: device private structure
6679 *
6680 * Add sysctls related to debugging the device driver. Generally these should
6681 * simply be sysctls which dump internal driver state, to aid in understanding
6682 * what the driver is doing.
6683 */
6684static void
6685ice_add_debug_sysctls(struct ice_softc *sc)
6686{
6687	struct sysctl_oid *sw_node, *dump_node;
6688	struct sysctl_oid_list *debug_list, *sw_list, *dump_list;
6689	device_t dev = sc->dev;
6690
6691	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
6692
6693	debug_list = SYSCTL_CHILDREN(sc->debug_sysctls);
6694
6695	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "request_reset",
6696			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_WR, sc, 0,
6697			ice_sysctl_request_reset, "A",
6698			ICE_SYSCTL_HELP_REQUEST_RESET);
6699
6700	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "pfr_count",
6701		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6702		       &sc->soft_stats.pfr_count, 0,
6703		       "# of PF resets handled");
6704
6705	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "corer_count",
6706		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6707		       &sc->soft_stats.corer_count, 0,
6708		       "# of CORE resets handled");
6709
6710	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "globr_count",
6711		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6712		       &sc->soft_stats.globr_count, 0,
6713		       "# of Global resets handled");
6714
6715	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "empr_count",
6716		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6717		       &sc->soft_stats.empr_count, 0,
6718		       "# of EMP resets handled");
6719
6720	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "tx_mdd_count",
6721		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6722		       &sc->soft_stats.tx_mdd_count, 0,
6723		       "# of Tx MDD events detected");
6724
6725	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "rx_mdd_count",
6726		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD,
6727		       &sc->soft_stats.rx_mdd_count, 0,
6728		       "# of Rx MDD events detected");
6729
6730	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "state",
6731			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6732			ice_sysctl_dump_state_flags, "A",
6733			"Driver State Flags");
6734
6735	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "set_link",
6736			ICE_CTLFLAG_DEBUG | CTLTYPE_U8 | CTLFLAG_RW, sc, 0,
6737			ice_sysctl_debug_set_link, "CU", "Set link");
6738
6739	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_low",
6740			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6741			ice_sysctl_phy_type_low, "QU",
6742			"PHY type Low from Get PHY Caps/Set PHY Cfg");
6743
6744	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_type_high",
6745			ICE_CTLFLAG_DEBUG | CTLTYPE_U64 | CTLFLAG_RW, sc, 0,
6746			ice_sysctl_phy_type_high, "QU",
6747			"PHY type High from Get PHY Caps/Set PHY Cfg");
6748
6749	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_sw_caps",
6750			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6751			ice_sysctl_phy_sw_caps, "",
6752			"Get PHY Capabilities (Software configuration)");
6753
6754	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_nvm_caps",
6755			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6756			ice_sysctl_phy_nvm_caps, "",
6757			"Get PHY Capabilities (NVM configuration)");
6758
6759	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_topo_caps",
6760			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6761			ice_sysctl_phy_topo_caps, "",
6762			"Get PHY Capabilities (Topology configuration)");
6763
6764	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_link_status",
6765			ICE_CTLFLAG_DEBUG | CTLTYPE_STRUCT | CTLFLAG_RD, sc, 0,
6766			ice_sysctl_phy_link_status, "",
6767			"Get PHY Link Status");
6768
6769	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_diag_data",
6770			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6771			ice_sysctl_read_i2c_diag_data, "A",
6772			"Dump selected diagnostic data from FW");
6773
6774	SYSCTL_ADD_U32(ctx, debug_list, OID_AUTO, "fw_build",
6775		       ICE_CTLFLAG_DEBUG | CTLFLAG_RD, &sc->hw.fw_build, 0,
6776		       "FW Build ID");
6777
6778	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "os_ddp_version",
6779			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6780			ice_sysctl_os_pkg_version, "A",
6781			"DDP package name and version found in ice_ddp");
6782
6783	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "cur_lldp_persist_status",
6784			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6785			ice_sysctl_fw_cur_lldp_persist_status, "A",
6786			"Current LLDP persistent status");
6787
6788	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "dflt_lldp_persist_status",
6789			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6790			ice_sysctl_fw_dflt_lldp_persist_status, "A",
6791			"Default LLDP persistent status");
6792
6793	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "negotiated_fc",
6794			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6795			ice_sysctl_negotiated_fc, "A",
6796			"Current Negotiated Flow Control mode");
6797
6798	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "local_dcbx_cfg",
6799			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_LOCAL,
6800			ice_sysctl_dump_dcbx_cfg, "A",
6801			"Dumps Local MIB information from firmware");
6802
6803	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "remote_dcbx_cfg",
6804			CTLTYPE_STRING | CTLFLAG_RD, sc, ICE_AQ_LLDP_MIB_REMOTE,
6805			ice_sysctl_dump_dcbx_cfg, "A",
6806			"Dumps Remote MIB information from firmware");
6807
6808	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "pf_vsi_cfg", CTLTYPE_STRING | CTLFLAG_RD,
6809			sc, 0, ice_sysctl_dump_vsi_cfg, "A",
6810			"Dumps Selected PF VSI parameters from firmware");
6811
6812	SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "query_port_ets", CTLTYPE_STRING | CTLFLAG_RD,
6813			sc, 0, ice_sysctl_query_port_ets, "A",
6814			"Prints selected output from Query Port ETS AQ command");
6815
6816	sw_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "switch",
6817				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6818				  "Switch Configuration");
6819	sw_list = SYSCTL_CHILDREN(sw_node);
6820
6821	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "mac_filters",
6822			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6823			ice_sysctl_dump_mac_filters, "A",
6824			"MAC Filters");
6825
6826	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "vlan_filters",
6827			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6828			ice_sysctl_dump_vlan_filters, "A",
6829			"VLAN Filters");
6830
6831	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_filters",
6832			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6833			ice_sysctl_dump_ethertype_filters, "A",
6834			"Ethertype Filters");
6835
6836	SYSCTL_ADD_PROC(ctx, sw_list, OID_AUTO, "ethertype_mac_filters",
6837			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6838			ice_sysctl_dump_ethertype_mac_filters, "A",
6839			"Ethertype/MAC Filters");
6840
6841	dump_node = SYSCTL_ADD_NODE(ctx, debug_list, OID_AUTO, "dump",
6842				  ICE_CTLFLAG_DEBUG | CTLFLAG_RD, NULL,
6843				  "Internal FW Dump");
6844	dump_list = SYSCTL_CHILDREN(dump_node);
6845
6846	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "clusters",
6847			ICE_CTLFLAG_DEBUG | CTLTYPE_U32 | CTLFLAG_RW, sc, 0,
6848			ice_sysctl_fw_debug_dump_cluster_setting, "SU",
6849			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_CLUSTER_SETTING);
6850
6851	SYSCTL_ADD_PROC(ctx, dump_list, OID_AUTO, "dump",
6852			ICE_CTLFLAG_DEBUG | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6853			ice_sysctl_fw_debug_dump_do_dump, "",
6854			ICE_SYSCTL_HELP_FW_DEBUG_DUMP_DO_DUMP);
6855}
6856
6857/**
6858 * ice_vsi_disable_tx - Disable (unconfigure) Tx queues for a VSI
6859 * @vsi: the VSI to disable
6860 *
6861 * Disables the Tx queues associated with this VSI. Essentially the opposite
6862 * of ice_cfg_vsi_for_tx.
6863 */
6864int
6865ice_vsi_disable_tx(struct ice_vsi *vsi)
6866{
6867	struct ice_softc *sc = vsi->sc;
6868	struct ice_hw *hw = &sc->hw;
6869	enum ice_status status;
6870	u32 *q_teids;
6871	u16 *q_ids, *q_handles;
6872	size_t q_teids_size, q_ids_size, q_handles_size;
6873	int tc, j, buf_idx, err = 0;
6874
6875	if (vsi->num_tx_queues > 255)
6876		return (ENOSYS);
6877
6878	q_teids_size = sizeof(*q_teids) * vsi->num_tx_queues;
6879	q_teids = (u32 *)malloc(q_teids_size, M_ICE, M_NOWAIT|M_ZERO);
6880	if (!q_teids)
6881		return (ENOMEM);
6882
6883	q_ids_size = sizeof(*q_ids) * vsi->num_tx_queues;
6884	q_ids = (u16 *)malloc(q_ids_size, M_ICE, M_NOWAIT|M_ZERO);
6885	if (!q_ids) {
6886		err = (ENOMEM);
6887		goto free_q_teids;
6888	}
6889
6890	q_handles_size = sizeof(*q_handles) * vsi->num_tx_queues;
6891	q_handles = (u16 *)malloc(q_handles_size, M_ICE, M_NOWAIT|M_ZERO);
6892	if (!q_handles) {
6893		err = (ENOMEM);
6894		goto free_q_ids;
6895	}
6896
6897	ice_for_each_traffic_class(tc) {
6898		struct ice_tc_info *tc_info = &vsi->tc_info[tc];
6899		u16 start_idx, end_idx;
6900
6901		/* Skip rest of disabled TCs once the first
6902		 * disabled TC is found */
6903		if (!(vsi->tc_map & BIT(tc)))
6904			break;
6905
6906		/* Fill out TX queue information for this TC */
6907		start_idx = tc_info->qoffset;
6908		end_idx = start_idx + tc_info->qcount_tx;
6909		buf_idx = 0;
6910		for (j = start_idx; j < end_idx; j++) {
6911			struct ice_tx_queue *txq = &vsi->tx_queues[j];
6912
6913			q_ids[buf_idx] = vsi->tx_qmap[j];
6914			q_handles[buf_idx] = txq->q_handle;
6915			q_teids[buf_idx] = txq->q_teid;
6916			buf_idx++;
6917		}
6918
6919		status = ice_dis_vsi_txq(hw->port_info, vsi->idx, tc, buf_idx,
6920					 q_handles, q_ids, q_teids, ICE_NO_RESET, 0, NULL);
6921		if (status == ICE_ERR_DOES_NOT_EXIST) {
6922			; /* Queues have already been disabled, no need to report this as an error */
6923		} else if (status == ICE_ERR_RESET_ONGOING) {
6924			device_printf(sc->dev,
6925				      "Reset in progress. LAN Tx queues already disabled\n");
6926			break;
6927		} else if (status) {
6928			device_printf(sc->dev,
6929				      "Failed to disable LAN Tx queues: err %s aq_err %s\n",
6930				      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
6931			err = (ENODEV);
6932			break;
6933		}
6934
6935		/* Clear buffers */
6936		memset(q_teids, 0, q_teids_size);
6937		memset(q_ids, 0, q_ids_size);
6938		memset(q_handles, 0, q_handles_size);
6939	}
6940
6941/* free_q_handles: */
6942	free(q_handles, M_ICE);
6943free_q_ids:
6944	free(q_ids, M_ICE);
6945free_q_teids:
6946	free(q_teids, M_ICE);
6947
6948	return err;
6949}
6950
6951/**
6952 * ice_vsi_set_rss_params - Set the RSS parameters for the VSI
6953 * @vsi: the VSI to configure
6954 *
6955 * Sets the RSS table size and lookup table type for the VSI based on its
6956 * VSI type.
6957 */
6958static void
6959ice_vsi_set_rss_params(struct ice_vsi *vsi)
6960{
6961	struct ice_softc *sc = vsi->sc;
6962	struct ice_hw_common_caps *cap;
6963
6964	cap = &sc->hw.func_caps.common_cap;
6965
6966	switch (vsi->type) {
6967	case ICE_VSI_PF:
6968		/* The PF VSI inherits RSS instance of the PF */
6969		vsi->rss_table_size = cap->rss_table_size;
6970		vsi->rss_lut_type = ICE_LUT_PF;
6971		break;
6972	case ICE_VSI_VF:
6973	case ICE_VSI_VMDQ2:
6974		vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
6975		vsi->rss_lut_type = ICE_LUT_VSI;
6976		break;
6977	default:
6978		device_printf(sc->dev,
6979			      "VSI %d: RSS not supported for VSI type %d\n",
6980			      vsi->idx, vsi->type);
6981		break;
6982	}
6983}
6984
6985/**
6986 * ice_vsi_add_txqs_ctx - Create a sysctl context and node to store txq sysctls
6987 * @vsi: The VSI to add the context for
6988 *
6989 * Creates a sysctl context for storing txq sysctls. Additionally creates
6990 * a node rooted at the given VSI's main sysctl node. This context will be
6991 * used to store per-txq sysctls which may need to be released during the
6992 * driver's lifetime.
6993 */
6994void
6995ice_vsi_add_txqs_ctx(struct ice_vsi *vsi)
6996{
6997	struct sysctl_oid_list *vsi_list;
6998
6999	sysctl_ctx_init(&vsi->txqs_ctx);
7000
7001	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
7002
7003	vsi->txqs_node = SYSCTL_ADD_NODE(&vsi->txqs_ctx, vsi_list, OID_AUTO, "txqs",
7004					 CTLFLAG_RD, NULL, "Tx Queues");
7005}
7006
7007/**
7008 * ice_vsi_add_rxqs_ctx - Create a sysctl context and node to store rxq sysctls
7009 * @vsi: The VSI to add the context for
7010 *
7011 * Creates a sysctl context for storing rxq sysctls. Additionally creates
7012 * a node rooted at the given VSI's main sysctl node. This context will be
7013 * used to store per-rxq sysctls which may need to be released during the
7014 * driver's lifetime.
7015 */
7016void
7017ice_vsi_add_rxqs_ctx(struct ice_vsi *vsi)
7018{
7019	struct sysctl_oid_list *vsi_list;
7020
7021	sysctl_ctx_init(&vsi->rxqs_ctx);
7022
7023	vsi_list = SYSCTL_CHILDREN(vsi->vsi_node);
7024
7025	vsi->rxqs_node = SYSCTL_ADD_NODE(&vsi->rxqs_ctx, vsi_list, OID_AUTO, "rxqs",
7026					 CTLFLAG_RD, NULL, "Rx Queues");
7027}
7028
7029/**
7030 * ice_vsi_del_txqs_ctx - Delete the Tx queue sysctl context for this VSI
7031 * @vsi: The VSI to delete from
7032 *
7033 * Frees the txq sysctl context created for storing the per-queue Tx sysctls.
7034 * Must be called prior to freeing the Tx queue memory, in order to avoid
7035 * having sysctls point at stale memory.
7036 */
7037void
7038ice_vsi_del_txqs_ctx(struct ice_vsi *vsi)
7039{
7040	device_t dev = vsi->sc->dev;
7041	int err;
7042
7043	if (vsi->txqs_node) {
7044		err = sysctl_ctx_free(&vsi->txqs_ctx);
7045		if (err)
7046			device_printf(dev, "failed to free VSI %d txqs_ctx, err %s\n",
7047				      vsi->idx, ice_err_str(err));
7048		vsi->txqs_node = NULL;
7049	}
7050}
7051
7052/**
7053 * ice_vsi_del_rxqs_ctx - Delete the Rx queue sysctl context for this VSI
7054 * @vsi: The VSI to delete from
7055 *
7056 * Frees the rxq sysctl context created for storing the per-queue Rx sysctls.
7057 * Must be called prior to freeing the Rx queue memory, in order to avoid
7058 * having sysctls point at stale memory.
7059 */
7060void
7061ice_vsi_del_rxqs_ctx(struct ice_vsi *vsi)
7062{
7063	device_t dev = vsi->sc->dev;
7064	int err;
7065
7066	if (vsi->rxqs_node) {
7067		err = sysctl_ctx_free(&vsi->rxqs_ctx);
7068		if (err)
7069			device_printf(dev, "failed to free VSI %d rxqs_ctx, err %s\n",
7070				      vsi->idx, ice_err_str(err));
7071		vsi->rxqs_node = NULL;
7072	}
7073}
7074
7075/**
7076 * ice_add_txq_sysctls - Add per-queue sysctls for a Tx queue
7077 * @txq: pointer to the Tx queue
7078 *
7079* Add per-queue sysctls for a given Tx queue. Can't be called during
7080* ice_add_vsi_sysctls, since the queue memory has not yet been setup.
7081 */
7082void
7083ice_add_txq_sysctls(struct ice_tx_queue *txq)
7084{
7085	struct ice_vsi *vsi = txq->vsi;
7086	struct sysctl_ctx_list *ctx = &vsi->txqs_ctx;
7087	struct sysctl_oid_list *txqs_list, *this_txq_list;
7088	struct sysctl_oid *txq_node;
7089	char txq_name[32], txq_desc[32];
7090
7091	const struct ice_sysctl_info ctls[] = {
7092		{ &txq->stats.tx_packets, "tx_packets", "Queue Packets Transmitted" },
7093		{ &txq->stats.tx_bytes, "tx_bytes", "Queue Bytes Transmitted" },
7094		{ &txq->stats.mss_too_small, "mss_too_small", "TSO sends with an MSS less than 64" },
7095		{ 0, 0, 0 }
7096	};
7097
7098	const struct ice_sysctl_info *entry = ctls;
7099
7100	txqs_list = SYSCTL_CHILDREN(vsi->txqs_node);
7101
7102	snprintf(txq_name, sizeof(txq_name), "%u", txq->me);
7103	snprintf(txq_desc, sizeof(txq_desc), "Tx Queue %u", txq->me);
7104	txq_node = SYSCTL_ADD_NODE(ctx, txqs_list, OID_AUTO, txq_name,
7105				   CTLFLAG_RD, NULL, txq_desc);
7106	this_txq_list = SYSCTL_CHILDREN(txq_node);
7107
7108	/* Add the Tx queue statistics */
7109	while (entry->stat != 0) {
7110		SYSCTL_ADD_U64(ctx, this_txq_list, OID_AUTO, entry->name,
7111			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7112			       entry->description);
7113		entry++;
7114	}
7115
7116	SYSCTL_ADD_U8(ctx, this_txq_list, OID_AUTO, "tc",
7117		       CTLFLAG_RD, &txq->tc, 0,
7118		       "Traffic Class that Queue belongs to");
7119}
7120
7121/**
7122 * ice_add_rxq_sysctls - Add per-queue sysctls for an Rx queue
7123 * @rxq: pointer to the Rx queue
7124 *
7125 * Add per-queue sysctls for a given Rx queue. Can't be called during
7126 * ice_add_vsi_sysctls, since the queue memory has not yet been setup.
7127 */
7128void
7129ice_add_rxq_sysctls(struct ice_rx_queue *rxq)
7130{
7131	struct ice_vsi *vsi = rxq->vsi;
7132	struct sysctl_ctx_list *ctx = &vsi->rxqs_ctx;
7133	struct sysctl_oid_list *rxqs_list, *this_rxq_list;
7134	struct sysctl_oid *rxq_node;
7135	char rxq_name[32], rxq_desc[32];
7136
7137	const struct ice_sysctl_info ctls[] = {
7138		{ &rxq->stats.rx_packets, "rx_packets", "Queue Packets Received" },
7139		{ &rxq->stats.rx_bytes, "rx_bytes", "Queue Bytes Received" },
7140		{ &rxq->stats.desc_errs, "rx_desc_errs", "Queue Rx Descriptor Errors" },
7141		{ 0, 0, 0 }
7142	};
7143
7144	const struct ice_sysctl_info *entry = ctls;
7145
7146	rxqs_list = SYSCTL_CHILDREN(vsi->rxqs_node);
7147
7148	snprintf(rxq_name, sizeof(rxq_name), "%u", rxq->me);
7149	snprintf(rxq_desc, sizeof(rxq_desc), "Rx Queue %u", rxq->me);
7150	rxq_node = SYSCTL_ADD_NODE(ctx, rxqs_list, OID_AUTO, rxq_name,
7151				   CTLFLAG_RD, NULL, rxq_desc);
7152	this_rxq_list = SYSCTL_CHILDREN(rxq_node);
7153
7154	/* Add the Rx queue statistics */
7155	while (entry->stat != 0) {
7156		SYSCTL_ADD_U64(ctx, this_rxq_list, OID_AUTO, entry->name,
7157			       CTLFLAG_RD | CTLFLAG_STATS, entry->stat, 0,
7158			       entry->description);
7159		entry++;
7160	}
7161
7162	SYSCTL_ADD_U8(ctx, this_rxq_list, OID_AUTO, "tc",
7163		       CTLFLAG_RD, &rxq->tc, 0,
7164		       "Traffic Class that Queue belongs to");
7165}
7166
7167/**
7168 * ice_get_default_rss_key - Obtain a default RSS key
7169 * @seed: storage for the RSS key data
7170 *
7171 * Copies a pre-generated RSS key into the seed memory. The seed pointer must
7172 * point to a block of memory that is at least 40 bytes in size.
7173 *
7174 * The key isn't randomly generated each time this function is called because
7175 * that makes the RSS key change every time we reconfigure RSS. This does mean
7176 * that we're hard coding a possibly 'well known' key. We might want to
7177 * investigate randomly generating this key once during the first call.
7178 */
7179static void
7180ice_get_default_rss_key(u8 *seed)
7181{
7182	const u8 default_seed[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE] = {
7183		0x39, 0xed, 0xff, 0x4d, 0x43, 0x58, 0x42, 0xc3, 0x5f, 0xb8,
7184		0xa5, 0x32, 0x95, 0x65, 0x81, 0xcd, 0x36, 0x79, 0x71, 0x97,
7185		0xde, 0xa4, 0x41, 0x40, 0x6f, 0x27, 0xe9, 0x81, 0x13, 0xa0,
7186		0x95, 0x93, 0x5b, 0x1e, 0x9d, 0x27, 0x9d, 0x24, 0x84, 0xb5,
7187	};
7188
7189	bcopy(default_seed, seed, ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE);
7190}
7191
7192/**
7193 * ice_set_rss_key - Configure a given VSI with the default RSS key
7194 * @vsi: the VSI to configure
7195 *
7196 * Program the hardware RSS key. We use rss_getkey to grab the kernel RSS key.
7197 * If the kernel RSS interface is not available, this will fall back to our
7198 * pre-generated hash seed from ice_get_default_rss_key().
7199 */
7200static int
7201ice_set_rss_key(struct ice_vsi *vsi)
7202{
7203	struct ice_aqc_get_set_rss_keys keydata = { .standard_rss_key = {0} };
7204	struct ice_softc *sc = vsi->sc;
7205	struct ice_hw *hw = &sc->hw;
7206	enum ice_status status;
7207
7208	/*
7209	 * If the RSS kernel interface is disabled, this will return the
7210	 * default RSS key above.
7211	 */
7212	rss_getkey(keydata.standard_rss_key);
7213
7214	status = ice_aq_set_rss_key(hw, vsi->idx, &keydata);
7215	if (status) {
7216		device_printf(sc->dev,
7217			      "ice_aq_set_rss_key status %s, error %s\n",
7218			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7219		return (EIO);
7220	}
7221
7222	return (0);
7223}
7224
7225/**
7226 * ice_set_rss_flow_flds - Program the RSS hash flows after package init
7227 * @vsi: the VSI to configure
7228 *
7229 * If the package file is initialized, the default RSS flows are reset. We
7230 * need to reprogram the expected hash configuration. We'll use
7231 * rss_gethashconfig() to determine which flows to enable. If RSS kernel
7232 * support is not enabled, this macro will fall back to suitable defaults.
7233 */
7234static void
7235ice_set_rss_flow_flds(struct ice_vsi *vsi)
7236{
7237	struct ice_softc *sc = vsi->sc;
7238	struct ice_hw *hw = &sc->hw;
7239	struct ice_rss_hash_cfg rss_cfg = { 0, 0, ICE_RSS_ANY_HEADERS, false };
7240	device_t dev = sc->dev;
7241	enum ice_status status;
7242	u_int rss_hash_config;
7243
7244	rss_hash_config = rss_gethashconfig();
7245
7246	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) {
7247		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4;
7248		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV4;
7249		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7250		if (status)
7251			device_printf(dev,
7252				      "ice_add_rss_cfg on VSI %d failed for ipv4 flow, err %s aq_err %s\n",
7253				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7254	}
7255	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) {
7256		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_TCP;
7257		rss_cfg.hash_flds = ICE_HASH_TCP_IPV4;
7258		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7259		if (status)
7260			device_printf(dev,
7261				      "ice_add_rss_cfg on VSI %d failed for tcp4 flow, err %s aq_err %s\n",
7262				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7263	}
7264	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) {
7265		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_UDP;
7266		rss_cfg.hash_flds = ICE_HASH_UDP_IPV4;
7267		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7268		if (status)
7269			device_printf(dev,
7270				      "ice_add_rss_cfg on VSI %d failed for udp4 flow, err %s aq_err %s\n",
7271				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7272	}
7273	if (rss_hash_config & (RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX)) {
7274		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6;
7275		rss_cfg.hash_flds = ICE_FLOW_HASH_IPV6;
7276		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7277		if (status)
7278			device_printf(dev,
7279				      "ice_add_rss_cfg on VSI %d failed for ipv6 flow, err %s aq_err %s\n",
7280				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7281	}
7282	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) {
7283		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_TCP;
7284		rss_cfg.hash_flds = ICE_HASH_TCP_IPV6;
7285		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7286		if (status)
7287			device_printf(dev,
7288				      "ice_add_rss_cfg on VSI %d failed for tcp6 flow, err %s aq_err %s\n",
7289				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7290	}
7291	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) {
7292		rss_cfg.addl_hdrs = ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_UDP;
7293		rss_cfg.hash_flds = ICE_HASH_UDP_IPV6;
7294		status = ice_add_rss_cfg(hw, vsi->idx, &rss_cfg);
7295		if (status)
7296			device_printf(dev,
7297				      "ice_add_rss_cfg on VSI %d failed for udp6 flow, err %s aq_err %s\n",
7298				      vsi->idx, ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7299	}
7300
7301	/* Warn about RSS hash types which are not supported */
7302	/* coverity[dead_error_condition] */
7303	if (rss_hash_config & ~ICE_DEFAULT_RSS_HASH_CONFIG) {
7304		device_printf(dev,
7305			      "ice_add_rss_cfg on VSI %d could not configure every requested hash type\n",
7306			      vsi->idx);
7307	}
7308}
7309
7310/**
7311 * ice_set_rss_lut - Program the RSS lookup table for a VSI
7312 * @vsi: the VSI to configure
7313 *
7314 * Programs the RSS lookup table for a given VSI. We use
7315 * rss_get_indirection_to_bucket which will use the indirection table provided
7316 * by the kernel RSS interface when available. If the kernel RSS interface is
7317 * not available, we will fall back to a simple round-robin fashion queue
7318 * assignment.
7319 */
7320static int
7321ice_set_rss_lut(struct ice_vsi *vsi)
7322{
7323	struct ice_softc *sc = vsi->sc;
7324	struct ice_hw *hw = &sc->hw;
7325	device_t dev = sc->dev;
7326	struct ice_aq_get_set_rss_lut_params lut_params;
7327	enum ice_status status;
7328	int i, err = 0;
7329	u8 *lut;
7330
7331	lut = (u8 *)malloc(vsi->rss_table_size, M_ICE, M_NOWAIT|M_ZERO);
7332	if (!lut) {
7333		device_printf(dev, "Failed to allocate RSS lut memory\n");
7334		return (ENOMEM);
7335	}
7336
7337	/* Populate the LUT with max no. of queues. If the RSS kernel
7338	 * interface is disabled, this will assign the lookup table in
7339	 * a simple round robin fashion
7340	 */
7341	for (i = 0; i < vsi->rss_table_size; i++) {
7342		/* XXX: this needs to be changed if num_rx_queues ever counts
7343		 * more than just the RSS queues */
7344		lut[i] = rss_get_indirection_to_bucket(i) % vsi->num_rx_queues;
7345	}
7346
7347	lut_params.vsi_handle = vsi->idx;
7348	lut_params.lut_size = vsi->rss_table_size;
7349	lut_params.lut_type = vsi->rss_lut_type;
7350	lut_params.lut = lut;
7351	lut_params.global_lut_id = 0;
7352	status = ice_aq_set_rss_lut(hw, &lut_params);
7353	if (status) {
7354		device_printf(dev,
7355			      "Cannot set RSS lut, err %s aq_err %s\n",
7356			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
7357		err = (EIO);
7358	}
7359
7360	free(lut, M_ICE);
7361	return err;
7362}
7363
7364/**
7365 * ice_config_rss - Configure RSS for a VSI
7366 * @vsi: the VSI to configure
7367 *
7368 * If FEATURE_RSS is enabled, configures the RSS lookup table and hash key for
7369 * a given VSI.
7370 */
7371int
7372ice_config_rss(struct ice_vsi *vsi)
7373{
7374	int err;
7375
7376	/* Nothing to do, if RSS is not enabled */
7377	if (!ice_is_bit_set(vsi->sc->feat_en, ICE_FEATURE_RSS))
7378		return 0;
7379
7380	err = ice_set_rss_key(vsi);
7381	if (err)
7382		return err;
7383
7384	ice_set_rss_flow_flds(vsi);
7385
7386	return ice_set_rss_lut(vsi);
7387}
7388
7389/**
7390 * ice_log_pkg_init - Log a message about status of DDP initialization
7391 * @sc: the device softc pointer
7392 * @pkg_status: the status result of ice_copy_and_init_pkg
7393 *
7394 * Called by ice_load_pkg after an attempt to download the DDP package
7395 * contents to the device to log an appropriate message for the system
7396 * administrator about download status.
7397 *
7398 * @post ice_is_init_pkg_successful function is used to determine
7399 * whether the download was successful and DDP package is compatible
7400 * with this driver. Otherwise driver will transition to Safe Mode.
7401 */
7402void
7403ice_log_pkg_init(struct ice_softc *sc, enum ice_ddp_state pkg_status)
7404{
7405	struct ice_hw *hw = &sc->hw;
7406	device_t dev = sc->dev;
7407	struct sbuf *active_pkg, *os_pkg;
7408
7409	active_pkg = sbuf_new_auto();
7410	ice_active_pkg_version_str(hw, active_pkg);
7411	sbuf_finish(active_pkg);
7412
7413	os_pkg = sbuf_new_auto();
7414	ice_os_pkg_version_str(hw, os_pkg);
7415	sbuf_finish(os_pkg);
7416
7417	switch (pkg_status) {
7418	case ICE_DDP_PKG_SUCCESS:
7419		device_printf(dev,
7420			      "The DDP package was successfully loaded: %s.\n",
7421			      sbuf_data(active_pkg));
7422		break;
7423	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
7424	case ICE_DDP_PKG_ALREADY_LOADED:
7425		device_printf(dev,
7426			      "DDP package already present on device: %s.\n",
7427			      sbuf_data(active_pkg));
7428		break;
7429	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
7430		device_printf(dev,
7431			      "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package %s.  The ice_ddp module has package: %s.\n",
7432			      sbuf_data(active_pkg),
7433			      sbuf_data(os_pkg));
7434		break;
7435	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
7436		device_printf(dev,
7437			      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7438			      sbuf_data(active_pkg),
7439			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7440		break;
7441	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
7442		device_printf(dev,
7443			      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7444			      sbuf_data(active_pkg),
7445			      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7446		break;
7447	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
7448		/*
7449		 * This assumes that the active_pkg_ver will not be
7450		 * initialized if the ice_ddp package version is not
7451		 * supported.
7452		 */
7453		if (pkg_ver_empty(&hw->active_pkg_ver, hw->active_pkg_name)) {
7454			/* The ice_ddp version is not supported */
7455			if (pkg_ver_compatible(&hw->pkg_ver) > 0) {
7456				device_printf(dev,
7457					      "The DDP package in the ice_ddp module is higher than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated driver.  Entering Safe Mode.\n",
7458					      sbuf_data(os_pkg),
7459					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7460			} else if (pkg_ver_compatible(&hw->pkg_ver) < 0) {
7461				device_printf(dev,
7462					      "The DDP package in the ice_ddp module is lower than the driver supports.  The ice_ddp module has package %s.  The driver requires version %d.%d.x.x.  Please use an updated ice_ddp module.  Entering Safe Mode.\n",
7463					      sbuf_data(os_pkg),
7464					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7465			} else {
7466				device_printf(dev,
7467					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7468					      sbuf_data(os_pkg),
7469					      sbuf_data(active_pkg),
7470					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7471			}
7472		} else {
7473			if (pkg_ver_compatible(&hw->active_pkg_ver) > 0) {
7474				device_printf(dev,
7475					      "The device has a DDP package that is higher than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7476					      sbuf_data(active_pkg),
7477					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7478			} else if (pkg_ver_compatible(&hw->active_pkg_ver) < 0) {
7479				device_printf(dev,
7480					      "The device has a DDP package that is lower than the driver supports.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7481					      sbuf_data(active_pkg),
7482					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7483			} else {
7484				device_printf(dev,
7485					      "An unknown error occurred when loading the DDP package.  The ice_ddp module has package %s.  The device has package %s.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
7486					      sbuf_data(os_pkg),
7487					      sbuf_data(active_pkg),
7488					      ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
7489			}
7490		}
7491		break;
7492	case ICE_DDP_PKG_INVALID_FILE:
7493		device_printf(dev,
7494			      "The DDP package in the ice_ddp module is invalid.  Entering Safe Mode\n");
7495		break;
7496	case ICE_DDP_PKG_FW_MISMATCH:
7497		device_printf(dev,
7498			      "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
7499		break;
7500	case ICE_DDP_PKG_NO_SEC_MANIFEST:
7501	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
7502		device_printf(dev,
7503			      "The DDP package in the ice_ddp module cannot be loaded because its signature is not valid.  Please use a valid ice_ddp module.  Entering Safe Mode.\n");
7504		break;
7505	case ICE_DDP_PKG_SECURE_VERSION_NBR_TOO_LOW:
7506		device_printf(dev,
7507			      "The DDP package in the ice_ddp module could not be loaded because its security revision is too low.  Please use an updated ice_ddp module.  Entering Safe Mode.\n");
7508		break;
7509	case ICE_DDP_PKG_MANIFEST_INVALID:
7510	case ICE_DDP_PKG_BUFFER_INVALID:
7511		device_printf(dev,
7512			      "An error occurred on the device while loading the DDP package.  Entering Safe Mode.\n");
7513		break;
7514	default:
7515		device_printf(dev,
7516			 "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
7517		break;
7518	}
7519
7520	sbuf_delete(active_pkg);
7521	sbuf_delete(os_pkg);
7522}
7523
7524/**
7525 * ice_load_pkg_file - Load the DDP package file using firmware_get
7526 * @sc: device private softc
7527 *
7528 * Use firmware_get to load the DDP package memory and then request that
7529 * firmware download the package contents and program the relevant hardware
7530 * bits.
7531 *
7532 * This function makes a copy of the DDP package memory which is tracked in
7533 * the ice_hw structure. The copy will be managed and released by
7534 * ice_deinit_hw(). This allows the firmware reference to be immediately
7535 * released using firmware_put.
7536 */
7537enum ice_status
7538ice_load_pkg_file(struct ice_softc *sc)
7539{
7540	struct ice_hw *hw = &sc->hw;
7541	device_t dev = sc->dev;
7542	enum ice_ddp_state state;
7543	const struct firmware *pkg;
7544	enum ice_status status = ICE_SUCCESS;
7545	u8 cached_layer_count;
7546	u8 *buf_copy;
7547
7548	pkg = firmware_get("ice_ddp");
7549	if (!pkg) {
7550		device_printf(dev,
7551		    "The DDP package module (ice_ddp) failed to load or could not be found. Entering Safe Mode.\n");
7552		if (cold)
7553			device_printf(dev,
7554			    "The DDP package module cannot be automatically loaded while booting. You may want to specify ice_ddp_load=\"YES\" in your loader.conf\n");
7555		status = ICE_ERR_CFG;
7556		goto err_load_pkg;
7557	}
7558
7559	/* Check for topology change */
7560	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_TX_BALANCE)) {
7561		cached_layer_count = hw->num_tx_sched_layers;
7562		buf_copy = (u8 *)malloc(pkg->datasize, M_ICE, M_NOWAIT);
7563		if (buf_copy == NULL)
7564			return ICE_ERR_NO_MEMORY;
7565		memcpy(buf_copy, pkg->data, pkg->datasize);
7566		status = ice_cfg_tx_topo(&sc->hw, buf_copy, pkg->datasize);
7567		free(buf_copy, M_ICE);
7568		/* Success indicates a change was made */
7569		if (status == ICE_SUCCESS) {
7570			/* 9 -> 5 */
7571			if (cached_layer_count == 9)
7572				device_printf(dev,
7573				    "Transmit balancing feature enabled\n");
7574			else
7575				device_printf(dev,
7576				    "Transmit balancing feature disabled\n");
7577			ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_en);
7578			return (status);
7579		} else if (status == ICE_ERR_CFG) {
7580			/* Status is ICE_ERR_CFG when DDP does not support transmit balancing */
7581			device_printf(dev,
7582			    "DDP package does not support transmit balancing feature - please update to the latest DDP package and try again\n");
7583		}
7584	}
7585
7586	/* Copy and download the pkg contents */
7587	state = ice_copy_and_init_pkg(hw, (const u8 *)pkg->data, pkg->datasize);
7588
7589	/* Release the firmware reference */
7590	firmware_put(pkg, FIRMWARE_UNLOAD);
7591
7592	/* Check the active DDP package version and log a message */
7593	ice_log_pkg_init(sc, state);
7594
7595	/* Place the driver into safe mode */
7596	if (ice_is_init_pkg_successful(state))
7597		return (ICE_ERR_ALREADY_EXISTS);
7598
7599err_load_pkg:
7600	ice_zero_bitmap(sc->feat_cap, ICE_FEATURE_COUNT);
7601	ice_zero_bitmap(sc->feat_en, ICE_FEATURE_COUNT);
7602	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
7603	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
7604
7605	return (status);
7606}
7607
7608/**
7609 * ice_get_ifnet_counter - Retrieve counter value for a given ifnet counter
7610 * @vsi: the vsi to retrieve the value for
7611 * @counter: the counter type to retrieve
7612 *
7613 * Returns the value for a given ifnet counter. To do so, we calculate the
7614 * value based on the matching hardware statistics.
7615 */
7616uint64_t
7617ice_get_ifnet_counter(struct ice_vsi *vsi, ift_counter counter)
7618{
7619	struct ice_hw_port_stats *hs = &vsi->sc->stats.cur;
7620	struct ice_eth_stats *es = &vsi->hw_stats.cur;
7621
7622	/* For some statistics, especially those related to error flows, we do
7623	 * not have per-VSI counters. In this case, we just report the global
7624	 * counters.
7625	 */
7626
7627	switch (counter) {
7628	case IFCOUNTER_IPACKETS:
7629		return (es->rx_unicast + es->rx_multicast + es->rx_broadcast);
7630	case IFCOUNTER_IERRORS:
7631		return (hs->crc_errors + hs->illegal_bytes +
7632			hs->mac_local_faults + hs->mac_remote_faults +
7633			hs->rx_len_errors + hs->rx_undersize +
7634			hs->rx_oversize + hs->rx_fragments + hs->rx_jabber);
7635	case IFCOUNTER_OPACKETS:
7636		return (es->tx_unicast + es->tx_multicast + es->tx_broadcast);
7637	case IFCOUNTER_OERRORS:
7638		return (es->tx_errors);
7639	case IFCOUNTER_COLLISIONS:
7640		return (0);
7641	case IFCOUNTER_IBYTES:
7642		return (es->rx_bytes);
7643	case IFCOUNTER_OBYTES:
7644		return (es->tx_bytes);
7645	case IFCOUNTER_IMCASTS:
7646		return (es->rx_multicast);
7647	case IFCOUNTER_OMCASTS:
7648		return (es->tx_multicast);
7649	case IFCOUNTER_IQDROPS:
7650		return (es->rx_discards);
7651	case IFCOUNTER_OQDROPS:
7652		return (hs->tx_dropped_link_down);
7653	case IFCOUNTER_NOPROTO:
7654		return (es->rx_unknown_protocol);
7655	default:
7656		return if_get_counter_default(vsi->sc->ifp, counter);
7657	}
7658}
7659
7660/**
7661 * ice_save_pci_info - Save PCI configuration fields in HW struct
7662 * @hw: the ice_hw struct to save the PCI information in
7663 * @dev: the device to get the PCI information from
7664 *
7665 * This should only be called once, early in the device attach
7666 * process.
7667 */
7668void
7669ice_save_pci_info(struct ice_hw *hw, device_t dev)
7670{
7671	hw->vendor_id = pci_get_vendor(dev);
7672	hw->device_id = pci_get_device(dev);
7673	hw->subsystem_vendor_id = pci_get_subvendor(dev);
7674	hw->subsystem_device_id = pci_get_subdevice(dev);
7675	hw->revision_id = pci_get_revid(dev);
7676	hw->bus.device = pci_get_slot(dev);
7677	hw->bus.func = pci_get_function(dev);
7678}
7679
7680/**
7681 * ice_replay_all_vsi_cfg - Replace configuration for all VSIs after reset
7682 * @sc: the device softc
7683 *
7684 * Replace the configuration for each VSI, and then cleanup replay
7685 * information. Called after a hardware reset in order to reconfigure the
7686 * active VSIs.
7687 */
7688int
7689ice_replay_all_vsi_cfg(struct ice_softc *sc)
7690{
7691	struct ice_hw *hw = &sc->hw;
7692	enum ice_status status;
7693	int i;
7694
7695	for (i = 0 ; i < sc->num_available_vsi; i++) {
7696		struct ice_vsi *vsi = sc->all_vsi[i];
7697
7698		if (!vsi)
7699			continue;
7700
7701		status = ice_replay_vsi(hw, vsi->idx);
7702		if (status) {
7703			device_printf(sc->dev, "Failed to replay VSI %d, err %s aq_err %s\n",
7704				      vsi->idx, ice_status_str(status),
7705				      ice_aq_str(hw->adminq.sq_last_status));
7706			return (EIO);
7707		}
7708	}
7709
7710	/* Cleanup replay filters after successful reconfiguration */
7711	ice_replay_post(hw);
7712	return (0);
7713}
7714
7715/**
7716 * ice_clean_vsi_rss_cfg - Cleanup RSS configuration for a given VSI
7717 * @vsi: pointer to the VSI structure
7718 *
7719 * Cleanup the advanced RSS configuration for a given VSI. This is necessary
7720 * during driver removal to ensure that all RSS resources are properly
7721 * released.
7722 *
7723 * @remark this function doesn't report an error as it is expected to be
7724 * called during driver reset and unload, and there isn't much the driver can
7725 * do if freeing RSS resources fails.
7726 */
7727static void
7728ice_clean_vsi_rss_cfg(struct ice_vsi *vsi)
7729{
7730	struct ice_softc *sc = vsi->sc;
7731	struct ice_hw *hw = &sc->hw;
7732	device_t dev = sc->dev;
7733	enum ice_status status;
7734
7735	status = ice_rem_vsi_rss_cfg(hw, vsi->idx);
7736	if (status)
7737		device_printf(dev,
7738			      "Failed to remove RSS configuration for VSI %d, err %s\n",
7739			      vsi->idx, ice_status_str(status));
7740
7741	/* Remove this VSI from the RSS list */
7742	ice_rem_vsi_rss_list(hw, vsi->idx);
7743}
7744
7745/**
7746 * ice_clean_all_vsi_rss_cfg - Cleanup RSS configuration for all VSIs
7747 * @sc: the device softc pointer
7748 *
7749 * Cleanup the advanced RSS configuration for all VSIs on a given PF
7750 * interface.
7751 *
7752 * @remark This should be called while preparing for a reset, to cleanup stale
7753 * RSS configuration for all VSIs.
7754 */
7755void
7756ice_clean_all_vsi_rss_cfg(struct ice_softc *sc)
7757{
7758	int i;
7759
7760	/* No need to cleanup if RSS is not enabled */
7761	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_RSS))
7762		return;
7763
7764	for (i = 0; i < sc->num_available_vsi; i++) {
7765		struct ice_vsi *vsi = sc->all_vsi[i];
7766
7767		if (vsi)
7768			ice_clean_vsi_rss_cfg(vsi);
7769	}
7770}
7771
7772/**
7773 * ice_requested_fec_mode - Return the requested FEC mode as a string
7774 * @pi: The port info structure
7775 *
7776 * Return a string representing the requested FEC mode.
7777 */
7778static const char *
7779ice_requested_fec_mode(struct ice_port_info *pi)
7780{
7781	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
7782	enum ice_status status;
7783
7784	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
7785				     &pcaps, NULL);
7786	if (status)
7787		/* Just report unknown if we can't get capabilities */
7788		return "Unknown";
7789
7790	/* Check if RS-FEC has been requested first */
7791	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
7792				      ICE_AQC_PHY_FEC_25G_RS_544_REQ))
7793		return ice_fec_str(ICE_FEC_RS);
7794
7795	/* If RS FEC has not been requested, then check BASE-R */
7796	if (pcaps.link_fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
7797				      ICE_AQC_PHY_FEC_25G_KR_REQ))
7798		return ice_fec_str(ICE_FEC_BASER);
7799
7800	return ice_fec_str(ICE_FEC_NONE);
7801}
7802
7803/**
7804 * ice_negotiated_fec_mode - Return the negotiated FEC mode as a string
7805 * @pi: The port info structure
7806 *
7807 * Return a string representing the current FEC mode.
7808 */
7809static const char *
7810ice_negotiated_fec_mode(struct ice_port_info *pi)
7811{
7812	/* First, check if RS has been requested first */
7813	if (pi->phy.link_info.fec_info & (ICE_AQ_LINK_25G_RS_528_FEC_EN |
7814					  ICE_AQ_LINK_25G_RS_544_FEC_EN))
7815		return ice_fec_str(ICE_FEC_RS);
7816
7817	/* If RS FEC has not been requested, then check BASE-R */
7818	if (pi->phy.link_info.fec_info & ICE_AQ_LINK_25G_KR_FEC_EN)
7819		return ice_fec_str(ICE_FEC_BASER);
7820
7821	return ice_fec_str(ICE_FEC_NONE);
7822}
7823
7824/**
7825 * ice_autoneg_mode - Return string indicating of autoneg completed
7826 * @pi: The port info structure
7827 *
7828 * Return "True" if autonegotiation is completed, "False" otherwise.
7829 */
7830static const char *
7831ice_autoneg_mode(struct ice_port_info *pi)
7832{
7833	if (pi->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
7834		return "True";
7835	else
7836		return "False";
7837}
7838
7839/**
7840 * ice_flowcontrol_mode - Return string indicating the Flow Control mode
7841 * @pi: The port info structure
7842 *
7843 * Returns the current Flow Control mode as a string.
7844 */
7845static const char *
7846ice_flowcontrol_mode(struct ice_port_info *pi)
7847{
7848	return ice_fc_str(pi->fc.current_mode);
7849}
7850
7851/**
7852 * ice_link_up_msg - Log a link up message with associated info
7853 * @sc: the device private softc
7854 *
7855 * Log a link up message with LOG_NOTICE message level. Include information
7856 * about the duplex, FEC mode, autonegotiation and flow control.
7857 */
7858void
7859ice_link_up_msg(struct ice_softc *sc)
7860{
7861	struct ice_hw *hw = &sc->hw;
7862	struct ifnet *ifp = sc->ifp;
7863	const char *speed, *req_fec, *neg_fec, *autoneg, *flowcontrol;
7864
7865	speed = ice_aq_speed_to_str(hw->port_info);
7866	req_fec = ice_requested_fec_mode(hw->port_info);
7867	neg_fec = ice_negotiated_fec_mode(hw->port_info);
7868	autoneg = ice_autoneg_mode(hw->port_info);
7869	flowcontrol = ice_flowcontrol_mode(hw->port_info);
7870
7871	log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7872	    if_name(ifp), speed, req_fec, neg_fec, autoneg, flowcontrol);
7873}
7874
7875/**
7876 * ice_update_laa_mac - Update MAC address if Locally Administered
7877 * @sc: the device softc
7878 *
7879 * Update the device MAC address when a Locally Administered Address is
7880 * assigned.
7881 *
7882 * This function does *not* update the MAC filter list itself. Instead, it
7883 * should be called after ice_rm_pf_default_mac_filters, so that the previous
7884 * address filter will be removed, and before ice_cfg_pf_default_mac_filters,
7885 * so that the new address filter will be assigned.
7886 */
7887int
7888ice_update_laa_mac(struct ice_softc *sc)
7889{
7890	const u8 *lladdr = (const u8 *)if_getlladdr(sc->ifp);
7891	struct ice_hw *hw = &sc->hw;
7892	enum ice_status status;
7893
7894	/* If the address is the same, then there is nothing to update */
7895	if (!memcmp(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN))
7896		return (0);
7897
7898	/* Reject Multicast addresses */
7899	if (ETHER_IS_MULTICAST(lladdr))
7900		return (EINVAL);
7901
7902	status = ice_aq_manage_mac_write(hw, lladdr, ICE_AQC_MAN_MAC_UPDATE_LAA_WOL, NULL);
7903	if (status) {
7904		device_printf(sc->dev, "Failed to write mac %6D to firmware, err %s aq_err %s\n",
7905			      lladdr, ":", ice_status_str(status),
7906			      ice_aq_str(hw->adminq.sq_last_status));
7907		return (EFAULT);
7908	}
7909
7910	/* Copy the address into place of the LAN address. */
7911	bcopy(lladdr, hw->port_info->mac.lan_addr, ETHER_ADDR_LEN);
7912
7913	return (0);
7914}
7915
7916/**
7917 * ice_get_and_print_bus_info - Save (PCI) bus info and print messages
7918 * @sc: device softc
7919 *
7920 * This will potentially print out a warning message if bus bandwidth
7921 * is insufficient for full-speed operation.
7922 *
7923 * This should only be called once, during the attach process, after
7924 * hw->port_info has been filled out with port link topology information
7925 * (from the Get PHY Capabilities Admin Queue command).
7926 */
7927void
7928ice_get_and_print_bus_info(struct ice_softc *sc)
7929{
7930	struct ice_hw *hw = &sc->hw;
7931	device_t dev = sc->dev;
7932	u16 pci_link_status;
7933	int offset;
7934
7935	pci_find_cap(dev, PCIY_EXPRESS, &offset);
7936	pci_link_status = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
7937
7938	/* Fill out hw struct with PCIE link status info */
7939	ice_set_pci_link_status_data(hw, pci_link_status);
7940
7941	/* Use info to print out bandwidth messages */
7942	ice_print_bus_link_data(dev, hw);
7943
7944	if (ice_pcie_bandwidth_check(sc)) {
7945		device_printf(dev,
7946		    "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
7947		device_printf(dev,
7948		    "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
7949	}
7950}
7951
7952/**
7953 * ice_pcie_bus_speed_to_rate - Convert driver bus speed enum value to
7954 * a 64-bit baudrate.
7955 * @speed: enum value to convert
7956 *
7957 * This only goes up to PCIE Gen 4.
7958 */
7959static uint64_t
7960ice_pcie_bus_speed_to_rate(enum ice_pcie_bus_speed speed)
7961{
7962	/* If the PCI-E speed is Gen1 or Gen2, then report
7963	 * only 80% of bus speed to account for encoding overhead.
7964	 */
7965	switch (speed) {
7966	case ice_pcie_speed_2_5GT:
7967		return IF_Gbps(2);
7968	case ice_pcie_speed_5_0GT:
7969		return IF_Gbps(4);
7970	case ice_pcie_speed_8_0GT:
7971		return IF_Gbps(8);
7972	case ice_pcie_speed_16_0GT:
7973		return IF_Gbps(16);
7974	case ice_pcie_speed_unknown:
7975	default:
7976		return 0;
7977	}
7978}
7979
7980/**
7981 * ice_pcie_lnk_width_to_int - Convert driver pci-e width enum value to
7982 * a 32-bit number.
7983 * @width: enum value to convert
7984 */
7985static int
7986ice_pcie_lnk_width_to_int(enum ice_pcie_link_width width)
7987{
7988	switch (width) {
7989	case ice_pcie_lnk_x1:
7990		return (1);
7991	case ice_pcie_lnk_x2:
7992		return (2);
7993	case ice_pcie_lnk_x4:
7994		return (4);
7995	case ice_pcie_lnk_x8:
7996		return (8);
7997	case ice_pcie_lnk_x12:
7998		return (12);
7999	case ice_pcie_lnk_x16:
8000		return (16);
8001	case ice_pcie_lnk_x32:
8002		return (32);
8003	case ice_pcie_lnk_width_resrv:
8004	case ice_pcie_lnk_width_unknown:
8005	default:
8006		return (0);
8007	}
8008}
8009
8010/**
8011 * ice_pcie_bandwidth_check - Check if PCI-E bandwidth is sufficient for
8012 * full-speed device operation.
8013 * @sc: adapter softc
8014 *
8015 * Returns 0 if sufficient; 1 if not.
8016 */
8017static uint8_t
8018ice_pcie_bandwidth_check(struct ice_softc *sc)
8019{
8020	struct ice_hw *hw = &sc->hw;
8021	int num_ports, pcie_width;
8022	u64 pcie_speed, port_speed;
8023
8024	MPASS(hw->port_info);
8025
8026	num_ports = bitcount32(hw->func_caps.common_cap.valid_functions);
8027	port_speed = ice_phy_types_to_max_rate(hw->port_info);
8028	pcie_speed = ice_pcie_bus_speed_to_rate(hw->bus.speed);
8029	pcie_width = ice_pcie_lnk_width_to_int(hw->bus.width);
8030
8031	/*
8032	 * If 2x100, clamp ports to 1 -- 2nd port is intended for
8033	 * failover.
8034	 */
8035	if (port_speed == IF_Gbps(100))
8036		num_ports = 1;
8037
8038	return !!((num_ports * port_speed) > pcie_speed * pcie_width);
8039}
8040
8041/**
8042 * ice_print_bus_link_data - Print PCI-E bandwidth information
8043 * @dev: device to print string for
8044 * @hw: hw struct with PCI-e link information
8045 */
8046static void
8047ice_print_bus_link_data(device_t dev, struct ice_hw *hw)
8048{
8049        device_printf(dev, "PCI Express Bus: Speed %s %s\n",
8050            ((hw->bus.speed == ice_pcie_speed_16_0GT) ? "16.0GT/s" :
8051            (hw->bus.speed == ice_pcie_speed_8_0GT) ? "8.0GT/s" :
8052            (hw->bus.speed == ice_pcie_speed_5_0GT) ? "5.0GT/s" :
8053            (hw->bus.speed == ice_pcie_speed_2_5GT) ? "2.5GT/s" : "Unknown"),
8054            (hw->bus.width == ice_pcie_lnk_x32) ? "Width x32" :
8055            (hw->bus.width == ice_pcie_lnk_x16) ? "Width x16" :
8056            (hw->bus.width == ice_pcie_lnk_x12) ? "Width x12" :
8057            (hw->bus.width == ice_pcie_lnk_x8) ? "Width x8" :
8058            (hw->bus.width == ice_pcie_lnk_x4) ? "Width x4" :
8059            (hw->bus.width == ice_pcie_lnk_x2) ? "Width x2" :
8060            (hw->bus.width == ice_pcie_lnk_x1) ? "Width x1" : "Width Unknown");
8061}
8062
8063/**
8064 * ice_set_pci_link_status_data - store PCI bus info
8065 * @hw: pointer to hardware structure
8066 * @link_status: the link status word from PCI config space
8067 *
8068 * Stores the PCI bus info (speed, width, type) within the ice_hw structure
8069 **/
8070static void
8071ice_set_pci_link_status_data(struct ice_hw *hw, u16 link_status)
8072{
8073	u16 reg;
8074
8075	hw->bus.type = ice_bus_pci_express;
8076
8077	reg = (link_status & PCIEM_LINK_STA_WIDTH) >> 4;
8078
8079	switch (reg) {
8080	case ice_pcie_lnk_x1:
8081	case ice_pcie_lnk_x2:
8082	case ice_pcie_lnk_x4:
8083	case ice_pcie_lnk_x8:
8084	case ice_pcie_lnk_x12:
8085	case ice_pcie_lnk_x16:
8086	case ice_pcie_lnk_x32:
8087		hw->bus.width = (enum ice_pcie_link_width)reg;
8088		break;
8089	default:
8090		hw->bus.width = ice_pcie_lnk_width_unknown;
8091		break;
8092	}
8093
8094	reg = (link_status & PCIEM_LINK_STA_SPEED) + 0x13;
8095
8096	switch (reg) {
8097	case ice_pcie_speed_2_5GT:
8098	case ice_pcie_speed_5_0GT:
8099	case ice_pcie_speed_8_0GT:
8100	case ice_pcie_speed_16_0GT:
8101		hw->bus.speed = (enum ice_pcie_bus_speed)reg;
8102		break;
8103	default:
8104		hw->bus.speed = ice_pcie_speed_unknown;
8105		break;
8106	}
8107}
8108
8109/**
8110 * ice_init_link_events - Initialize Link Status Events mask
8111 * @sc: the device softc
8112 *
8113 * Initialize the Link Status Events mask to disable notification of link
8114 * events we don't care about in software. Also request that link status
8115 * events be enabled.
8116 */
8117int
8118ice_init_link_events(struct ice_softc *sc)
8119{
8120	struct ice_hw *hw = &sc->hw;
8121	enum ice_status status;
8122	u16 wanted_events;
8123
8124	/* Set the bits for the events that we want to be notified by */
8125	wanted_events = (ICE_AQ_LINK_EVENT_UPDOWN |
8126			 ICE_AQ_LINK_EVENT_MEDIA_NA |
8127			 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL);
8128
8129	/* request that every event except the wanted events be masked */
8130	status = ice_aq_set_event_mask(hw, hw->port_info->lport, ~wanted_events, NULL);
8131	if (status) {
8132		device_printf(sc->dev,
8133			      "Failed to set link status event mask, err %s aq_err %s\n",
8134			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8135		return (EIO);
8136	}
8137
8138	/* Request link info with the LSE bit set to enable link status events */
8139	status = ice_aq_get_link_info(hw->port_info, true, NULL, NULL);
8140	if (status) {
8141		device_printf(sc->dev,
8142			      "Failed to enable link status events, err %s aq_err %s\n",
8143			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
8144		return (EIO);
8145	}
8146
8147	return (0);
8148}
8149
8150/**
8151 * ice_handle_mdd_event - Handle possibly malicious events
8152 * @sc: the device softc
8153 *
8154 * Called by the admin task if an MDD detection interrupt is triggered.
8155 * Identifies possibly malicious events coming from VFs. Also triggers for
8156 * similar incorrect behavior from the PF as well.
8157 */
8158void
8159ice_handle_mdd_event(struct ice_softc *sc)
8160{
8161	struct ice_hw *hw = &sc->hw;
8162	bool mdd_detected = false, request_reinit = false;
8163	device_t dev = sc->dev;
8164	u32 reg;
8165
8166	if (!ice_testandclear_state(&sc->state, ICE_STATE_MDD_PENDING))
8167		return;
8168
8169	reg = rd32(hw, GL_MDET_TX_TCLAN);
8170	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
8171		u8 pf_num  = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >> GL_MDET_TX_TCLAN_PF_NUM_S;
8172		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >> GL_MDET_TX_TCLAN_VF_NUM_S;
8173		u8 event   = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >> GL_MDET_TX_TCLAN_MAL_TYPE_S;
8174		u16 queue  = (reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S;
8175
8176		device_printf(dev, "Malicious Driver Detection Tx Descriptor check event '%s' on Tx queue %u PF# %u VF# %u\n",
8177			      ice_mdd_tx_tclan_str(event), queue, pf_num, vf_num);
8178
8179		/* Only clear this event if it matches this PF, that way other
8180		 * PFs can read the event and determine VF and queue number.
8181		 */
8182		if (pf_num == hw->pf_id)
8183			wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
8184
8185		mdd_detected = true;
8186	}
8187
8188	/* Determine what triggered the MDD event */
8189	reg = rd32(hw, GL_MDET_TX_PQM);
8190	if (reg & GL_MDET_TX_PQM_VALID_M) {
8191		u8 pf_num  = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> GL_MDET_TX_PQM_PF_NUM_S;
8192		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >> GL_MDET_TX_PQM_VF_NUM_S;
8193		u8 event   = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >> GL_MDET_TX_PQM_MAL_TYPE_S;
8194		u16 queue  = (reg & GL_MDET_TX_PQM_QNUM_M) >> GL_MDET_TX_PQM_QNUM_S;
8195
8196		device_printf(dev, "Malicious Driver Detection Tx Quanta check event '%s' on Tx queue %u PF# %u VF# %u\n",
8197			      ice_mdd_tx_pqm_str(event), queue, pf_num, vf_num);
8198
8199		/* Only clear this event if it matches this PF, that way other
8200		 * PFs can read the event and determine VF and queue number.
8201		 */
8202		if (pf_num == hw->pf_id)
8203			wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
8204
8205		mdd_detected = true;
8206	}
8207
8208	reg = rd32(hw, GL_MDET_RX);
8209	if (reg & GL_MDET_RX_VALID_M) {
8210		u8 pf_num  = (reg & GL_MDET_RX_PF_NUM_M) >> GL_MDET_RX_PF_NUM_S;
8211		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >> GL_MDET_RX_VF_NUM_S;
8212		u8 event   = (reg & GL_MDET_RX_MAL_TYPE_M) >> GL_MDET_RX_MAL_TYPE_S;
8213		u16 queue  = (reg & GL_MDET_RX_QNUM_M) >> GL_MDET_RX_QNUM_S;
8214
8215		device_printf(dev, "Malicious Driver Detection Rx event '%s' on Rx queue %u PF# %u VF# %u\n",
8216			      ice_mdd_rx_str(event), queue, pf_num, vf_num);
8217
8218		/* Only clear this event if it matches this PF, that way other
8219		 * PFs can read the event and determine VF and queue number.
8220		 */
8221		if (pf_num == hw->pf_id)
8222			wr32(hw, GL_MDET_RX, 0xffffffff);
8223
8224		mdd_detected = true;
8225	}
8226
8227	/* Now, confirm that this event actually affects this PF, by checking
8228	 * the PF registers.
8229	 */
8230	if (mdd_detected) {
8231		reg = rd32(hw, PF_MDET_TX_TCLAN);
8232		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
8233			wr32(hw, PF_MDET_TX_TCLAN, 0xffff);
8234			sc->soft_stats.tx_mdd_count++;
8235			request_reinit = true;
8236		}
8237
8238		reg = rd32(hw, PF_MDET_TX_PQM);
8239		if (reg & PF_MDET_TX_PQM_VALID_M) {
8240			wr32(hw, PF_MDET_TX_PQM, 0xffff);
8241			sc->soft_stats.tx_mdd_count++;
8242			request_reinit = true;
8243		}
8244
8245		reg = rd32(hw, PF_MDET_RX);
8246		if (reg & PF_MDET_RX_VALID_M) {
8247			wr32(hw, PF_MDET_RX, 0xffff);
8248			sc->soft_stats.rx_mdd_count++;
8249			request_reinit = true;
8250		}
8251	}
8252
8253	/* TODO: Implement logic to detect and handle events caused by VFs. */
8254
8255	/* request that the upper stack re-initialize the Tx/Rx queues */
8256	if (request_reinit)
8257		ice_request_stack_reinit(sc);
8258
8259	ice_flush(hw);
8260}
8261
8262/**
8263 * ice_start_dcbx_agent - Start DCBX agent in FW via AQ command
8264 * @sc: the device softc
8265 *
8266 * @pre device is DCB capable and the FW LLDP agent has started
8267 *
8268 * Checks DCBX status and starts the DCBX agent if it is not in
8269 * a valid state via an AQ command.
8270 */
8271static void
8272ice_start_dcbx_agent(struct ice_softc *sc)
8273{
8274	struct ice_hw *hw = &sc->hw;
8275	device_t dev = sc->dev;
8276	bool dcbx_agent_status;
8277	enum ice_status status;
8278
8279	hw->port_info->qos_cfg.dcbx_status = ice_get_dcbx_status(hw);
8280
8281	if (hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_DONE &&
8282	    hw->port_info->qos_cfg.dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
8283		/*
8284		 * Start DCBX agent, but not LLDP. The return value isn't
8285		 * checked here because a more detailed dcbx agent status is
8286		 * retrieved and checked in ice_init_dcb() and elsewhere.
8287		 */
8288		status = ice_aq_start_stop_dcbx(hw, true, &dcbx_agent_status, NULL);
8289		if (status && hw->adminq.sq_last_status != ICE_AQ_RC_EPERM)
8290			device_printf(dev,
8291			    "start_stop_dcbx failed, err %s aq_err %s\n",
8292			    ice_status_str(status),
8293			    ice_aq_str(hw->adminq.sq_last_status));
8294	}
8295}
8296
8297/**
8298 * ice_init_dcb_setup - Initialize DCB settings for HW
8299 * @sc: the device softc
8300 *
8301 * This needs to be called after the fw_lldp_agent sysctl is added, since that
8302 * can update the device's LLDP agent status if a tunable value is set.
8303 *
8304 * Get and store the initial state of DCB settings on driver load. Print out
8305 * informational messages as well.
8306 */
8307void
8308ice_init_dcb_setup(struct ice_softc *sc)
8309{
8310	struct ice_dcbx_cfg *local_dcbx_cfg;
8311	struct ice_hw *hw = &sc->hw;
8312	device_t dev = sc->dev;
8313	enum ice_status status;
8314	u8 pfcmode_ret;
8315
8316	/* Don't do anything if DCB isn't supported */
8317	if (!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_DCB)) {
8318		device_printf(dev, "%s: No DCB support\n", __func__);
8319		return;
8320	}
8321
8322	/* Starts DCBX agent if it needs starting */
8323	ice_start_dcbx_agent(sc);
8324
8325	/* This sets hw->port_info->qos_cfg.is_sw_lldp */
8326	status = ice_init_dcb(hw, true);
8327
8328	/* If there is an error, then FW LLDP is not in a usable state */
8329	if (status != 0 && status != ICE_ERR_NOT_READY) {
8330		/* Don't print an error message if the return code from the AQ
8331		 * cmd performed in ice_init_dcb() is EPERM; that means the
8332		 * FW LLDP engine is disabled, and that is a valid state.
8333		 */
8334		if (!(status == ICE_ERR_AQ_ERROR &&
8335		      hw->adminq.sq_last_status == ICE_AQ_RC_EPERM)) {
8336			device_printf(dev, "DCB init failed, err %s aq_err %s\n",
8337				      ice_status_str(status),
8338				      ice_aq_str(hw->adminq.sq_last_status));
8339		}
8340		hw->port_info->qos_cfg.dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
8341	}
8342
8343	switch (hw->port_info->qos_cfg.dcbx_status) {
8344	case ICE_DCBX_STATUS_DIS:
8345		ice_debug(hw, ICE_DBG_DCB, "DCBX disabled\n");
8346		break;
8347	case ICE_DCBX_STATUS_NOT_STARTED:
8348		ice_debug(hw, ICE_DBG_DCB, "DCBX not started\n");
8349		break;
8350	case ICE_DCBX_STATUS_MULTIPLE_PEERS:
8351		ice_debug(hw, ICE_DBG_DCB, "DCBX detected multiple peers\n");
8352		break;
8353	default:
8354		break;
8355	}
8356
8357	/* LLDP disabled in FW */
8358	if (hw->port_info->qos_cfg.is_sw_lldp) {
8359		ice_add_rx_lldp_filter(sc);
8360		device_printf(dev, "Firmware LLDP agent disabled\n");
8361	}
8362
8363	/* Query and cache PFC mode */
8364	status = ice_aq_query_pfc_mode(hw, &pfcmode_ret, NULL);
8365	if (status) {
8366		device_printf(dev, "PFC mode query failed, err %s aq_err %s\n",
8367			      ice_status_str(status),
8368			      ice_aq_str(hw->adminq.sq_last_status));
8369	}
8370	local_dcbx_cfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
8371	switch (pfcmode_ret) {
8372	case ICE_AQC_PFC_VLAN_BASED_PFC:
8373		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
8374		break;
8375	case ICE_AQC_PFC_DSCP_BASED_PFC:
8376		local_dcbx_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
8377		break;
8378	default:
8379		/* DCB is disabled, but we shouldn't get here */
8380		break;
8381	}
8382
8383	/* Set default SW MIB for init */
8384	ice_set_default_local_mib_settings(sc);
8385
8386	ice_set_bit(ICE_FEATURE_DCB, sc->feat_en);
8387}
8388
8389/**
8390 * ice_dcb_get_tc_map - Scans config to get bitmap of enabled TCs
8391 * @dcbcfg: DCB configuration to examine
8392 *
8393 * Scans a TC mapping table inside dcbcfg to find traffic classes
8394 * enabled and @returns a bitmask of enabled TCs
8395 */
8396u8
8397ice_dcb_get_tc_map(const struct ice_dcbx_cfg *dcbcfg)
8398{
8399	u8 tc_map = 0;
8400	int i = 0;
8401
8402	switch (dcbcfg->pfc_mode) {
8403	case ICE_QOS_MODE_VLAN:
8404		/* XXX: "i" is actually "User Priority" here, not
8405		 * Traffic Class, but the max for both is 8, so it works
8406		 * out here.
8407		 */
8408		for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
8409			tc_map |= BIT(dcbcfg->etscfg.prio_table[i]);
8410		break;
8411	case ICE_QOS_MODE_DSCP:
8412		for (i = 0; i < ICE_DSCP_NUM_VAL; i++)
8413			tc_map |= BIT(dcbcfg->dscp_map[i]);
8414		break;
8415	default:
8416		/* Invalid Mode */
8417		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8418		break;
8419	}
8420
8421	return (tc_map);
8422}
8423
8424/**
8425 * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
8426 * @dcbcfg: config to retrieve number of TCs from
8427 *
8428 * @return number of contiguous TCs found in dcbcfg's ETS Configuration
8429 * Priority Assignment Table, a value from 1 to 8. If there are
8430 * non-contiguous TCs used (e.g. assigning 1 and 3 without using 2),
8431 * then returns 0.
8432 */
8433static u8
8434ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
8435{
8436	u8 tc_map;
8437
8438	tc_map = ice_dcb_get_tc_map(dcbcfg);
8439
8440	return (ice_dcb_tc_contig(tc_map));
8441}
8442
8443/**
8444 * ice_debug_print_mib_change_event - helper function to log LLDP MIB change events
8445 * @sc: the device private softc
8446 * @event: event received on a control queue
8447 *
8448 * Prints out the type and contents of an LLDP MIB change event in a DCB debug message.
8449 */
8450static void
8451ice_debug_print_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8452{
8453	struct ice_aqc_lldp_get_mib *params =
8454	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8455	u8 mib_type, bridge_type, tx_status;
8456
8457	static const char* mib_type_strings[] = {
8458	    "Local MIB",
8459	    "Remote MIB",
8460	    "Reserved",
8461	    "Reserved"
8462	};
8463	static const char* bridge_type_strings[] = {
8464	    "Nearest Bridge",
8465	    "Non-TPMR Bridge",
8466	    "Reserved",
8467	    "Reserved"
8468	};
8469	static const char* tx_status_strings[] = {
8470	    "Port's TX active",
8471	    "Port's TX suspended and drained",
8472	    "Reserved",
8473	    "Port's TX suspended and drained; blocked TC pipe flushed"
8474	};
8475
8476	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8477	    ICE_AQ_LLDP_MIB_TYPE_S;
8478	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8479	    ICE_AQ_LLDP_BRID_TYPE_S;
8480	tx_status = (params->type & ICE_AQ_LLDP_TX_M) >>
8481	    ICE_AQ_LLDP_TX_S;
8482
8483	ice_debug(&sc->hw, ICE_DBG_DCB, "LLDP MIB Change Event (%s, %s, %s)\n",
8484	    mib_type_strings[mib_type], bridge_type_strings[bridge_type],
8485	    tx_status_strings[tx_status]);
8486
8487	/* Nothing else to report */
8488	if (!event->msg_buf)
8489		return;
8490
8491	ice_debug(&sc->hw, ICE_DBG_DCB, "- %s contents:\n", mib_type_strings[mib_type]);
8492	ice_debug_array(&sc->hw, ICE_DBG_DCB, 16, 1, event->msg_buf,
8493			event->msg_len);
8494}
8495
8496/**
8497 * ice_dcb_needs_reconfig - Returns true if driver needs to reconfigure
8498 * @sc: the device private softc
8499 * @old_cfg: Old DCBX configuration to compare against
8500 * @new_cfg: New DCBX configuration to check
8501 *
8502 * @return true if something changed in new_cfg that requires the driver
8503 * to do some reconfiguration.
8504 */
8505static bool
8506ice_dcb_needs_reconfig(struct ice_softc *sc, struct ice_dcbx_cfg *old_cfg,
8507    struct ice_dcbx_cfg *new_cfg)
8508{
8509	struct ice_hw *hw = &sc->hw;
8510	bool needs_reconfig = false;
8511
8512	/* No change detected in DCBX config */
8513	if (!memcmp(old_cfg, new_cfg, sizeof(*old_cfg))) {
8514		ice_debug(hw, ICE_DBG_DCB,
8515		    "No change detected in local DCBX configuration\n");
8516		return (false);
8517	}
8518
8519	/* Check if ETS config has changed */
8520	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
8521		   sizeof(new_cfg->etscfg))) {
8522		/* If Priority Table has changed, then driver reconfig is needed */
8523		if (memcmp(&new_cfg->etscfg.prio_table,
8524			   &old_cfg->etscfg.prio_table,
8525			   sizeof(new_cfg->etscfg.prio_table))) {
8526			ice_debug(hw, ICE_DBG_DCB, "ETS UP2TC changed\n");
8527			needs_reconfig = true;
8528		}
8529
8530		/* These are just informational */
8531		if (memcmp(&new_cfg->etscfg.tcbwtable,
8532			   &old_cfg->etscfg.tcbwtable,
8533			   sizeof(new_cfg->etscfg.tcbwtable))) {
8534			ice_debug(hw, ICE_DBG_DCB, "ETS TCBW table changed\n");
8535			needs_reconfig = true;
8536		}
8537
8538		if (memcmp(&new_cfg->etscfg.tsatable,
8539			   &old_cfg->etscfg.tsatable,
8540			   sizeof(new_cfg->etscfg.tsatable))) {
8541			ice_debug(hw, ICE_DBG_DCB, "ETS TSA table changed\n");
8542			needs_reconfig = true;
8543		}
8544	}
8545
8546	/* Check if PFC config has changed */
8547	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
8548		ice_debug(hw, ICE_DBG_DCB, "PFC config changed\n");
8549		needs_reconfig = true;
8550	}
8551
8552	/* Check if APP table has changed */
8553	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app)))
8554		ice_debug(hw, ICE_DBG_DCB, "APP Table changed\n");
8555
8556	ice_debug(hw, ICE_DBG_DCB, "%s result: %d\n", __func__, needs_reconfig);
8557
8558	return (needs_reconfig);
8559}
8560
8561/**
8562 * ice_stop_pf_vsi - Stop queues for PF LAN VSI
8563 * @sc: the device private softc
8564 *
8565 * Flushes interrupts and stops the queues associated with the PF LAN VSI.
8566 */
8567static void
8568ice_stop_pf_vsi(struct ice_softc *sc)
8569{
8570	/* Dissociate the Tx and Rx queues from the interrupts */
8571	ice_flush_txq_interrupts(&sc->pf_vsi);
8572	ice_flush_rxq_interrupts(&sc->pf_vsi);
8573
8574	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
8575		return;
8576
8577	/* Disable the Tx and Rx queues */
8578	ice_vsi_disable_tx(&sc->pf_vsi);
8579	ice_control_all_rx_queues(&sc->pf_vsi, false);
8580}
8581
8582/**
8583 * ice_vsi_setup_q_map - Setup a VSI queue map
8584 * @vsi: the VSI being configured
8585 * @ctxt: VSI context structure
8586 */
8587static void
8588ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
8589{
8590	u16 qcounts[ICE_MAX_TRAFFIC_CLASS] = {};
8591	u16 offset = 0, qmap = 0, pow = 0;
8592	u16 num_q_per_tc, qcount_rx, rem_queues;
8593	int i, j, k;
8594
8595	if (vsi->num_tcs == 0) {
8596		/* at least TC0 should be enabled by default */
8597		vsi->num_tcs = 1;
8598		vsi->tc_map = 0x1;
8599	}
8600
8601	qcount_rx = vsi->num_rx_queues;
8602	num_q_per_tc = min(qcount_rx / vsi->num_tcs, ICE_MAX_RXQS_PER_TC);
8603
8604	if (!num_q_per_tc)
8605		num_q_per_tc = 1;
8606
8607	/* Set initial values for # of queues to use for each active TC */
8608	ice_for_each_traffic_class(i)
8609		if (i < vsi->num_tcs)
8610			qcounts[i] = num_q_per_tc;
8611
8612	/* If any queues are unassigned, add them to TC 0 */
8613	rem_queues = qcount_rx % vsi->num_tcs;
8614	if (rem_queues > 0)
8615		qcounts[0] += rem_queues;
8616
8617	/* TC mapping is a function of the number of Rx queues assigned to the
8618	 * VSI for each traffic class and the offset of these queues.
8619	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
8620	 * queues allocated to TC0. No:of queues is a power-of-2.
8621	 *
8622	 * If TC is not enabled, the queue offset is set to 0, and allocate one
8623	 * queue, this way, traffic for the given TC will be sent to the default
8624	 * queue.
8625	 *
8626	 * Setup number and offset of Rx queues for all TCs for the VSI
8627	 */
8628	ice_for_each_traffic_class(i) {
8629		if (!(vsi->tc_map & BIT(i))) {
8630			/* TC is not enabled */
8631			vsi->tc_info[i].qoffset = 0;
8632			vsi->tc_info[i].qcount_rx = 1;
8633			vsi->tc_info[i].qcount_tx = 1;
8634
8635			ctxt->info.tc_mapping[i] = 0;
8636			continue;
8637		}
8638
8639		/* TC is enabled */
8640		vsi->tc_info[i].qoffset = offset;
8641		vsi->tc_info[i].qcount_rx = qcounts[i];
8642		vsi->tc_info[i].qcount_tx = qcounts[i];
8643
8644		/* find the (rounded up) log-2 of queue count for current TC */
8645		pow = fls(qcounts[i] - 1);
8646
8647		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
8648			ICE_AQ_VSI_TC_Q_OFFSET_M) |
8649			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
8650			 ICE_AQ_VSI_TC_Q_NUM_M);
8651		ctxt->info.tc_mapping[i] = CPU_TO_LE16(qmap);
8652
8653		/* Store traffic class and handle data in queue structures */
8654		for (j = offset, k = 0; j < offset + qcounts[i]; j++, k++) {
8655			vsi->tx_queues[j].q_handle = k;
8656			vsi->tx_queues[j].tc = i;
8657
8658			vsi->rx_queues[j].tc = i;
8659		}
8660
8661		offset += qcounts[i];
8662	}
8663
8664	/* Rx queue mapping */
8665	ctxt->info.mapping_flags |= CPU_TO_LE16(ICE_AQ_VSI_Q_MAP_CONTIG);
8666	ctxt->info.q_mapping[0] = CPU_TO_LE16(vsi->rx_qmap[0]);
8667	ctxt->info.q_mapping[1] = CPU_TO_LE16(vsi->num_rx_queues);
8668}
8669
8670/**
8671 * ice_pf_vsi_cfg_tc - Configure PF VSI for a given TC map
8672 * @sc: the device private softc
8673 * @tc_map: traffic class bitmap
8674 *
8675 * @pre VSI queues are stopped
8676 *
8677 * @return 0 if configuration is successful
8678 * @return EIO if Update VSI AQ cmd fails
8679 * @return ENODEV if updating Tx Scheduler fails
8680 */
8681static int
8682ice_pf_vsi_cfg_tc(struct ice_softc *sc, u8 tc_map)
8683{
8684	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
8685	struct ice_vsi *vsi = &sc->pf_vsi;
8686	struct ice_hw *hw = &sc->hw;
8687	struct ice_vsi_ctx ctx = { 0 };
8688	device_t dev = sc->dev;
8689	enum ice_status status;
8690	u8 num_tcs = 0;
8691	int i = 0;
8692
8693	/* Count the number of enabled Traffic Classes */
8694	ice_for_each_traffic_class(i)
8695		if (tc_map & BIT(i))
8696			num_tcs++;
8697
8698	vsi->tc_map = tc_map;
8699	vsi->num_tcs = num_tcs;
8700
8701	/* Set default parameters for context */
8702	ctx.vf_num = 0;
8703	ctx.info = vsi->info;
8704
8705	/* Setup queue map */
8706	ice_vsi_setup_q_map(vsi, &ctx);
8707
8708	/* Update VSI configuration in firmware (RX queues) */
8709	ctx.info.valid_sections = CPU_TO_LE16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
8710	status = ice_update_vsi(hw, vsi->idx, &ctx, NULL);
8711	if (status) {
8712		device_printf(dev,
8713		    "%s: Update VSI AQ call failed, err %s aq_err %s\n",
8714		    __func__, ice_status_str(status),
8715		    ice_aq_str(hw->adminq.sq_last_status));
8716		return (EIO);
8717	}
8718	vsi->info = ctx.info;
8719
8720	/* Use values derived in ice_vsi_setup_q_map() */
8721	for (i = 0; i < num_tcs; i++)
8722		max_txqs[i] = vsi->tc_info[i].qcount_tx;
8723
8724	if (hw->debug_mask & ICE_DBG_DCB) {
8725		device_printf(dev, "%s: max_txqs:", __func__);
8726		ice_for_each_traffic_class(i)
8727			printf(" %d", max_txqs[i]);
8728		printf("\n");
8729	}
8730
8731	/* Update LAN Tx queue info in firmware */
8732	status = ice_cfg_vsi_lan(hw->port_info, vsi->idx, vsi->tc_map,
8733				 max_txqs);
8734	if (status) {
8735		device_printf(dev,
8736		    "%s: Failed VSI lan queue config, err %s aq_err %s\n",
8737		    __func__, ice_status_str(status),
8738		    ice_aq_str(hw->adminq.sq_last_status));
8739		return (ENODEV);
8740	}
8741
8742	vsi->info.valid_sections = 0;
8743
8744	return (0);
8745}
8746
8747/**
8748 * ice_dcb_tc_contig - Count TCs if they're contiguous
8749 * @tc_map: pointer to priority table
8750 *
8751 * @return The number of traffic classes in
8752 * an 8-bit TC bitmap, or if there is a gap, then returns 0.
8753 */
8754static u8
8755ice_dcb_tc_contig(u8 tc_map)
8756{
8757	bool tc_unused = false;
8758	u8 ret = 0;
8759
8760	/* Scan bitmask for contiguous TCs starting with TC0 */
8761	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
8762		if (tc_map & BIT(i)) {
8763			if (!tc_unused) {
8764				ret++;
8765			} else {
8766				/* Non-contiguous TCs detected */
8767				return (0);
8768			}
8769		} else
8770			tc_unused = true;
8771	}
8772
8773	return (ret);
8774}
8775
8776/**
8777 * ice_dcb_recfg - Reconfigure VSI with new DCB settings
8778 * @sc: the device private softc
8779 *
8780 * @pre All VSIs have been disabled/stopped
8781 *
8782 * Reconfigures VSI settings based on local_dcbx_cfg.
8783 */
8784static void
8785ice_dcb_recfg(struct ice_softc *sc)
8786{
8787	struct ice_dcbx_cfg *dcbcfg =
8788	    &sc->hw.port_info->qos_cfg.local_dcbx_cfg;
8789	device_t dev = sc->dev;
8790	u8 tc_map = 0;
8791	int ret;
8792
8793	tc_map = ice_dcb_get_tc_map(dcbcfg);
8794
8795	/* If non-contiguous TCs are used, then configure
8796	 * the default TC instead. There's no support for
8797	 * non-contiguous TCs being used.
8798	 */
8799	if (ice_dcb_tc_contig(tc_map) == 0) {
8800		tc_map = ICE_DFLT_TRAFFIC_CLASS;
8801		ice_set_default_local_lldp_mib(sc);
8802	}
8803
8804	/* Reconfigure VSI queues to add/remove traffic classes */
8805	ret = ice_pf_vsi_cfg_tc(sc, tc_map);
8806	if (ret)
8807		device_printf(dev,
8808		    "Failed to configure TCs for PF VSI, err %s\n",
8809		    ice_err_str(ret));
8810
8811}
8812
8813/**
8814 * ice_set_default_local_mib_settings - Set Local LLDP MIB to default settings
8815 * @sc: device softc structure
8816 *
8817 * Overwrites the driver's SW local LLDP MIB with default settings. This
8818 * ensures the driver has a valid MIB when it next uses the Set Local LLDP MIB
8819 * admin queue command.
8820 */
8821static void
8822ice_set_default_local_mib_settings(struct ice_softc *sc)
8823{
8824	struct ice_dcbx_cfg *dcbcfg;
8825	struct ice_hw *hw = &sc->hw;
8826	struct ice_port_info *pi;
8827	u8 maxtcs, maxtcs_ets, old_pfc_mode;
8828
8829	pi = hw->port_info;
8830
8831	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
8832
8833	maxtcs = hw->func_caps.common_cap.maxtc;
8834	/* This value is only 3 bits; 8 TCs maps to 0 */
8835	maxtcs_ets = maxtcs & ICE_IEEE_ETS_MAXTC_M;
8836
8837	/* VLAN vs DSCP mode needs to be preserved */
8838	old_pfc_mode = dcbcfg->pfc_mode;
8839
8840	/**
8841	 * Setup the default settings used by the driver for the Set Local
8842	 * LLDP MIB Admin Queue command (0x0A08). (1TC w/ 100% BW, ETS, no
8843	 * PFC, TSA=2).
8844	 */
8845	memset(dcbcfg, 0, sizeof(*dcbcfg));
8846
8847	dcbcfg->etscfg.willing = 1;
8848	dcbcfg->etscfg.tcbwtable[0] = 100;
8849	dcbcfg->etscfg.maxtcs = maxtcs_ets;
8850	dcbcfg->etscfg.tsatable[0] = 2;
8851
8852	dcbcfg->etsrec = dcbcfg->etscfg;
8853	dcbcfg->etsrec.willing = 0;
8854
8855	dcbcfg->pfc.willing = 1;
8856	dcbcfg->pfc.pfccap = maxtcs;
8857
8858	dcbcfg->pfc_mode = old_pfc_mode;
8859}
8860
8861/**
8862 * ice_do_dcb_reconfig - notify RDMA and reconfigure PF LAN VSI
8863 * @sc: the device private softc
8864 * @pending_mib: FW has a pending MIB change to execute
8865 *
8866 * @pre Determined that the DCB configuration requires a change
8867 *
8868 * Reconfigures the PF LAN VSI based on updated DCB configuration
8869 * found in the hw struct's/port_info's/ local dcbx configuration.
8870 */
8871void
8872ice_do_dcb_reconfig(struct ice_softc *sc, bool pending_mib)
8873{
8874	struct ice_aqc_port_ets_elem port_ets = { 0 };
8875	struct ice_dcbx_cfg *local_dcbx_cfg;
8876	struct ice_hw *hw = &sc->hw;
8877	struct ice_port_info *pi;
8878	device_t dev = sc->dev;
8879	enum ice_status status;
8880
8881	pi = sc->hw.port_info;
8882	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8883
8884	ice_rdma_notify_dcb_qos_change(sc);
8885	/* If there's a pending MIB, tell the FW to execute the MIB change
8886	 * now.
8887	 */
8888	if (pending_mib) {
8889		status = ice_lldp_execute_pending_mib(hw);
8890		if ((status == ICE_ERR_AQ_ERROR) &&
8891		    (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)) {
8892			device_printf(dev,
8893			    "Execute Pending LLDP MIB AQ call failed, no pending MIB\n");
8894		} else if (status) {
8895			device_printf(dev,
8896			    "Execute Pending LLDP MIB AQ call failed, err %s aq_err %s\n",
8897			    ice_status_str(status),
8898			    ice_aq_str(hw->adminq.sq_last_status));
8899			/* This won't break traffic, but QoS will not work as expected */
8900		}
8901	}
8902
8903	/* Set state when there's more than one TC */
8904	if (ice_dcb_get_num_tc(local_dcbx_cfg) > 1) {
8905		device_printf(dev, "Multiple traffic classes enabled\n");
8906		ice_set_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8907	} else {
8908		device_printf(dev, "Multiple traffic classes disabled\n");
8909		ice_clear_state(&sc->state, ICE_STATE_MULTIPLE_TCS);
8910	}
8911
8912	/* Disable PF VSI since it's going to be reconfigured */
8913	ice_stop_pf_vsi(sc);
8914
8915	/* Query ETS configuration and update SW Tx scheduler info */
8916	status = ice_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
8917	if (status != ICE_SUCCESS) {
8918		device_printf(dev,
8919		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
8920		    ice_status_str(status),
8921		    ice_aq_str(hw->adminq.sq_last_status));
8922		/* This won't break traffic, but QoS will not work as expected */
8923	}
8924
8925	/* Change PF VSI configuration */
8926	ice_dcb_recfg(sc);
8927
8928	/* Send new configuration to RDMA client driver */
8929	ice_rdma_dcb_qos_update(sc, pi);
8930
8931	ice_request_stack_reinit(sc);
8932}
8933
8934/**
8935 * ice_handle_mib_change_event - helper function to handle LLDP MIB change events
8936 * @sc: the device private softc
8937 * @event: event received on a control queue
8938 *
8939 * Checks the updated MIB it receives and possibly reconfigures the PF LAN
8940 * VSI depending on what has changed. This will also print out some debug
8941 * information about the MIB event if ICE_DBG_DCB is enabled in the debug_mask.
8942 */
8943static void
8944ice_handle_mib_change_event(struct ice_softc *sc, struct ice_rq_event_info *event)
8945{
8946	struct ice_aqc_lldp_get_mib *params =
8947	    (struct ice_aqc_lldp_get_mib *)&event->desc.params.lldp_get_mib;
8948	struct ice_dcbx_cfg tmp_dcbx_cfg, *local_dcbx_cfg;
8949	struct ice_port_info *pi;
8950	device_t dev = sc->dev;
8951	struct ice_hw *hw = &sc->hw;
8952	bool needs_reconfig, mib_is_pending;
8953	enum ice_status status;
8954	u8 mib_type, bridge_type;
8955
8956	ASSERT_CFG_LOCKED(sc);
8957
8958	ice_debug_print_mib_change_event(sc, event);
8959
8960	pi = sc->hw.port_info;
8961
8962	mib_type = (params->type & ICE_AQ_LLDP_MIB_TYPE_M) >>
8963	    ICE_AQ_LLDP_MIB_TYPE_S;
8964	bridge_type = (params->type & ICE_AQ_LLDP_BRID_TYPE_M) >>
8965	    ICE_AQ_LLDP_BRID_TYPE_S;
8966	mib_is_pending = (params->state & ICE_AQ_LLDP_MIB_CHANGE_STATE_M) >>
8967	    ICE_AQ_LLDP_MIB_CHANGE_STATE_S;
8968
8969	/* Ignore if event is not for Nearest Bridge */
8970	if (bridge_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
8971		return;
8972
8973	/* Check MIB Type and return if event for Remote MIB update */
8974	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
8975		/* Update the cached remote MIB and return */
8976		status = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
8977					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
8978					 &pi->qos_cfg.remote_dcbx_cfg);
8979		if (status)
8980			device_printf(dev,
8981			    "%s: Failed to get Remote DCB config; status %s, aq_err %s\n",
8982			    __func__, ice_status_str(status),
8983			    ice_aq_str(hw->adminq.sq_last_status));
8984		/* Not fatal if this fails */
8985		return;
8986	}
8987
8988	/* Save line length by aliasing the local dcbx cfg */
8989	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
8990	/* Save off the old configuration and clear current config */
8991	tmp_dcbx_cfg = *local_dcbx_cfg;
8992	memset(local_dcbx_cfg, 0, sizeof(*local_dcbx_cfg));
8993
8994	/* Update the current local_dcbx_cfg with new data */
8995	if (mib_is_pending) {
8996		ice_get_dcb_cfg_from_mib_change(pi, event);
8997	} else {
8998		/* Get updated DCBX data from firmware */
8999		status = ice_get_dcb_cfg(pi);
9000		if (status) {
9001			device_printf(dev,
9002			    "%s: Failed to get Local DCB config; status %s, aq_err %s\n",
9003			    __func__, ice_status_str(status),
9004			    ice_aq_str(hw->adminq.sq_last_status));
9005			return;
9006		}
9007	}
9008
9009	/* Check to see if DCB needs reconfiguring */
9010	needs_reconfig = ice_dcb_needs_reconfig(sc, &tmp_dcbx_cfg,
9011	    local_dcbx_cfg);
9012
9013	if (!needs_reconfig && !mib_is_pending)
9014		return;
9015
9016	/* Reconfigure -- this will also notify FW that configuration is done,
9017	 * if the FW MIB change is only pending instead of executed.
9018	 */
9019	ice_do_dcb_reconfig(sc, mib_is_pending);
9020}
9021
9022/**
9023 * ice_send_version - Send driver version to firmware
9024 * @sc: the device private softc
9025 *
9026 * Send the driver version to the firmware. This must be called as early as
9027 * possible after ice_init_hw().
9028 */
9029int
9030ice_send_version(struct ice_softc *sc)
9031{
9032	struct ice_driver_ver driver_version = {0};
9033	struct ice_hw *hw = &sc->hw;
9034	device_t dev = sc->dev;
9035	enum ice_status status;
9036
9037	driver_version.major_ver = ice_major_version;
9038	driver_version.minor_ver = ice_minor_version;
9039	driver_version.build_ver = ice_patch_version;
9040	driver_version.subbuild_ver = ice_rc_version;
9041
9042	strlcpy((char *)driver_version.driver_string, ice_driver_version,
9043		sizeof(driver_version.driver_string));
9044
9045	status = ice_aq_send_driver_ver(hw, &driver_version, NULL);
9046	if (status) {
9047		device_printf(dev, "Unable to send driver version to firmware, err %s aq_err %s\n",
9048			      ice_status_str(status), ice_aq_str(hw->adminq.sq_last_status));
9049		return (EIO);
9050	}
9051
9052	return (0);
9053}
9054
9055/**
9056 * ice_handle_lan_overflow_event - helper function to log LAN overflow events
9057 * @sc: device softc
9058 * @event: event received on a control queue
9059 *
9060 * Prints out a message when a LAN overflow event is detected on a receive
9061 * queue.
9062 */
9063static void
9064ice_handle_lan_overflow_event(struct ice_softc *sc, struct ice_rq_event_info *event)
9065{
9066	struct ice_aqc_event_lan_overflow *params =
9067	    (struct ice_aqc_event_lan_overflow *)&event->desc.params.lan_overflow;
9068	struct ice_hw *hw = &sc->hw;
9069
9070	ice_debug(hw, ICE_DBG_DCB, "LAN overflow event detected, prtdcb_ruptq=0x%08x, qtx_ctl=0x%08x\n",
9071		  LE32_TO_CPU(params->prtdcb_ruptq),
9072		  LE32_TO_CPU(params->qtx_ctl));
9073}
9074
9075/**
9076 * ice_add_ethertype_to_list - Add an Ethertype filter to a filter list
9077 * @vsi: the VSI to target packets to
9078 * @list: the list to add the filter to
9079 * @ethertype: the Ethertype to filter on
9080 * @direction: The direction of the filter (Tx or Rx)
9081 * @action: the action to take
9082 *
9083 * Add an Ethertype filter to a filter list. Used to forward a series of
9084 * filters to the firmware for configuring the switch.
9085 *
9086 * Returns 0 on success, and an error code on failure.
9087 */
9088static int
9089ice_add_ethertype_to_list(struct ice_vsi *vsi, struct ice_list_head *list,
9090			  u16 ethertype, u16 direction,
9091			  enum ice_sw_fwd_act_type action)
9092{
9093	struct ice_fltr_list_entry *entry;
9094
9095	MPASS((direction == ICE_FLTR_TX) || (direction == ICE_FLTR_RX));
9096
9097	entry = (__typeof(entry))malloc(sizeof(*entry), M_ICE, M_NOWAIT|M_ZERO);
9098	if (!entry)
9099		return (ENOMEM);
9100
9101	entry->fltr_info.flag = direction;
9102	entry->fltr_info.src_id = ICE_SRC_ID_VSI;
9103	entry->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
9104	entry->fltr_info.fltr_act = action;
9105	entry->fltr_info.vsi_handle = vsi->idx;
9106	entry->fltr_info.l_data.ethertype_mac.ethertype = ethertype;
9107
9108	LIST_ADD(&entry->list_entry, list);
9109
9110	return 0;
9111}
9112
/* Ethertype value matched by the filters for PAUSE (flow control) frames */
#define ETHERTYPE_PAUSE_FRAMES 0x8808
/* Ethertype value matched by the filters for LLDP frames */
#define ETHERTYPE_LLDP_FRAMES 0x88cc
9115
9116/**
9117 * ice_cfg_pf_ethertype_filters - Configure switch to drop ethertypes
9118 * @sc: the device private softc
9119 *
9120 * Configure the switch to drop PAUSE frames and LLDP frames transmitted from
9121 * the host. This prevents malicious VFs from sending these frames and being
9122 * able to control or configure the network.
9123 */
9124int
9125ice_cfg_pf_ethertype_filters(struct ice_softc *sc)
9126{
9127	struct ice_list_head ethertype_list;
9128	struct ice_vsi *vsi = &sc->pf_vsi;
9129	struct ice_hw *hw = &sc->hw;
9130	device_t dev = sc->dev;
9131	enum ice_status status;
9132	int err = 0;
9133
9134	INIT_LIST_HEAD(&ethertype_list);
9135
9136	/*
9137	 * Note that the switch filters will ignore the VSI index for the drop
9138	 * action, so we only need to program drop filters once for the main
9139	 * VSI.
9140	 */
9141
9142	/* Configure switch to drop all Tx pause frames coming from any VSI. */
9143	if (sc->enable_tx_fc_filter) {
9144		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9145						ETHERTYPE_PAUSE_FRAMES,
9146						ICE_FLTR_TX, ICE_DROP_PACKET);
9147		if (err)
9148			goto free_ethertype_list;
9149	}
9150
9151	/* Configure switch to drop LLDP frames coming from any VSI */
9152	if (sc->enable_tx_lldp_filter) {
9153		err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9154						ETHERTYPE_LLDP_FRAMES,
9155						ICE_FLTR_TX, ICE_DROP_PACKET);
9156		if (err)
9157			goto free_ethertype_list;
9158	}
9159
9160	status = ice_add_eth_mac(hw, &ethertype_list);
9161	if (status) {
9162		device_printf(dev,
9163			      "Failed to add Tx Ethertype filters, err %s aq_err %s\n",
9164			      ice_status_str(status),
9165			      ice_aq_str(hw->adminq.sq_last_status));
9166		err = (EIO);
9167	}
9168
9169free_ethertype_list:
9170	ice_free_fltr_list(&ethertype_list);
9171	return err;
9172}
9173
9174/**
9175 * ice_add_rx_lldp_filter - add ethertype filter for Rx LLDP frames
9176 * @sc: the device private structure
9177 *
9178 * Add a switch ethertype filter which forwards the LLDP frames to the main PF
9179 * VSI. Called when the fw_lldp_agent is disabled, to allow the LLDP frames to
9180 * be forwarded to the stack.
9181 */
9182void
9183ice_add_rx_lldp_filter(struct ice_softc *sc)
9184{
9185	struct ice_list_head ethertype_list;
9186	struct ice_vsi *vsi = &sc->pf_vsi;
9187	struct ice_hw *hw = &sc->hw;
9188	device_t dev = sc->dev;
9189	enum ice_status status;
9190	int err;
9191	u16 vsi_num;
9192
9193	/*
9194	 * If FW is new enough, use a direct AQ command to perform the filter
9195	 * addition.
9196	 */
9197	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9198		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9199		status = ice_lldp_fltr_add_remove(hw, vsi_num, true);
9200		if (status) {
9201			device_printf(dev,
9202			    "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9203			    ice_status_str(status),
9204			    ice_aq_str(hw->adminq.sq_last_status));
9205		} else
9206			ice_set_state(&sc->state,
9207			    ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9208		return;
9209	}
9210
9211	INIT_LIST_HEAD(&ethertype_list);
9212
9213	/* Forward Rx LLDP frames to the stack */
9214	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9215					ETHERTYPE_LLDP_FRAMES,
9216					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9217	if (err) {
9218		device_printf(dev,
9219			      "Failed to add Rx LLDP filter, err %s\n",
9220			      ice_err_str(err));
9221		goto free_ethertype_list;
9222	}
9223
9224	status = ice_add_eth_mac(hw, &ethertype_list);
9225	if (status && status != ICE_ERR_ALREADY_EXISTS) {
9226		device_printf(dev,
9227			      "Failed to add Rx LLDP filter, err %s aq_err %s\n",
9228			      ice_status_str(status),
9229			      ice_aq_str(hw->adminq.sq_last_status));
9230	} else {
9231		/*
9232		 * If status == ICE_ERR_ALREADY_EXISTS, we won't treat an
9233		 * already existing filter as an error case.
9234		 */
9235		ice_set_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER);
9236	}
9237
9238free_ethertype_list:
9239	ice_free_fltr_list(&ethertype_list);
9240}
9241
9242/**
9243 * ice_del_rx_lldp_filter - Remove ethertype filter for Rx LLDP frames
9244 * @sc: the device private structure
9245 *
9246 * Remove the switch filter forwarding LLDP frames to the main PF VSI, called
9247 * when the firmware LLDP agent is enabled, to stop routing LLDP frames to the
9248 * stack.
9249 */
9250static void
9251ice_del_rx_lldp_filter(struct ice_softc *sc)
9252{
9253	struct ice_list_head ethertype_list;
9254	struct ice_vsi *vsi = &sc->pf_vsi;
9255	struct ice_hw *hw = &sc->hw;
9256	device_t dev = sc->dev;
9257	enum ice_status status;
9258	int err;
9259	u16 vsi_num;
9260
9261	/*
9262	 * Only in the scenario where the driver added the filter during
9263	 * this session (while the driver was loaded) would we be able to
9264	 * delete this filter.
9265	 */
9266	if (!ice_test_state(&sc->state, ICE_STATE_LLDP_RX_FLTR_FROM_DRIVER))
9267		return;
9268
9269	/*
9270	 * If FW is new enough, use a direct AQ command to perform the filter
9271	 * removal.
9272	 */
9273	if (ice_fw_supports_lldp_fltr_ctrl(hw)) {
9274		vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
9275		status = ice_lldp_fltr_add_remove(hw, vsi_num, false);
9276		if (status) {
9277			device_printf(dev,
9278			    "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9279			    ice_status_str(status),
9280			    ice_aq_str(hw->adminq.sq_last_status));
9281		}
9282		return;
9283	}
9284
9285	INIT_LIST_HEAD(&ethertype_list);
9286
9287	/* Remove filter forwarding Rx LLDP frames to the stack */
9288	err = ice_add_ethertype_to_list(vsi, &ethertype_list,
9289					ETHERTYPE_LLDP_FRAMES,
9290					ICE_FLTR_RX, ICE_FWD_TO_VSI);
9291	if (err) {
9292		device_printf(dev,
9293			      "Failed to remove Rx LLDP filter, err %s\n",
9294			      ice_err_str(err));
9295		goto free_ethertype_list;
9296	}
9297
9298	status = ice_remove_eth_mac(hw, &ethertype_list);
9299	if (status == ICE_ERR_DOES_NOT_EXIST) {
9300		; /* Don't complain if we try to remove a filter that doesn't exist */
9301	} else if (status) {
9302		device_printf(dev,
9303			      "Failed to remove Rx LLDP filter, err %s aq_err %s\n",
9304			      ice_status_str(status),
9305			      ice_aq_str(hw->adminq.sq_last_status));
9306	}
9307
9308free_ethertype_list:
9309	ice_free_fltr_list(&ethertype_list);
9310}
9311
9312/**
9313 * ice_init_link_configuration -- Setup link in different ways depending
9314 * on whether media is available or not.
9315 * @sc: device private structure
9316 *
9317 * Called at the end of the attach process to either set default link
9318 * parameters if there is media available, or force HW link down and
9319 * set a state bit if there is no media.
9320 */
9321void
9322ice_init_link_configuration(struct ice_softc *sc)
9323{
9324	struct ice_port_info *pi = sc->hw.port_info;
9325	struct ice_hw *hw = &sc->hw;
9326	device_t dev = sc->dev;
9327	enum ice_status status;
9328
9329	pi->phy.get_link_info = true;
9330	status = ice_get_link_status(pi, &sc->link_up);
9331	if (status != ICE_SUCCESS) {
9332		device_printf(dev,
9333		    "%s: ice_get_link_status failed; status %s, aq_err %s\n",
9334		    __func__, ice_status_str(status),
9335		    ice_aq_str(hw->adminq.sq_last_status));
9336		return;
9337	}
9338
9339	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
9340		ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
9341		/* Apply default link settings */
9342		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)) {
9343			ice_set_link(sc, false);
9344			ice_set_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
9345		} else
9346			ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9347	} else {
9348		 /* Set link down, and poll for media available in timer. This prevents the
9349		  * driver from receiving spurious link-related events.
9350		  */
9351		ice_set_state(&sc->state, ICE_STATE_NO_MEDIA);
9352		status = ice_aq_set_link_restart_an(pi, false, NULL);
9353		if (status != ICE_SUCCESS && hw->adminq.sq_last_status != ICE_AQ_RC_EMODE)
9354			device_printf(dev,
9355			    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9356			    __func__, ice_status_str(status),
9357			    ice_aq_str(hw->adminq.sq_last_status));
9358	}
9359}
9360
9361/**
9362 * ice_apply_saved_phy_req_to_cfg -- Write saved user PHY settings to cfg data
9363 * @sc: device private structure
9364 * @cfg: new PHY config data to be modified
9365 *
9366 * Applies user settings for advertised speeds to the PHY type fields in the
9367 * supplied PHY config struct. It uses the data from pcaps to check if the
9368 * saved settings are invalid and uses the pcaps data instead if they are
9369 * invalid.
9370 */
9371static int
9372ice_apply_saved_phy_req_to_cfg(struct ice_softc *sc,
9373			       struct ice_aqc_set_phy_cfg_data *cfg)
9374{
9375	struct ice_phy_data phy_data = { 0 };
9376	struct ice_port_info *pi = sc->hw.port_info;
9377	u64 phy_low = 0, phy_high = 0;
9378	u16 link_speeds;
9379	int ret;
9380
9381	link_speeds = pi->phy.curr_user_speed_req;
9382
9383	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2)) {
9384		memset(&phy_data, 0, sizeof(phy_data));
9385		phy_data.report_mode = ICE_AQC_REPORT_DFLT_CFG;
9386		phy_data.user_speeds_orig = link_speeds;
9387		ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9388		if (ret != 0) {
9389			/* Error message already printed within function */
9390			return (ret);
9391		}
9392		phy_low = phy_data.phy_low_intr;
9393		phy_high = phy_data.phy_high_intr;
9394
9395		if (link_speeds == 0 || phy_data.user_speeds_intr)
9396			goto finalize_link_speed;
9397		if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9398			memset(&phy_data, 0, sizeof(phy_data));
9399			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9400			phy_data.user_speeds_orig = link_speeds;
9401			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9402			if (ret != 0) {
9403				/* Error message already printed within function */
9404				return (ret);
9405			}
9406			phy_low = phy_data.phy_low_intr;
9407			phy_high = phy_data.phy_high_intr;
9408
9409			if (!phy_data.user_speeds_intr) {
9410				phy_low = phy_data.phy_low_orig;
9411				phy_high = phy_data.phy_high_orig;
9412			}
9413			goto finalize_link_speed;
9414		}
9415		/* If we're here, then it means the benefits of Version 2
9416		 * link management aren't utilized.  We fall through to
9417		 * handling Strict Link Mode the same as Version 1 link
9418		 * management.
9419		 */
9420	}
9421
9422	memset(&phy_data, 0, sizeof(phy_data));
9423	if ((link_speeds == 0) &&
9424	    (sc->ldo_tlv.phy_type_low || sc->ldo_tlv.phy_type_high))
9425		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9426	else
9427		phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9428	phy_data.user_speeds_orig = link_speeds;
9429	ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9430	if (ret != 0) {
9431		/* Error message already printed within function */
9432		return (ret);
9433	}
9434	phy_low = phy_data.phy_low_intr;
9435	phy_high = phy_data.phy_high_intr;
9436
9437	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE)) {
9438		if (phy_low == 0 && phy_high == 0) {
9439			device_printf(sc->dev,
9440			    "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
9441			return (EINVAL);
9442		}
9443	} else {
9444		if (link_speeds == 0) {
9445			if (sc->ldo_tlv.phy_type_low & phy_low ||
9446			    sc->ldo_tlv.phy_type_high & phy_high) {
9447				phy_low &= sc->ldo_tlv.phy_type_low;
9448				phy_high &= sc->ldo_tlv.phy_type_high;
9449			}
9450		} else if (phy_low == 0 && phy_high == 0) {
9451			memset(&phy_data, 0, sizeof(phy_data));
9452			phy_data.report_mode = ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA;
9453			phy_data.user_speeds_orig = link_speeds;
9454			ret = ice_intersect_phy_types_and_speeds(sc, &phy_data);
9455			if (ret != 0) {
9456				/* Error message already printed within function */
9457				return (ret);
9458			}
9459			phy_low = phy_data.phy_low_intr;
9460			phy_high = phy_data.phy_high_intr;
9461
9462			if (!phy_data.user_speeds_intr) {
9463				phy_low = phy_data.phy_low_orig;
9464				phy_high = phy_data.phy_high_orig;
9465			}
9466		}
9467	}
9468
9469finalize_link_speed:
9470
9471	/* Cache new user settings for speeds */
9472	pi->phy.curr_user_speed_req = phy_data.user_speeds_intr;
9473	cfg->phy_type_low = htole64(phy_low);
9474	cfg->phy_type_high = htole64(phy_high);
9475
9476	return (ret);
9477}
9478
9479/**
9480 * ice_apply_saved_fec_req_to_cfg -- Write saved user FEC mode to cfg data
9481 * @sc: device private structure
9482 * @cfg: new PHY config data to be modified
9483 *
9484 * Applies user setting for FEC mode to PHY config struct. It uses the data
9485 * from pcaps to check if the saved settings are invalid and uses the pcaps
9486 * data instead if they are invalid.
9487 */
9488static int
9489ice_apply_saved_fec_req_to_cfg(struct ice_softc *sc,
9490			       struct ice_aqc_set_phy_cfg_data *cfg)
9491{
9492	struct ice_port_info *pi = sc->hw.port_info;
9493	enum ice_status status;
9494
9495	cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
9496	status = ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
9497	if (status)
9498		return (EIO);
9499
9500	return (0);
9501}
9502
9503/**
9504 * ice_apply_saved_fc_req_to_cfg -- Write saved user flow control mode to cfg data
9505 * @pi: port info struct
9506 * @cfg: new PHY config data to be modified
9507 *
9508 * Applies user setting for flow control mode to PHY config struct. There are
9509 * no invalid flow control mode settings; if there are, then this function
9510 * treats them like "ICE_FC_NONE".
9511 */
9512static void
9513ice_apply_saved_fc_req_to_cfg(struct ice_port_info *pi,
9514			      struct ice_aqc_set_phy_cfg_data *cfg)
9515{
9516	cfg->caps &= ~(ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9517		       ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY);
9518
9519	switch (pi->phy.curr_user_fc_req) {
9520	case ICE_FC_FULL:
9521		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY |
9522			     ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9523		break;
9524	case ICE_FC_RX_PAUSE:
9525		cfg->caps |= ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY;
9526		break;
9527	case ICE_FC_TX_PAUSE:
9528		cfg->caps |= ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY;
9529		break;
9530	default:
9531		/* ICE_FC_NONE */
9532		break;
9533	}
9534}
9535
9536/**
9537 * ice_apply_saved_phy_cfg -- Re-apply user PHY config settings
9538 * @sc: device private structure
9539 * @settings: which settings to apply
9540 *
9541 * Applies user settings for advertised speeds, FEC mode, and flow
9542 * control mode to a PHY config struct; it uses the data from pcaps
9543 * to check if the saved settings are invalid and uses the pcaps
9544 * data instead if they are invalid.
9545 *
9546 * For things like sysctls where only one setting needs to be
9547 * updated, the bitmap allows the caller to specify which setting
9548 * to update.
9549 */
9550int
9551ice_apply_saved_phy_cfg(struct ice_softc *sc, u8 settings)
9552{
9553	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
9554	struct ice_port_info *pi = sc->hw.port_info;
9555	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9556	struct ice_hw *hw = &sc->hw;
9557	device_t dev = sc->dev;
9558	u64 phy_low, phy_high;
9559	enum ice_status status;
9560	enum ice_fec_mode dflt_fec_mode;
9561	u16 dflt_user_speed;
9562
9563	if (!settings || settings > ICE_APPLY_LS_FEC_FC) {
9564		ice_debug(hw, ICE_DBG_LINK, "Settings out-of-bounds: %u\n",
9565		    settings);
9566	}
9567
9568	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
9569				     &pcaps, NULL);
9570	if (status != ICE_SUCCESS) {
9571		device_printf(dev,
9572		    "%s: ice_aq_get_phy_caps (ACTIVE) failed; status %s, aq_err %s\n",
9573		    __func__, ice_status_str(status),
9574		    ice_aq_str(hw->adminq.sq_last_status));
9575		return (EIO);
9576	}
9577
9578	phy_low = le64toh(pcaps.phy_type_low);
9579	phy_high = le64toh(pcaps.phy_type_high);
9580
9581	/* Save off initial config parameters */
9582	dflt_user_speed = ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9583	dflt_fec_mode = ice_caps_to_fec_mode(pcaps.caps, pcaps.link_fec_options);
9584
9585	/* Setup new PHY config */
9586	ice_copy_phy_caps_to_cfg(pi, &pcaps, &cfg);
9587
9588	/* On error, restore active configuration values */
9589	if ((settings & ICE_APPLY_LS) &&
9590	    ice_apply_saved_phy_req_to_cfg(sc, &cfg)) {
9591		pi->phy.curr_user_speed_req = dflt_user_speed;
9592		cfg.phy_type_low = pcaps.phy_type_low;
9593		cfg.phy_type_high = pcaps.phy_type_high;
9594	}
9595	if ((settings & ICE_APPLY_FEC) &&
9596	    ice_apply_saved_fec_req_to_cfg(sc, &cfg)) {
9597		pi->phy.curr_user_fec_req = dflt_fec_mode;
9598	}
9599	if (settings & ICE_APPLY_FC) {
9600		/* No real error indicators for this process,
9601		 * so we'll just have to assume it works. */
9602		ice_apply_saved_fc_req_to_cfg(pi, &cfg);
9603	}
9604
9605	/* Enable link and re-negotiate it */
9606	cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
9607
9608	status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
9609	if (status != ICE_SUCCESS) {
9610		/* Don't indicate failure if there's no media in the port.
9611		 * The settings have been saved and will apply when media
9612		 * is inserted.
9613		 */
9614		if ((status == ICE_ERR_AQ_ERROR) &&
9615		    (hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)) {
9616			device_printf(dev,
9617			    "%s: Setting will be applied when media is inserted\n",
9618			    __func__);
9619			return (0);
9620		} else {
9621			device_printf(dev,
9622			    "%s: ice_aq_set_phy_cfg failed; status %s, aq_err %s\n",
9623			    __func__, ice_status_str(status),
9624			    ice_aq_str(hw->adminq.sq_last_status));
9625			return (EIO);
9626		}
9627	}
9628
9629	return (0);
9630}
9631
9632/**
9633 * ice_print_ldo_tlv - Print out LDO TLV information
9634 * @sc: device private structure
9635 * @tlv: LDO TLV information from the adapter NVM
9636 *
9637 * Dump out the information in tlv to the kernel message buffer; intended for
9638 * debugging purposes.
9639 */
9640static void
9641ice_print_ldo_tlv(struct ice_softc *sc, struct ice_link_default_override_tlv *tlv)
9642{
9643	device_t dev = sc->dev;
9644
9645	device_printf(dev, "TLV: -options     0x%02x\n", tlv->options);
9646	device_printf(dev, "     -phy_config  0x%02x\n", tlv->phy_config);
9647	device_printf(dev, "     -fec_options 0x%02x\n", tlv->fec_options);
9648	device_printf(dev, "     -phy_high    0x%016llx\n",
9649	    (unsigned long long)tlv->phy_type_high);
9650	device_printf(dev, "     -phy_low     0x%016llx\n",
9651	    (unsigned long long)tlv->phy_type_low);
9652}
9653
9654/**
9655 * ice_set_link_management_mode -- Strict or lenient link management
9656 * @sc: device private structure
9657 *
9658 * Some NVMs give the adapter the option to advertise a superset of link
9659 * configurations.  This checks to see if that option is enabled.
9660 * Further, the NVM could also provide a specific set of configurations
9661 * to try; these are cached in the driver's private structure if they
9662 * are available.
9663 */
9664void
9665ice_set_link_management_mode(struct ice_softc *sc)
9666{
9667	struct ice_port_info *pi = sc->hw.port_info;
9668	device_t dev = sc->dev;
9669	struct ice_link_default_override_tlv tlv = { 0 };
9670	enum ice_status status;
9671
9672	/* Port must be in strict mode if FW version is below a certain
9673	 * version. (i.e. Don't set lenient mode features)
9674	 */
9675	if (!(ice_fw_supports_link_override(&sc->hw)))
9676		return;
9677
9678	status = ice_get_link_default_override(&tlv, pi);
9679	if (status != ICE_SUCCESS) {
9680		device_printf(dev,
9681		    "%s: ice_get_link_default_override failed; status %s, aq_err %s\n",
9682		    __func__, ice_status_str(status),
9683		    ice_aq_str(sc->hw.adminq.sq_last_status));
9684		return;
9685	}
9686
9687	if (sc->hw.debug_mask & ICE_DBG_LINK)
9688		ice_print_ldo_tlv(sc, &tlv);
9689
9690	/* Set lenient link mode */
9691	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LENIENT_LINK_MODE) &&
9692	    (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)))
9693		ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_en);
9694
9695	/* FW supports reporting a default configuration */
9696	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_2) &&
9697	    ice_fw_supports_report_dflt_cfg(&sc->hw)) {
9698		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_en);
9699		/* Knowing we're at a high enough firmware revision to
9700		 * support this link management configuration, we don't
9701		 * need to check/support earlier versions.
9702		 */
9703		return;
9704	}
9705
9706	/* Default overrides only work if in lenient link mode */
9707	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_LINK_MGMT_VER_1) &&
9708	    ice_is_bit_set(sc->feat_en, ICE_FEATURE_LENIENT_LINK_MODE) &&
9709	    (tlv.options & ICE_LINK_OVERRIDE_EN))
9710		ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_en);
9711
9712	/* Cache the LDO TLV structure in the driver, since it
9713	 * won't change during the driver's lifetime.
9714	 */
9715	sc->ldo_tlv = tlv;
9716}
9717
9718/**
9719 * ice_set_link -- Set up/down link on phy
9720 * @sc: device private structure
9721 * @enabled: link status to set up
9722 *
9723 * This should be called when change of link status is needed.
9724 */
9725void
9726ice_set_link(struct ice_softc *sc, bool enabled)
9727{
9728	struct ice_hw *hw = &sc->hw;
9729	device_t dev = sc->dev;
9730	enum ice_status status;
9731
9732	if (ice_driver_is_detaching(sc))
9733		return;
9734
9735	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9736		return;
9737
9738	if (enabled)
9739		ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
9740	else {
9741		status = ice_aq_set_link_restart_an(hw->port_info, false, NULL);
9742		if (status != ICE_SUCCESS) {
9743			if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
9744				device_printf(dev,
9745				    "%s: Link control not enabled in current device mode\n",
9746				    __func__);
9747			else
9748				device_printf(dev,
9749				    "%s: ice_aq_set_link_restart_an: status %s, aq_err %s\n",
9750				    __func__, ice_status_str(status),
9751				    ice_aq_str(hw->adminq.sq_last_status));
9752		} else
9753			sc->link_up = false;
9754	}
9755}
9756
9757/**
9758 * ice_init_saved_phy_cfg -- Set cached user PHY cfg settings with NVM defaults
9759 * @sc: device private structure
9760 *
9761 * This should be called before the tunables for these link settings
9762 * (e.g. advertise_speed) are added -- so that these defaults don't overwrite
9763 * the cached values that the sysctl handlers will write.
9764 *
9765 * This also needs to be called before ice_init_link_configuration, to ensure
9766 * that there are sane values that can be written if there is media available
9767 * in the port.
9768 */
9769void
9770ice_init_saved_phy_cfg(struct ice_softc *sc)
9771{
9772	struct ice_port_info *pi = sc->hw.port_info;
9773	struct ice_aqc_get_phy_caps_data pcaps = { 0 };
9774	struct ice_hw *hw = &sc->hw;
9775	device_t dev = sc->dev;
9776	enum ice_status status;
9777	u64 phy_low, phy_high;
9778	u8 report_mode = ICE_AQC_REPORT_TOPO_CAP_MEDIA;
9779
9780	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_LINK_MGMT_VER_2))
9781		report_mode = ICE_AQC_REPORT_DFLT_CFG;
9782	status = ice_aq_get_phy_caps(pi, false, report_mode, &pcaps, NULL);
9783	if (status != ICE_SUCCESS) {
9784		device_printf(dev,
9785		    "%s: ice_aq_get_phy_caps (%s) failed; status %s, aq_err %s\n",
9786		    __func__,
9787		    report_mode == ICE_AQC_REPORT_DFLT_CFG ? "DFLT" : "w/MEDIA",
9788		    ice_status_str(status),
9789		    ice_aq_str(hw->adminq.sq_last_status));
9790		return;
9791	}
9792
9793	phy_low = le64toh(pcaps.phy_type_low);
9794	phy_high = le64toh(pcaps.phy_type_high);
9795
9796	/* Save off initial config parameters */
9797	pi->phy.curr_user_speed_req =
9798	   ice_aq_phy_types_to_link_speeds(phy_low, phy_high);
9799	pi->phy.curr_user_fec_req = ice_caps_to_fec_mode(pcaps.caps,
9800	    pcaps.link_fec_options);
9801	pi->phy.curr_user_fc_req = ice_caps_to_fc_mode(pcaps.caps);
9802}
9803
/**
 * ice_module_init - Driver callback to handle module load
 *
 * Invoked for module load events. Anything that must exist for the whole
 * life of the device driver is initialized here; at present that is the
 * call to ice_rdma_init().
 *
 * @returns 0 always.
 */
static int
ice_module_init(void)
{
	ice_rdma_init();

	return (0);
}
9816
/**
 * ice_module_exit - Driver callback to handle module exit
 *
 * Invoked for module unload events. Releases whatever ice_module_init set
 * up; at present that is the call to ice_rdma_exit().
 *
 * A non-zero return value would prevent the module from being unloaded, so
 * it should be reserved for situations where unloading is impossible, such
 * as outstanding memory references that cannot be revoked.
 *
 * @returns 0 always.
 */
static int
ice_module_exit(void)
{
	ice_rdma_exit();

	return (0);
}
9833
9834/**
9835 * ice_module_event_handler - Callback for module events
9836 * @mod: unused module_t parameter
9837 * @what: the event requested
9838 * @arg: unused event argument
9839 *
9840 * Callback used to handle module events from the stack. Used to allow the
9841 * driver to define custom behavior that should happen at module load and
9842 * unload.
9843 */
9844int
9845ice_module_event_handler(module_t __unused mod, int what, void __unused *arg)
9846{
9847	switch (what) {
9848	case MOD_LOAD:
9849		return ice_module_init();
9850	case MOD_UNLOAD:
9851		return ice_module_exit();
9852	default:
9853		/* TODO: do we need to handle MOD_QUIESCE and MOD_SHUTDOWN? */
9854		return (EOPNOTSUPP);
9855	}
9856}
9857
9858/**
9859 * ice_handle_nvm_access_ioctl - Handle an NVM access ioctl request
9860 * @sc: the device private softc
9861 * @ifd: ifdrv ioctl request pointer
9862 */
9863int
9864ice_handle_nvm_access_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
9865{
9866	union ice_nvm_access_data *data;
9867	struct ice_nvm_access_cmd *cmd;
9868	size_t ifd_len = ifd->ifd_len, malloc_len;
9869	struct ice_hw *hw = &sc->hw;
9870	device_t dev = sc->dev;
9871	enum ice_status status;
9872	u8 *nvm_buffer;
9873	int err;
9874
9875	/*
9876	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
9877	 * a privilege check. In turn, iflib forwards the ioctl to the driver
9878	 * without performing a privilege check. Perform one here to ensure
9879	 * that non-privileged threads cannot access this interface.
9880	 */
9881	err = priv_check(curthread, PRIV_DRIVER);
9882	if (err)
9883		return (err);
9884
9885	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
9886		device_printf(dev, "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
9887			      __func__);
9888		return (EBUSY);
9889	}
9890
9891	if (ifd_len < sizeof(struct ice_nvm_access_cmd)) {
9892		device_printf(dev, "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
9893			      __func__, ifd_len, sizeof(struct ice_nvm_access_cmd));
9894		return (EINVAL);
9895	}
9896
9897	if (ifd->ifd_data == NULL) {
9898		device_printf(dev, "%s: ifd data buffer not present.\n",
9899			      __func__);
9900		return (EINVAL);
9901	}
9902
9903	/*
9904	 * If everything works correctly, ice_handle_nvm_access should not
9905	 * modify data past the size of the ioctl length. However, it could
9906	 * lead to memory corruption if it did. Make sure to allocate at least
9907	 * enough space for the command and data regardless. This
9908	 * ensures that any access to the data union will not access invalid
9909	 * memory.
9910	 */
9911	malloc_len = max(ifd_len, sizeof(*data) + sizeof(*cmd));
9912
9913	nvm_buffer = (u8 *)malloc(malloc_len, M_ICE, M_ZERO | M_WAITOK);
9914	if (!nvm_buffer)
9915		return (ENOMEM);
9916
9917	/* Copy the NVM access command and data in from user space */
9918	/* coverity[tainted_data_argument] */
9919	err = copyin(ifd->ifd_data, nvm_buffer, ifd_len);
9920	if (err) {
9921		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
9922			      __func__, ice_err_str(err));
9923		goto cleanup_free_nvm_buffer;
9924	}
9925
9926	/*
9927	 * The NVM command structure is immediately followed by data which
9928	 * varies in size based on the command.
9929	 */
9930	cmd = (struct ice_nvm_access_cmd *)nvm_buffer;
9931	data = (union ice_nvm_access_data *)(nvm_buffer + sizeof(struct ice_nvm_access_cmd));
9932
9933	/* Handle the NVM access request */
9934	status = ice_handle_nvm_access(hw, cmd, data);
9935	if (status)
9936		ice_debug(hw, ICE_DBG_NVM,
9937			  "NVM access request failed, err %s\n",
9938			  ice_status_str(status));
9939
9940	/* Copy the possibly modified contents of the handled request out */
9941	err = copyout(nvm_buffer, ifd->ifd_data, ifd_len);
9942	if (err) {
9943		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
9944			      __func__, ice_err_str(err));
9945		goto cleanup_free_nvm_buffer;
9946	}
9947
9948	/* Convert private status to an error code for proper ioctl response */
9949	switch (status) {
9950	case ICE_SUCCESS:
9951		err = (0);
9952		break;
9953	case ICE_ERR_NO_MEMORY:
9954		err = (ENOMEM);
9955		break;
9956	case ICE_ERR_OUT_OF_RANGE:
9957		err = (ENOTTY);
9958		break;
9959	case ICE_ERR_PARAM:
9960	default:
9961		err = (EINVAL);
9962		break;
9963	}
9964
9965cleanup_free_nvm_buffer:
9966	free(nvm_buffer, M_ICE);
9967	return err;
9968}
9969
9970/**
9971 * ice_read_sff_eeprom - Read data from SFF eeprom
9972 * @sc: device softc
9973 * @dev_addr: I2C device address (typically 0xA0 or 0xA2)
9974 * @offset: offset into the eeprom
9975 * @data: pointer to data buffer to store read data in
9976 * @length: length to read; max length is 16
9977 *
9978 * Read from the SFF eeprom in the module for this PF's port. For more details
9979 * on the contents of an SFF eeprom, refer to SFF-8724 (SFP), SFF-8636 (QSFP),
9980 * and SFF-8024 (both).
9981 */
9982int
9983ice_read_sff_eeprom(struct ice_softc *sc, u16 dev_addr, u16 offset, u8* data, u16 length)
9984{
9985	struct ice_hw *hw = &sc->hw;
9986	int ret = 0, retries = 0;
9987	enum ice_status status;
9988
9989	if (length > 16)
9990		return (EINVAL);
9991
9992	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
9993		return (ENOSYS);
9994
9995	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
9996		return (ENXIO);
9997
9998	do {
9999		status = ice_aq_sff_eeprom(hw, 0, dev_addr,
10000					   offset, 0, 0, data, length,
10001					   false, NULL);
10002		if (!status) {
10003			ret = 0;
10004			break;
10005		}
10006		if (status == ICE_ERR_AQ_ERROR &&
10007		    hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY) {
10008			ret = EBUSY;
10009			continue;
10010		}
10011		if (status == ICE_ERR_AQ_ERROR &&
10012		    hw->adminq.sq_last_status == ICE_AQ_RC_EACCES) {
10013			/* FW says I2C access isn't supported */
10014			ret = EACCES;
10015			break;
10016		}
10017		if (status == ICE_ERR_AQ_ERROR &&
10018		    hw->adminq.sq_last_status == ICE_AQ_RC_EPERM) {
10019			device_printf(sc->dev,
10020				  "%s: Module pointer location specified in command does not permit the required operation.\n",
10021				  __func__);
10022			ret = EPERM;
10023			break;
10024		} else {
10025			device_printf(sc->dev,
10026				  "%s: Error reading I2C data: err %s aq_err %s\n",
10027				  __func__, ice_status_str(status),
10028				  ice_aq_str(hw->adminq.sq_last_status));
10029			ret = EIO;
10030			break;
10031		}
10032	} while (retries++ < ICE_I2C_MAX_RETRIES);
10033
10034	if (ret == EBUSY)
10035		device_printf(sc->dev,
10036			  "%s: Error reading I2C data after %d retries\n",
10037			  __func__, ICE_I2C_MAX_RETRIES);
10038
10039	return (ret);
10040}
10041
10042/**
10043 * ice_handle_i2c_req - Driver independent I2C request handler
10044 * @sc: device softc
10045 * @req: The I2C parameters to use
10046 *
10047 * Read from the port's I2C eeprom using the parameters from the ioctl.
10048 */
10049int
10050ice_handle_i2c_req(struct ice_softc *sc, struct ifi2creq *req)
10051{
10052	return ice_read_sff_eeprom(sc, req->dev_addr, req->offset, req->data, req->len);
10053}
10054
10055/**
10056 * ice_sysctl_read_i2c_diag_data - Read some module diagnostic data via i2c
10057 * @oidp: sysctl oid structure
10058 * @arg1: pointer to private data structure
10059 * @arg2: unused
10060 * @req: sysctl request pointer
10061 *
10062 * Read 8 bytes of diagnostic data from the SFF eeprom in the (Q)SFP module
10063 * inserted into the port.
10064 *
10065 *             | SFP A2  | QSFP Lower Page
10066 * ------------|---------|----------------
10067 * Temperature | 96-97	 | 22-23
10068 * Vcc         | 98-99   | 26-27
10069 * TX power    | 102-103 | 34-35..40-41
10070 * RX power    | 104-105 | 50-51..56-57
10071 */
10072static int
10073ice_sysctl_read_i2c_diag_data(SYSCTL_HANDLER_ARGS)
10074{
10075	struct ice_softc *sc = (struct ice_softc *)arg1;
10076	device_t dev = sc->dev;
10077	struct sbuf *sbuf;
10078	int ret;
10079	u8 data[16];
10080
10081	UNREFERENCED_PARAMETER(arg2);
10082	UNREFERENCED_PARAMETER(oidp);
10083
10084	if (ice_driver_is_detaching(sc))
10085		return (ESHUTDOWN);
10086
10087	if (req->oldptr == NULL) {
10088		ret = SYSCTL_OUT(req, 0, 128);
10089		return (ret);
10090	}
10091
10092	ret = ice_read_sff_eeprom(sc, 0xA0, 0, data, 1);
10093	if (ret)
10094		return (ret);
10095
10096	/* 0x3 for SFP; 0xD/0x11 for QSFP+/QSFP28 */
10097	if (data[0] == 0x3) {
10098		/*
10099		 * Check for:
10100		 * - Internally calibrated data
10101		 * - Diagnostic monitoring is implemented
10102		 */
10103		ice_read_sff_eeprom(sc, 0xA0, 92, data, 1);
10104		if (!(data[0] & 0x60)) {
10105			device_printf(dev, "Module doesn't support diagnostics: 0xA0[92] = %02X\n", data[0]);
10106			return (ENODEV);
10107		}
10108
10109		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10110
10111		ice_read_sff_eeprom(sc, 0xA2, 96, data, 4);
10112		for (int i = 0; i < 4; i++)
10113			sbuf_printf(sbuf, "%02X ", data[i]);
10114
10115		ice_read_sff_eeprom(sc, 0xA2, 102, data, 4);
10116		for (int i = 0; i < 4; i++)
10117			sbuf_printf(sbuf, "%02X ", data[i]);
10118	} else if (data[0] == 0xD || data[0] == 0x11) {
10119		/*
10120		 * QSFP+ modules are always internally calibrated, and must indicate
10121		 * what types of diagnostic monitoring are implemented
10122		 */
10123		sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10124
10125		ice_read_sff_eeprom(sc, 0xA0, 22, data, 2);
10126		for (int i = 0; i < 2; i++)
10127			sbuf_printf(sbuf, "%02X ", data[i]);
10128
10129		ice_read_sff_eeprom(sc, 0xA0, 26, data, 2);
10130		for (int i = 0; i < 2; i++)
10131			sbuf_printf(sbuf, "%02X ", data[i]);
10132
10133		ice_read_sff_eeprom(sc, 0xA0, 34, data, 2);
10134		for (int i = 0; i < 2; i++)
10135			sbuf_printf(sbuf, "%02X ", data[i]);
10136
10137		ice_read_sff_eeprom(sc, 0xA0, 50, data, 2);
10138		for (int i = 0; i < 2; i++)
10139			sbuf_printf(sbuf, "%02X ", data[i]);
10140	} else {
10141		device_printf(dev, "Module is not SFP/SFP+/SFP28/QSFP+ (%02X)\n", data[0]);
10142		return (ENODEV);
10143	}
10144
10145	sbuf_finish(sbuf);
10146	sbuf_delete(sbuf);
10147
10148	return (0);
10149}
10150
10151/**
10152 * ice_alloc_intr_tracking - Setup interrupt tracking structures
10153 * @sc: device softc structure
10154 *
10155 * Sets up the resource manager for keeping track of interrupt allocations,
10156 * and initializes the tracking maps for the PF's interrupt allocations.
10157 *
10158 * Unlike the scheme for queues, this is done in one step since both the
10159 * manager and the maps both have the same lifetime.
10160 *
10161 * @returns 0 on success, or an error code on failure.
10162 */
10163int
10164ice_alloc_intr_tracking(struct ice_softc *sc)
10165{
10166	struct ice_hw *hw = &sc->hw;
10167	device_t dev = sc->dev;
10168	int err;
10169
10170	/* Initialize the interrupt allocation manager */
10171	err = ice_resmgr_init_contig_only(&sc->dev_imgr,
10172	    hw->func_caps.common_cap.num_msix_vectors);
10173	if (err) {
10174		device_printf(dev, "Unable to initialize PF interrupt manager: %s\n",
10175			      ice_err_str(err));
10176		return (err);
10177	}
10178
10179	/* Allocate PF interrupt mapping storage */
10180	if (!(sc->pf_imap =
10181	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10182	      M_ICE, M_NOWAIT))) {
10183		device_printf(dev, "Unable to allocate PF imap memory\n");
10184		err = ENOMEM;
10185		goto free_imgr;
10186	}
10187	if (!(sc->rdma_imap =
10188	      (u16 *)malloc(sizeof(u16) * hw->func_caps.common_cap.num_msix_vectors,
10189	      M_ICE, M_NOWAIT))) {
10190		device_printf(dev, "Unable to allocate RDMA imap memory\n");
10191		err = ENOMEM;
10192		free(sc->pf_imap, M_ICE);
10193		goto free_imgr;
10194	}
10195	for (u32 i = 0; i < hw->func_caps.common_cap.num_msix_vectors; i++) {
10196		sc->pf_imap[i] = ICE_INVALID_RES_IDX;
10197		sc->rdma_imap[i] = ICE_INVALID_RES_IDX;
10198	}
10199
10200	return (0);
10201
10202free_imgr:
10203	ice_resmgr_destroy(&sc->dev_imgr);
10204	return (err);
10205}
10206
10207/**
10208 * ice_free_intr_tracking - Free PF interrupt tracking structures
10209 * @sc: device softc structure
10210 *
10211 * Frees the interrupt resource allocation manager and the PF's owned maps.
10212 *
10213 * VF maps are released when the owning VF's are destroyed, which should always
10214 * happen before this function is called.
10215 */
10216void
10217ice_free_intr_tracking(struct ice_softc *sc)
10218{
10219	if (sc->pf_imap) {
10220		ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
10221				       sc->lan_vectors);
10222		free(sc->pf_imap, M_ICE);
10223		sc->pf_imap = NULL;
10224	}
10225	if (sc->rdma_imap) {
10226		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
10227				       sc->lan_vectors);
10228		free(sc->rdma_imap, M_ICE);
10229		sc->rdma_imap = NULL;
10230	}
10231
10232	ice_resmgr_destroy(&sc->dev_imgr);
10233
10234	ice_resmgr_destroy(&sc->os_imgr);
10235}
10236
10237/**
10238 * ice_apply_supported_speed_filter - Mask off unsupported speeds
10239 * @report_speeds: bit-field for the desired link speeds
10240 * @mod_type: type of module/sgmii connection we have
10241 *
10242 * Given a bitmap of the desired lenient mode link speeds,
10243 * this function will mask off the speeds that are not currently
10244 * supported by the device.
10245 */
10246static u16
10247ice_apply_supported_speed_filter(u16 report_speeds, u8 mod_type)
10248{
10249	u16 speed_mask;
10250	enum { IS_SGMII, IS_SFP, IS_QSFP } module;
10251
10252	/*
10253	 * The SFF specification says 0 is unknown, so we'll
10254	 * treat it like we're connected through SGMII for now.
10255	 * This may need revisiting if a new type is supported
10256	 * in the future.
10257	 */
10258	switch (mod_type) {
10259	case 0:
10260		module = IS_SGMII;
10261		break;
10262	case 3:
10263		module = IS_SFP;
10264		break;
10265	default:
10266		module = IS_QSFP;
10267		break;
10268	}
10269
10270	/* We won't offer anything lower than 100M for any part,
10271	 * but we'll need to mask off other speeds based on the
10272	 * device and module type.
10273	 */
10274	speed_mask = ~((u16)ICE_AQ_LINK_SPEED_100MB - 1);
10275	if ((report_speeds & ICE_AQ_LINK_SPEED_10GB) && (module == IS_SFP))
10276		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10277	if (report_speeds & ICE_AQ_LINK_SPEED_25GB)
10278		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10279	if (report_speeds & ICE_AQ_LINK_SPEED_50GB) {
10280		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_1000MB - 1);
10281		if (module == IS_QSFP)
10282			speed_mask = ~((u16)ICE_AQ_LINK_SPEED_10GB - 1);
10283	}
10284	if (report_speeds & ICE_AQ_LINK_SPEED_100GB)
10285		speed_mask = ~((u16)ICE_AQ_LINK_SPEED_25GB - 1);
10286	return (report_speeds & speed_mask);
10287}
10288
10289/**
10290 * ice_init_health_events - Enable FW health event reporting
10291 * @sc: device softc
10292 *
10293 * Will try to enable firmware health event reporting, but shouldn't
10294 * cause any grief (to the caller) if this fails.
10295 */
10296void
10297ice_init_health_events(struct ice_softc *sc)
10298{
10299	enum ice_status status;
10300	u8 health_mask;
10301
10302	if ((!ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HEALTH_STATUS)) ||
10303		(!sc->enable_health_events))
10304		return;
10305
10306	health_mask = ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK |
10307		      ICE_AQC_HEALTH_STATUS_SET_GLOBAL_MASK;
10308
10309	status = ice_aq_set_health_status_config(&sc->hw, health_mask, NULL);
10310	if (status)
10311		device_printf(sc->dev,
10312		    "Failed to enable firmware health events, err %s aq_err %s\n",
10313		    ice_status_str(status),
10314		    ice_aq_str(sc->hw.adminq.sq_last_status));
10315	else
10316		ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_en);
10317}
10318
10319/**
10320 * ice_print_health_status_string - Print message for given FW health event
10321 * @dev: the PCIe device
10322 * @elem: health status element containing status code
10323 *
10324 * A rather large list of possible health status codes and their associated
10325 * messages.
10326 */
10327static void
10328ice_print_health_status_string(device_t dev,
10329			       struct ice_aqc_health_status_elem *elem)
10330{
10331	u16 status_code = le16toh(elem->health_status_code);
10332
10333	switch (status_code) {
10334	case ICE_AQC_HEALTH_STATUS_INFO_RECOVERY:
10335		device_printf(dev, "The device is in firmware recovery mode.\n");
10336		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10337		break;
10338	case ICE_AQC_HEALTH_STATUS_ERR_FLASH_ACCESS:
10339		device_printf(dev, "The flash chip cannot be accessed.\n");
10340		device_printf(dev, "Possible Solution: If issue persists, call customer support.\n");
10341		break;
10342	case ICE_AQC_HEALTH_STATUS_ERR_NVM_AUTH:
10343		device_printf(dev, "NVM authentication failed.\n");
10344		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10345		break;
10346	case ICE_AQC_HEALTH_STATUS_ERR_OROM_AUTH:
10347		device_printf(dev, "Option ROM authentication failed.\n");
10348		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10349		break;
10350	case ICE_AQC_HEALTH_STATUS_ERR_DDP_AUTH:
10351		device_printf(dev, "DDP package failed.\n");
10352		device_printf(dev, "Possible Solution: Update to latest base driver and DDP package.\n");
10353		break;
10354	case ICE_AQC_HEALTH_STATUS_ERR_NVM_COMPAT:
10355		device_printf(dev, "NVM image is incompatible.\n");
10356		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10357		break;
10358	case ICE_AQC_HEALTH_STATUS_ERR_OROM_COMPAT:
10359		device_printf(dev, "Option ROM is incompatible.\n");
10360		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10361		break;
10362	case ICE_AQC_HEALTH_STATUS_ERR_DCB_MIB:
10363		device_printf(dev, "Supplied MIB file is invalid. DCB reverted to default configuration.\n");
10364		device_printf(dev, "Possible Solution: Disable FW-LLDP and check DCBx system configuration.\n");
10365		break;
10366	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT:
10367		device_printf(dev, "An unsupported module was detected.\n");
10368		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10369		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10370		break;
10371	case ICE_AQC_HEALTH_STATUS_ERR_MOD_TYPE:
10372		device_printf(dev, "Module type is not supported.\n");
10373		device_printf(dev, "Possible Solution: Change or replace the module or cable.\n");
10374		break;
10375	case ICE_AQC_HEALTH_STATUS_ERR_MOD_QUAL:
10376		device_printf(dev, "Module is not qualified.\n");
10377		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10378		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10379		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10380		break;
10381	case ICE_AQC_HEALTH_STATUS_ERR_MOD_COMM:
10382		device_printf(dev, "Device cannot communicate with the module.\n");
10383		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10384		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10385		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10386		break;
10387	case ICE_AQC_HEALTH_STATUS_ERR_MOD_CONFLICT:
10388		device_printf(dev, "Unresolved module conflict.\n");
10389		device_printf(dev, "Possible Solution 1: Manually set speed/duplex or use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10390		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10391		break;
10392	case ICE_AQC_HEALTH_STATUS_ERR_MOD_NOT_PRESENT:
10393		device_printf(dev, "Module is not present.\n");
10394		device_printf(dev, "Possible Solution 1: Check that the module is inserted correctly.\n");
10395		device_printf(dev, "Possible Solution 2: If the problem persists, use a cable/module that is found in the supported modules and cables list for this device.\n");
10396		break;
10397	case ICE_AQC_HEALTH_STATUS_INFO_MOD_UNDERUTILIZED:
10398		device_printf(dev, "Underutilized module.\n");
10399		device_printf(dev, "Possible Solution 1: Change or replace the module or cable.\n");
10400		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10401		break;
10402	case ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_LENIENT:
10403		device_printf(dev, "An unsupported module was detected.\n");
10404		device_printf(dev, "Possible Solution 1: Check your cable connection.\n");
10405		device_printf(dev, "Possible Solution 2: Change or replace the module or cable.\n");
10406		device_printf(dev, "Possible Solution 3: Manually set speed and duplex.\n");
10407		break;
10408	case ICE_AQC_HEALTH_STATUS_ERR_INVALID_LINK_CFG:
10409		device_printf(dev, "Invalid link configuration.\n");
10410		break;
10411	case ICE_AQC_HEALTH_STATUS_ERR_PORT_ACCESS:
10412		device_printf(dev, "Port hardware access error.\n");
10413		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10414		break;
10415	case ICE_AQC_HEALTH_STATUS_ERR_PORT_UNREACHABLE:
10416		device_printf(dev, "A port is unreachable.\n");
10417		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10418		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10419		break;
10420	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_MOD_LIMITED:
10421		device_printf(dev, "Port speed is limited due to module.\n");
10422		device_printf(dev, "Possible Solution: Change the module or use Intel(R) Ethernet Port Configuration Tool to configure the port option to match the current module speed.\n");
10423		break;
10424	case ICE_AQC_HEALTH_STATUS_ERR_PARALLEL_FAULT:
10425		device_printf(dev, "A parallel fault was detected.\n");
10426		device_printf(dev, "Possible Solution: Check link partner connection and configuration.\n");
10427		break;
10428	case ICE_AQC_HEALTH_STATUS_INFO_PORT_SPEED_PHY_LIMITED:
10429		device_printf(dev, "Port speed is limited by PHY capabilities.\n");
10430		device_printf(dev, "Possible Solution 1: Change the module to align to port option.\n");
10431		device_printf(dev, "Possible Solution 2: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10432		break;
10433	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST_TOPO:
10434		device_printf(dev, "LOM topology netlist is corrupted.\n");
10435		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10436		break;
10437	case ICE_AQC_HEALTH_STATUS_ERR_NETLIST:
10438		device_printf(dev, "Unrecoverable netlist error.\n");
10439		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10440		break;
10441	case ICE_AQC_HEALTH_STATUS_ERR_TOPO_CONFLICT:
10442		device_printf(dev, "Port topology conflict.\n");
10443		device_printf(dev, "Possible Solution 1: Use Intel(R) Ethernet Port Configuration Tool to change the port option.\n");
10444		device_printf(dev, "Possible Solution 2: Update to the latest NVM image.\n");
10445		break;
10446	case ICE_AQC_HEALTH_STATUS_ERR_LINK_HW_ACCESS:
10447		device_printf(dev, "Unrecoverable hardware access error.\n");
10448		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10449		break;
10450	case ICE_AQC_HEALTH_STATUS_ERR_LINK_RUNTIME:
10451		device_printf(dev, "Unrecoverable runtime error.\n");
10452		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10453		break;
10454	case ICE_AQC_HEALTH_STATUS_ERR_DNL_INIT:
10455		device_printf(dev, "Link management engine failed to initialize.\n");
10456		device_printf(dev, "Possible Solution: Update to the latest NVM image.\n");
10457		break;
10458	default:
10459		break;
10460	}
10461}
10462
10463/**
10464 * ice_handle_health_status_event - helper function to output health status
10465 * @sc: device softc structure
10466 * @event: event received on a control queue
10467 *
10468 * Prints out the appropriate string based on the given Health Status Event
10469 * code.
10470 */
10471static void
10472ice_handle_health_status_event(struct ice_softc *sc,
10473			       struct ice_rq_event_info *event)
10474{
10475	struct ice_aqc_health_status_elem *health_info;
10476	u16 status_count;
10477	int i;
10478
10479	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_HEALTH_STATUS))
10480		return;
10481
10482	health_info = (struct ice_aqc_health_status_elem *)event->msg_buf;
10483	status_count = le16toh(event->desc.params.get_health_status.health_status_count);
10484
10485	if (status_count > (event->buf_len / sizeof(*health_info))) {
10486		device_printf(sc->dev, "Received a health status event with invalid event count\n");
10487		return;
10488	}
10489
10490	for (i = 0; i < status_count; i++) {
10491		ice_print_health_status_string(sc->dev, health_info);
10492		health_info++;
10493	}
10494}
10495
10496/**
10497 * ice_set_default_local_lldp_mib - Possibly apply local LLDP MIB to FW
10498 * @sc: device softc structure
10499 *
10500 * This function needs to be called after link up; it makes sure the FW has
10501 * certain PFC/DCB settings. In certain configurations this will re-apply a
10502 * default local LLDP MIB configuration; this is intended to workaround a FW
10503 * behavior where these settings seem to be cleared on link up.
10504 */
10505void
10506ice_set_default_local_lldp_mib(struct ice_softc *sc)
10507{
10508	struct ice_hw *hw = &sc->hw;
10509	struct ice_port_info *pi;
10510	device_t dev = sc->dev;
10511	enum ice_status status;
10512
10513	/* Set Local MIB can disrupt flow control settings for
10514	 * non-DCB-supported devices.
10515	 */
10516	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_DCB))
10517		return;
10518
10519	pi = hw->port_info;
10520
10521	/* Don't overwrite a custom SW configuration */
10522	if (!pi->qos_cfg.is_sw_lldp &&
10523	    !ice_test_state(&sc->state, ICE_STATE_MULTIPLE_TCS))
10524		ice_set_default_local_mib_settings(sc);
10525
10526	status = ice_set_dcb_cfg(pi);
10527
10528	if (status)
10529		device_printf(dev,
10530		    "Error setting Local LLDP MIB: %s aq_err %s\n",
10531		    ice_status_str(status),
10532		    ice_aq_str(hw->adminq.sq_last_status));
10533}
10534
10535/**
10536 * ice_sbuf_print_ets_cfg - Helper function to print ETS cfg
10537 * @sbuf: string buffer to print to
10538 * @name: prefix string to use
10539 * @ets: structure to pull values from
10540 *
10541 * A helper function for ice_sysctl_dump_dcbx_cfg(), this
10542 * formats the ETS rec and cfg TLVs into text.
10543 */
10544static void
10545ice_sbuf_print_ets_cfg(struct sbuf *sbuf, const char *name, struct ice_dcb_ets_cfg *ets)
10546{
10547	sbuf_printf(sbuf, "%s.willing: %u\n", name, ets->willing);
10548	sbuf_printf(sbuf, "%s.cbs: %u\n", name, ets->cbs);
10549	sbuf_printf(sbuf, "%s.maxtcs: %u\n", name, ets->maxtcs);
10550
10551	sbuf_printf(sbuf, "%s.prio_table:", name);
10552	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10553		sbuf_printf(sbuf, " %d", ets->prio_table[i]);
10554	sbuf_printf(sbuf, "\n");
10555
10556	sbuf_printf(sbuf, "%s.tcbwtable:", name);
10557	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10558		sbuf_printf(sbuf, " %d", ets->tcbwtable[i]);
10559	sbuf_printf(sbuf, "\n");
10560
10561	sbuf_printf(sbuf, "%s.tsatable:", name);
10562	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++)
10563		sbuf_printf(sbuf, " %d", ets->tsatable[i]);
10564	sbuf_printf(sbuf, "\n");
10565}
10566
10567/**
10568 * ice_sysctl_dump_dcbx_cfg - Print out DCBX/DCB config info
10569 * @oidp: sysctl oid structure
10570 * @arg1: pointer to private data structure
10571 * @arg2: AQ define for either Local or Remote MIB
10572 * @req: sysctl request pointer
10573 *
10574 * Prints out DCB/DCBX configuration, including the contents
10575 * of either the local or remote MIB, depending on the value
10576 * used in arg2.
10577 */
10578static int
10579ice_sysctl_dump_dcbx_cfg(SYSCTL_HANDLER_ARGS)
10580{
10581	struct ice_softc *sc = (struct ice_softc *)arg1;
10582	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg = {};
10583	struct ice_dcbx_cfg dcb_buf = {};
10584	struct ice_dcbx_cfg *dcbcfg;
10585	struct ice_hw *hw = &sc->hw;
10586	device_t dev = sc->dev;
10587	struct sbuf *sbuf;
10588	enum ice_status status;
10589	u8 maxtcs, dcbx_status, is_sw_lldp;
10590
10591	UNREFERENCED_PARAMETER(oidp);
10592
10593	if (ice_driver_is_detaching(sc))
10594		return (ESHUTDOWN);
10595
10596	is_sw_lldp = hw->port_info->qos_cfg.is_sw_lldp;
10597
10598	/* The driver doesn't receive a Remote MIB via SW */
10599	if (is_sw_lldp && arg2 == ICE_AQ_LLDP_MIB_REMOTE)
10600		return (ENOENT);
10601
10602	dcbcfg = &hw->port_info->qos_cfg.local_dcbx_cfg;
10603	if (!is_sw_lldp) {
10604		/* Collect information from the FW in FW LLDP mode */
10605		dcbcfg = &dcb_buf;
10606		status = ice_aq_get_dcb_cfg(hw, (u8)arg2,
10607		    ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbcfg);
10608		if (status && arg2 == ICE_AQ_LLDP_MIB_REMOTE &&
10609		    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
10610			device_printf(dev,
10611			    "Unable to query Remote MIB; port has not received one yet\n");
10612			return (ENOENT);
10613		}
10614		if (status) {
10615			device_printf(dev, "Unable to query LLDP MIB, err %s aq_err %s\n",
10616			    ice_status_str(status),
10617			    ice_aq_str(hw->adminq.sq_last_status));
10618			return (EIO);
10619		}
10620	}
10621
10622	status = ice_aq_get_cee_dcb_cfg(hw, &cee_cfg, NULL);
10623	if (status == ICE_SUCCESS)
10624		dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
10625	else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
10626		dcbcfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
10627	else
10628		device_printf(dev, "Get CEE DCB Cfg AQ cmd err %s aq_err %s\n",
10629		    ice_status_str(status),
10630		    ice_aq_str(hw->adminq.sq_last_status));
10631
10632	maxtcs = hw->func_caps.common_cap.maxtc;
10633	dcbx_status = ice_get_dcbx_status(hw);
10634
10635	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10636
10637	/* Do the actual printing */
10638	sbuf_printf(sbuf, "\n");
10639	sbuf_printf(sbuf, "SW LLDP mode: %d\n", is_sw_lldp);
10640	sbuf_printf(sbuf, "Function caps maxtcs: %d\n", maxtcs);
10641	sbuf_printf(sbuf, "dcbx_status: %d\n", dcbx_status);
10642
10643	sbuf_printf(sbuf, "numapps: %u\n", dcbcfg->numapps);
10644	sbuf_printf(sbuf, "CEE TLV status: %u\n", dcbcfg->tlv_status);
10645	sbuf_printf(sbuf, "pfc_mode: %s\n", (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) ?
10646	    "DSCP" : "VLAN");
10647	sbuf_printf(sbuf, "dcbx_mode: %s\n",
10648	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_IEEE) ? "IEEE" :
10649	    (dcbcfg->dcbx_mode == ICE_DCBX_MODE_CEE) ? "CEE" :
10650	    "Unknown");
10651
10652	ice_sbuf_print_ets_cfg(sbuf, "etscfg", &dcbcfg->etscfg);
10653	ice_sbuf_print_ets_cfg(sbuf, "etsrec", &dcbcfg->etsrec);
10654
10655	sbuf_printf(sbuf, "pfc.willing: %u\n", dcbcfg->pfc.willing);
10656	sbuf_printf(sbuf, "pfc.mbc: %u\n", dcbcfg->pfc.mbc);
10657	sbuf_printf(sbuf, "pfc.pfccap: 0x%0x\n", dcbcfg->pfc.pfccap);
10658	sbuf_printf(sbuf, "pfc.pfcena: 0x%0x\n", dcbcfg->pfc.pfcena);
10659
10660	if (arg2 == ICE_AQ_LLDP_MIB_LOCAL) {
10661		sbuf_printf(sbuf, "dscp_map:\n");
10662		for (int i = 0; i < 8; i++) {
10663			for (int j = 0; j < 8; j++)
10664				sbuf_printf(sbuf, " %d",
10665					    dcbcfg->dscp_map[i * 8 + j]);
10666			sbuf_printf(sbuf, "\n");
10667		}
10668
10669		sbuf_printf(sbuf, "\nLocal registers:\n");
10670		sbuf_printf(sbuf, "PRTDCB_GENC.NUMTC: %d\n",
10671		    (rd32(hw, PRTDCB_GENC) & PRTDCB_GENC_NUMTC_M)
10672		        >> PRTDCB_GENC_NUMTC_S);
10673		sbuf_printf(sbuf, "PRTDCB_TUP2TC: 0x%0x\n",
10674		    (rd32(hw, PRTDCB_TUP2TC)));
10675		sbuf_printf(sbuf, "PRTDCB_RUP2TC: 0x%0x\n",
10676		    (rd32(hw, PRTDCB_RUP2TC)));
10677		sbuf_printf(sbuf, "GLDCB_TC2PFC: 0x%0x\n",
10678		    (rd32(hw, GLDCB_TC2PFC)));
10679	}
10680
10681	/* Finish */
10682	sbuf_finish(sbuf);
10683	sbuf_delete(sbuf);
10684
10685	return (0);
10686}
10687
10688/**
10689 * ice_sysctl_dump_vsi_cfg - print PF LAN VSI configuration
10690 * @oidp: sysctl oid structure
10691 * @arg1: pointer to private data structure
10692 * @arg2: unused
10693 * @req: sysctl request pointer
10694 *
10695 * XXX: This could be extended to apply to arbitrary PF-owned VSIs,
10696 * but for simplicity, this only works on the PF's LAN VSI.
10697 */
10698static int
10699ice_sysctl_dump_vsi_cfg(SYSCTL_HANDLER_ARGS)
10700{
10701	struct ice_softc *sc = (struct ice_softc *)arg1;
10702	struct ice_vsi_ctx ctx = { 0 };
10703	struct ice_hw *hw = &sc->hw;
10704	device_t dev = sc->dev;
10705	struct sbuf *sbuf;
10706	enum ice_status status;
10707
10708	UNREFERENCED_PARAMETER(oidp);
10709	UNREFERENCED_PARAMETER(arg2);
10710
10711	if (ice_driver_is_detaching(sc))
10712		return (ESHUTDOWN);
10713
10714	/* Get HW absolute index of a VSI */
10715	ctx.vsi_num = ice_get_hw_vsi_num(hw, sc->pf_vsi.idx);
10716
10717	status = ice_aq_get_vsi_params(hw, &ctx, NULL);
10718	if (status != ICE_SUCCESS) {
10719		device_printf(dev,
10720		    "Get VSI AQ call failed, err %s aq_err %s\n",
10721		    ice_status_str(status),
10722		    ice_aq_str(hw->adminq.sq_last_status));
10723		return (EIO);
10724	}
10725
10726	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10727
10728	/* Do the actual printing */
10729	sbuf_printf(sbuf, "\n");
10730
10731	sbuf_printf(sbuf, "VSI NUM: %d\n", ctx.vsi_num);
10732	sbuf_printf(sbuf, "VF  NUM: %d\n", ctx.vf_num);
10733	sbuf_printf(sbuf, "VSIs allocated: %d\n", ctx.vsis_allocd);
10734	sbuf_printf(sbuf, "VSIs unallocated: %d\n", ctx.vsis_unallocated);
10735
10736	sbuf_printf(sbuf, "Rx Queue Map method: %d\n",
10737	    LE16_TO_CPU(ctx.info.mapping_flags));
10738	/* The PF VSI is always contiguous, so there's no if-statement here */
10739	sbuf_printf(sbuf, "Rx Queue base: %d\n",
10740	    LE16_TO_CPU(ctx.info.q_mapping[0]));
10741	sbuf_printf(sbuf, "Rx Queue count: %d\n",
10742	    LE16_TO_CPU(ctx.info.q_mapping[1]));
10743
10744	sbuf_printf(sbuf, "TC qbases  :");
10745	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10746		sbuf_printf(sbuf, " %4d",
10747		    ctx.info.tc_mapping[i] & ICE_AQ_VSI_TC_Q_OFFSET_M);
10748	}
10749	sbuf_printf(sbuf, "\n");
10750
10751	sbuf_printf(sbuf, "TC qcounts :");
10752	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10753		sbuf_printf(sbuf, " %4d",
10754		    1 << (ctx.info.tc_mapping[i] >> ICE_AQ_VSI_TC_Q_NUM_S));
10755	}
10756
10757	/* Finish */
10758	sbuf_finish(sbuf);
10759	sbuf_delete(sbuf);
10760
10761	return (0);
10762}
10763
/**
 * ice_ets_str_to_tbl - Parse string into ETS table
 * @str: input string to parse
 * @table: output eight values used for ETS values
 * @limit: max valid value to accept for ETS values
 *
 * Parses a string and converts the eight values within
 * into a table that can be used in setting ETS settings
 * in a MIB.
 *
 * Each value is parsed with strtol() using base 0, so decimal, octal and
 * hexadecimal notations are accepted. Consecutive values must be separated
 * by exactly one separator character (of any kind); anything following the
 * eighth value is ignored. Entries of @table that precede a parse failure
 * may already have been overwritten when an error is returned.
 *
 * @return 0 on success, EINVAL if a value is missing or is not a number, or
 * if a parsed value is not between 0 and limit.
 */
static int
ice_ets_str_to_tbl(const char *str, u8 *table, u8 limit)
{
	const char *str_start = str;
	char *str_end;
	long token;

	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
		token = strtol(str_start, &str_end, 0);

		/* Nothing was consumed: the value is missing or malformed */
		if (str_end == str_start)
			return (EINVAL);

		if (token < 0 || token > limit)
			return (EINVAL);

		table[i] = (u8)token;

		/* Nothing needs to follow the final value */
		if (i == ICE_MAX_TRAFFIC_CLASS - 1)
			break;

		/* More values are required, so a separator must follow.
		 * Stopping at the terminator here keeps the parser from
		 * stepping past the end of the string.
		 */
		if (*str_end == '\0')
			return (EINVAL);

		str_start = (str_end + 1);
	}

	return (0);
}
10795
/**
 * ice_check_ets_bw - Check if ETS bw vals are valid
 * @table: eight values used for ETS bandwidth
 *
 * Adds up one entry per traffic class from @table.
 *
 * @return true if the entries add up to exactly 100, false otherwise.
 */
static bool
ice_check_ets_bw(u8 *table)
{
	int total = 0;
	int tc = ICE_MAX_TRAFFIC_CLASS;

	/* Addition is order-independent, so walk the table backwards */
	while (tc-- > 0)
		total += (int)table[tc];

	return (total == 100);
}
10812
10813/**
10814 * ice_cfg_pba_num - Determine if PBA Number is retrievable
10815 * @sc: the device private softc structure
10816 *
10817 * Sets the feature flag for the existence of a PBA number
10818 * based on the success of the read command.  This does not
10819 * cache the result.
10820 */
10821void
10822ice_cfg_pba_num(struct ice_softc *sc)
10823{
10824	u8 pba_string[32] = "";
10825
10826	if ((ice_is_bit_set(sc->feat_cap, ICE_FEATURE_HAS_PBA)) &&
10827	    (ice_read_pba_string(&sc->hw, pba_string, sizeof(pba_string)) == 0))
10828		ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_en);
10829}
10830
10831/**
10832 * ice_sysctl_query_port_ets - print Port ETS Config from AQ
10833 * @oidp: sysctl oid structure
10834 * @arg1: pointer to private data structure
10835 * @arg2: unused
10836 * @req: sysctl request pointer
10837 */
10838static int
10839ice_sysctl_query_port_ets(SYSCTL_HANDLER_ARGS)
10840{
10841	struct ice_softc *sc = (struct ice_softc *)arg1;
10842	struct ice_aqc_port_ets_elem port_ets = { 0 };
10843	struct ice_hw *hw = &sc->hw;
10844	struct ice_port_info *pi;
10845	device_t dev = sc->dev;
10846	struct sbuf *sbuf;
10847	enum ice_status status;
10848	int i = 0;
10849
10850	UNREFERENCED_PARAMETER(oidp);
10851	UNREFERENCED_PARAMETER(arg2);
10852
10853	if (ice_driver_is_detaching(sc))
10854		return (ESHUTDOWN);
10855
10856	pi = hw->port_info;
10857
10858	status = ice_aq_query_port_ets(pi, &port_ets, sizeof(port_ets), NULL);
10859	if (status != ICE_SUCCESS) {
10860		device_printf(dev,
10861		    "Query Port ETS AQ call failed, err %s aq_err %s\n",
10862		    ice_status_str(status),
10863		    ice_aq_str(hw->adminq.sq_last_status));
10864		return (EIO);
10865	}
10866
10867	sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
10868
10869	/* Do the actual printing */
10870	sbuf_printf(sbuf, "\n");
10871
10872	sbuf_printf(sbuf, "Valid TC map: 0x%x\n", port_ets.tc_valid_bits);
10873
10874	sbuf_printf(sbuf, "TC BW %%:");
10875	ice_for_each_traffic_class(i) {
10876		sbuf_printf(sbuf, " %3d", port_ets.tc_bw_share[i]);
10877	}
10878	sbuf_printf(sbuf, "\n");
10879
10880	sbuf_printf(sbuf, "EIR profile ID: %d\n", port_ets.port_eir_prof_id);
10881	sbuf_printf(sbuf, "CIR profile ID: %d\n", port_ets.port_cir_prof_id);
10882	sbuf_printf(sbuf, "TC Node prio: 0x%x\n", port_ets.tc_node_prio);
10883
10884	sbuf_printf(sbuf, "TC Node TEIDs:\n");
10885	ice_for_each_traffic_class(i) {
10886		sbuf_printf(sbuf, "%d: %d\n", i, port_ets.tc_node_teid[i]);
10887	}
10888
10889	/* Finish */
10890	sbuf_finish(sbuf);
10891	sbuf_delete(sbuf);
10892
10893	return (0);
10894}
10895
10896/**
10897 * ice_sysctl_dscp2tc_map - Map DSCP to hardware TCs
10898 * @oidp: sysctl oid structure
10899 * @arg1: pointer to private data structure
10900 * @arg2: which eight DSCP to UP mappings to configure (0 - 7)
10901 * @req: sysctl request pointer
10902 *
10903 * Gets or sets the current DSCP to UP table cached by the driver. Since there
10904 * are 64 possible DSCP values to configure, this sysctl only configures
10905 * chunks of 8 in that space at a time.
10906 *
10907 * This sysctl is only relevant in DSCP mode, and will only function in SW DCB
10908 * mode.
10909 */
10910static int
10911ice_sysctl_dscp2tc_map(SYSCTL_HANDLER_ARGS)
10912{
10913	struct ice_softc *sc = (struct ice_softc *)arg1;
10914	struct ice_dcbx_cfg *local_dcbx_cfg;
10915	struct ice_port_info *pi;
10916	struct ice_hw *hw = &sc->hw;
10917	device_t dev = sc->dev;
10918	enum ice_status status;
10919	struct sbuf *sbuf;
10920	int ret;
10921
10922	/* Store input rates from user */
10923	char dscp_user_buf[128] = "";
10924	u8 new_dscp_table_seg[ICE_MAX_TRAFFIC_CLASS] = {};
10925
10926	if (ice_driver_is_detaching(sc))
10927		return (ESHUTDOWN);
10928
10929	if (req->oldptr == NULL && req->newptr == NULL) {
10930		ret = SYSCTL_OUT(req, 0, 128);
10931		return (ret);
10932	}
10933
10934	pi = hw->port_info;
10935	local_dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
10936
10937	sbuf = sbuf_new(NULL, dscp_user_buf, 128, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
10938
10939	/* Format DSCP-to-UP data for output */
10940	for (int i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
10941		sbuf_printf(sbuf, "%d", local_dcbx_cfg->dscp_map[arg2 * 8 + i]);
10942		if (i != ICE_MAX_TRAFFIC_CLASS - 1)
10943			sbuf_printf(sbuf, ",");
10944	}
10945
10946	sbuf_finish(sbuf);
10947	sbuf_delete(sbuf);
10948
10949	/* Read in the new DSCP mapping values */
10950	ret = sysctl_handle_string(oidp, dscp_user_buf, sizeof(dscp_user_buf), req);
10951	if ((ret) || (req->newptr == NULL))
10952		return (ret);
10953
10954	/* Don't allow setting changes in FW DCB mode */
10955	if (!hw->port_info->qos_cfg.is_sw_lldp) {
10956		device_printf(dev, "%s: DSCP mapping is not allowed in FW DCBX mode\n",
10957		    __func__);
10958		return (EINVAL);
10959	}
10960
10961	/* Convert 8 values in a string to a table; this is similar to what
10962	 * needs to be done for ETS settings, so this function can be re-used
10963	 * for that purpose.
10964	 */
10965	ret = ice_ets_str_to_tbl(dscp_user_buf, new_dscp_table_seg, 8);
10966	if (ret) {
10967		device_printf(dev, "%s: Could not parse input DSCP2TC table: %s\n",
10968		    __func__, dscp_user_buf);
10969		return (ret);
10970	}
10971
10972	memcpy(&local_dcbx_cfg->dscp_map[arg2 * 8], new_dscp_table_seg,
10973	    sizeof(new_dscp_table_seg));
10974
10975	local_dcbx_cfg->app_mode = ICE_DCBX_APPS_NON_WILLING;
10976
10977	status = ice_set_dcb_cfg(pi);
10978	if (status) {
10979		device_printf(dev,
10980		    "%s: Failed to set DCB config; status %s, aq_err %s\n",
10981		    __func__, ice_status_str(status),
10982		    ice_aq_str(hw->adminq.sq_last_status));
10983		return (EIO);
10984	}
10985
10986	ice_do_dcb_reconfig(sc, false);
10987
10988	return (0);
10989}
10990
10991/**
10992 * ice_handle_debug_dump_ioctl - Handle a debug dump ioctl request
10993 * @sc: the device private softc
10994 * @ifd: ifdrv ioctl request pointer
10995 */
10996int
10997ice_handle_debug_dump_ioctl(struct ice_softc *sc, struct ifdrv *ifd)
10998{
10999	size_t ifd_len = ifd->ifd_len;
11000	struct ice_hw *hw = &sc->hw;
11001	device_t dev = sc->dev;
11002	struct ice_debug_dump_cmd *ddc;
11003	enum ice_status status;
11004	int err = 0;
11005
11006	/* Returned arguments from the Admin Queue */
11007	u16 ret_buf_size = 0;
11008	u16 ret_next_cluster = 0;
11009	u16 ret_next_table = 0;
11010	u32 ret_next_index = 0;
11011
11012	/*
11013	 * ifioctl forwards SIOCxDRVSPEC to iflib without performing
11014	 * a privilege check. In turn, iflib forwards the ioctl to the driver
11015	 * without performing a privilege check. Perform one here to ensure
11016	 * that non-privileged threads cannot access this interface.
11017	 */
11018	err = priv_check(curthread, PRIV_DRIVER);
11019	if (err)
11020		return (err);
11021
11022	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
11023		device_printf(dev,
11024		    "%s: Driver must rebuild data structures after a reset. Operation aborted.\n",
11025		    __func__);
11026		return (EBUSY);
11027	}
11028
11029	if (ifd_len < sizeof(*ddc)) {
11030		device_printf(dev,
11031		    "%s: ifdrv length is too small. Got %zu, but expected %zu\n",
11032		    __func__, ifd_len, sizeof(*ddc));
11033		return (EINVAL);
11034	}
11035
11036	if (ifd->ifd_data == NULL) {
11037		device_printf(dev, "%s: ifd data buffer not present.\n",
11038		     __func__);
11039		return (EINVAL);
11040	}
11041
11042	ddc = (struct ice_debug_dump_cmd *)malloc(ifd_len, M_ICE, M_ZERO | M_NOWAIT);
11043	if (!ddc)
11044		return (ENOMEM);
11045
11046	/* Copy the NVM access command and data in from user space */
11047	/* coverity[tainted_data_argument] */
11048	err = copyin(ifd->ifd_data, ddc, ifd_len);
11049	if (err) {
11050		device_printf(dev, "%s: Copying request from user space failed, err %s\n",
11051			      __func__, ice_err_str(err));
11052		goto out;
11053	}
11054
11055	/* The data_size arg must be at least 1 for the AQ cmd to work */
11056	if (ddc->data_size == 0) {
11057		device_printf(dev,
11058		    "%s: data_size must be greater than 0\n", __func__);
11059		err = EINVAL;
11060		goto out;
11061	}
11062	/* ...and it can't be too long */
11063	if (ddc->data_size > (ifd_len - sizeof(*ddc))) {
11064		device_printf(dev,
11065		    "%s: data_size (%d) is larger than ifd_len space (%zu)?\n", __func__,
11066		    ddc->data_size, ifd_len - sizeof(*ddc));
11067		err = EINVAL;
11068		goto out;
11069	}
11070
11071	/* Make sure any possible data buffer space is zeroed */
11072	memset(ddc->data, 0, ifd_len - sizeof(*ddc));
11073
11074	status = ice_aq_get_internal_data(hw, ddc->cluster_id, ddc->table_id, ddc->offset,
11075	    (u8 *)ddc->data, ddc->data_size, &ret_buf_size,
11076	    &ret_next_cluster, &ret_next_table, &ret_next_index, NULL);
11077	ice_debug(hw, ICE_DBG_DIAG, "%s: ret_buf_size %d, ret_next_table %d, ret_next_index %d\n",
11078	    __func__, ret_buf_size, ret_next_table, ret_next_index);
11079	if (status) {
11080		device_printf(dev,
11081		    "%s: Get Internal Data AQ command failed, err %s aq_err %s\n",
11082		    __func__,
11083		    ice_status_str(status),
11084		    ice_aq_str(hw->adminq.sq_last_status));
11085		goto aq_error;
11086	}
11087
11088	ddc->table_id = ret_next_table;
11089	ddc->offset = ret_next_index;
11090	ddc->data_size = ret_buf_size;
11091	ddc->cluster_id = ret_next_cluster;
11092
11093	/* Copy the possibly modified contents of the handled request out */
11094	err = copyout(ddc, ifd->ifd_data, ifd->ifd_len);
11095	if (err) {
11096		device_printf(dev, "%s: Copying response back to user space failed, err %s\n",
11097			      __func__, ice_err_str(err));
11098		goto out;
11099	}
11100
11101aq_error:
11102	/* Convert private status to an error code for proper ioctl response */
11103	switch (status) {
11104	case ICE_SUCCESS:
11105		err = (0);
11106		break;
11107	case ICE_ERR_NO_MEMORY:
11108		err = (ENOMEM);
11109		break;
11110	case ICE_ERR_OUT_OF_RANGE:
11111		err = (ENOTTY);
11112		break;
11113	case ICE_ERR_AQ_ERROR:
11114		err = (EIO);
11115		break;
11116	case ICE_ERR_PARAM:
11117	default:
11118		err = (EINVAL);
11119		break;
11120	}
11121
11122out:
11123	free(ddc, M_ICE);
11124	return (err);
11125}
11126
11127/**
11128 * ice_sysctl_allow_no_fec_mod_in_auto - Change Auto FEC behavior
11129 * @oidp: sysctl oid structure
11130 * @arg1: pointer to private data structure
11131 * @arg2: unused
11132 * @req: sysctl request pointer
11133 *
11134 * Allows user to let "No FEC" mode to be used in "Auto"
11135 * FEC mode during FEC negotiation. This is only supported
11136 * on newer firmware versions.
11137 */
11138static int
11139ice_sysctl_allow_no_fec_mod_in_auto(SYSCTL_HANDLER_ARGS)
11140{
11141	struct ice_softc *sc = (struct ice_softc *)arg1;
11142	struct ice_hw *hw = &sc->hw;
11143	device_t dev = sc->dev;
11144	u8 user_flag;
11145	int ret;
11146
11147	UNREFERENCED_PARAMETER(arg2);
11148
11149	ret = priv_check(curthread, PRIV_DRIVER);
11150	if (ret)
11151		return (ret);
11152
11153	if (ice_driver_is_detaching(sc))
11154		return (ESHUTDOWN);
11155
11156	user_flag = (u8)sc->allow_no_fec_mod_in_auto;
11157
11158	ret = sysctl_handle_bool(oidp, &user_flag, 0, req);
11159	if ((ret) || (req->newptr == NULL))
11160		return (ret);
11161
11162	if (!ice_fw_supports_fec_dis_auto(hw)) {
11163		log(LOG_INFO,
11164		    "%s: Enabling or disabling of auto configuration of modules that don't support FEC is unsupported by the current firmware\n",
11165		    device_get_nameunit(dev));
11166		return (ENODEV);
11167	}
11168
11169	if (user_flag == (bool)sc->allow_no_fec_mod_in_auto)
11170		return (0);
11171
11172	sc->allow_no_fec_mod_in_auto = (u8)user_flag;
11173
11174	if (sc->allow_no_fec_mod_in_auto)
11175		log(LOG_INFO, "%s: Enabled auto configuration of No FEC modules\n",
11176		    device_get_nameunit(dev));
11177	else
11178		log(LOG_INFO,
11179		    "%s: Auto configuration of No FEC modules reset to NVM defaults\n",
11180		    device_get_nameunit(dev));
11181
11182	return (0);
11183}
11184
11185/**
11186 * ice_sysctl_temperature - Retrieve NIC temp via AQ command
11187 * @oidp: sysctl oid structure
11188 * @arg1: pointer to private data structure
11189 * @arg2: unused
11190 * @req: sysctl request pointer
11191 *
11192 * If ICE_DBG_DIAG is set in the debug.debug_mask sysctl, then this will print
11193 * temperature threshold information in the kernel message log, too.
11194 */
11195static int
11196ice_sysctl_temperature(SYSCTL_HANDLER_ARGS)
11197{
11198	struct ice_aqc_get_sensor_reading_resp resp;
11199	struct ice_softc *sc = (struct ice_softc *)arg1;
11200	struct ice_hw *hw = &sc->hw;
11201	device_t dev = sc->dev;
11202	enum ice_status status;
11203
11204	UNREFERENCED_PARAMETER(oidp);
11205	UNREFERENCED_PARAMETER(arg2);
11206
11207	if (ice_driver_is_detaching(sc))
11208		return (ESHUTDOWN);
11209
11210	status = ice_aq_get_sensor_reading(hw, ICE_AQC_INT_TEMP_SENSOR,
11211	    ICE_AQC_INT_TEMP_FORMAT, &resp, NULL);
11212	if (status != ICE_SUCCESS) {
11213		device_printf(dev,
11214		    "Get Sensor Reading AQ call failed, err %s aq_err %s\n",
11215		    ice_status_str(status),
11216		    ice_aq_str(hw->adminq.sq_last_status));
11217		return (EIO);
11218	}
11219
11220	ice_debug(hw, ICE_DBG_DIAG, "%s: Warning Temp Threshold: %d\n", __func__,
11221	    resp.data.s0f0.temp_warning_threshold);
11222	ice_debug(hw, ICE_DBG_DIAG, "%s: Critical Temp Threshold: %d\n", __func__,
11223	    resp.data.s0f0.temp_critical_threshold);
11224	ice_debug(hw, ICE_DBG_DIAG, "%s: Fatal Temp Threshold: %d\n", __func__,
11225	    resp.data.s0f0.temp_fatal_threshold);
11226
11227	return sysctl_handle_8(oidp, &resp.data.s0f0.temp, 0, req);
11228}
11229
11230/**
11231 * ice_sysctl_create_mirror_interface - Create a new ifnet that monitors
11232 *     traffic from the main PF VSI
11233 */
11234static int
11235ice_sysctl_create_mirror_interface(SYSCTL_HANDLER_ARGS)
11236{
11237	struct ice_softc *sc = (struct ice_softc *)arg1;
11238	device_t dev = sc->dev;
11239	int ret;
11240
11241	UNREFERENCED_PARAMETER(arg2);
11242
11243	ret = priv_check(curthread, PRIV_DRIVER);
11244	if (ret)
11245		return (ret);
11246
11247	if (ice_driver_is_detaching(sc))
11248		return (ESHUTDOWN);
11249
11250	/* If the user hasn't written "1" to this sysctl yet: */
11251	if (!ice_test_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC)) {
11252		/* Avoid output on the first set of reads to this sysctl in
11253		 * order to prevent a null byte from being written to the
11254		 * end result when called via sysctl(8).
11255		 */
11256		if (req->oldptr == NULL && req->newptr == NULL) {
11257			ret = SYSCTL_OUT(req, 0, 0);
11258			return (ret);
11259		}
11260
11261		char input_buf[2] = "";
11262		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
11263		if ((ret) || (req->newptr == NULL))
11264			return (ret);
11265
11266		/* If we get '1', then indicate we'll create the interface in
11267		 * the next sysctl read call.
11268		 */
11269		if (input_buf[0] == '1') {
11270			if (sc->mirr_if) {
11271				device_printf(dev,
11272				    "Mirror interface %s already exists!\n",
11273				    if_name(sc->mirr_if->ifp));
11274				return (EEXIST);
11275			}
11276			ice_set_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC);
11277			return (0);
11278		}
11279
11280		return (EINVAL);
11281	}
11282
11283	/* --- "Do Create Mirror Interface" is set --- */
11284
11285	/* Caller just wants the upper bound for size */
11286	if (req->oldptr == NULL && req->newptr == NULL) {
11287		ret = SYSCTL_OUT(req, 0, 128);
11288		return (ret);
11289	}
11290
11291	device_printf(dev, "Creating new mirroring interface...\n");
11292
11293	ret = ice_create_mirror_interface(sc);
11294	if (ret)
11295		return (ret);
11296
11297	ice_clear_state(&sc->state, ICE_STATE_DO_CREATE_MIRR_INTFC);
11298
11299	ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface attached"), 0, req);
11300	return (ret);
11301}
11302
11303/**
11304 * ice_sysctl_destroy_mirror_interface - Destroy network interface that monitors
11305 *     traffic from the main PF VSI
11306 */
11307static int
11308ice_sysctl_destroy_mirror_interface(SYSCTL_HANDLER_ARGS)
11309{
11310	struct ice_softc *sc = (struct ice_softc *)arg1;
11311	device_t dev = sc->dev;
11312	int ret;
11313
11314	UNREFERENCED_PARAMETER(arg2);
11315
11316	ret = priv_check(curthread, PRIV_DRIVER);
11317	if (ret)
11318		return (ret);
11319
11320	if (ice_driver_is_detaching(sc))
11321		return (ESHUTDOWN);
11322
11323	/* If the user hasn't written "1" to this sysctl yet: */
11324	if (!ice_test_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC)) {
11325		/* Avoid output on the first set of reads to this sysctl in
11326		 * order to prevent a null byte from being written to the
11327		 * end result when called via sysctl(8).
11328		 */
11329		if (req->oldptr == NULL && req->newptr == NULL) {
11330			ret = SYSCTL_OUT(req, 0, 0);
11331			return (ret);
11332		}
11333
11334		char input_buf[2] = "";
11335		ret = sysctl_handle_string(oidp, input_buf, sizeof(input_buf), req);
11336		if ((ret) || (req->newptr == NULL))
11337			return (ret);
11338
11339		/* If we get '1', then indicate we'll create the interface in
11340		 * the next sysctl read call.
11341		 */
11342		if (input_buf[0] == '1') {
11343			if (!sc->mirr_if) {
11344				device_printf(dev,
11345				    "No mirror interface exists!\n");
11346				return (EINVAL);
11347			}
11348			ice_set_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC);
11349			return (0);
11350		}
11351
11352		return (EINVAL);
11353	}
11354
11355	/* --- "Do Destroy Mirror Interface" is set --- */
11356
11357	/* Caller just wants the upper bound for size */
11358	if (req->oldptr == NULL && req->newptr == NULL) {
11359		ret = SYSCTL_OUT(req, 0, 128);
11360		return (ret);
11361	}
11362
11363	device_printf(dev, "Destroying mirroring interface...\n");
11364
11365	ice_destroy_mirror_interface(sc);
11366
11367	ice_clear_state(&sc->state, ICE_STATE_DO_DESTROY_MIRR_INTFC);
11368
11369	ret = sysctl_handle_string(oidp, __DECONST(char *, "Interface destroyed"), 0, req);
11370	return (ret);
11371}
11372