/* SPDX-License-Identifier: BSD-3-Clause */
/*  Copyright (c) 2020, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the name of the Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */
/*$FreeBSD$*/

/**
 * @file if_ice_iflib.c
 * @brief iflib driver implementation
 *
 * Contains the main entry point for the iflib driver implementation. It
 * implements the various ifdi driver methods, and sets up the module and
 * driver values to load an iflib driver.
 */

#include "ice_iflib.h"
#include "ice_drv_info.h"
#include "ice_switch.h"
#include "ice_sched.h"

#include <sys/module.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

/*
 * Device method prototypes
 */

static void *ice_register(device_t);
static int  ice_if_attach_pre(if_ctx_t);
static int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
static int  ice_if_attach_post(if_ctx_t);
static void ice_attach_post_recovery_mode(struct ice_softc *sc);
static int  ice_if_detach(if_ctx_t);
static int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
static int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
static int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
static void ice_if_queues_free(if_ctx_t ctx);
static int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
static void ice_if_intr_enable(if_ctx_t ctx);
static void ice_if_intr_disable(if_ctx_t ctx);
static int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
static int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
static int ice_if_promisc_set(if_ctx_t ctx, int flags);
static void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
static int ice_if_media_change(if_ctx_t ctx);
static void ice_if_init(if_ctx_t ctx);
static void ice_if_timer(if_ctx_t ctx, uint16_t qid);
static void ice_if_update_admin_status(if_ctx_t ctx);
static void ice_if_multi_set(if_ctx_t ctx);
static void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
static void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
static void ice_if_stop(if_ctx_t ctx);
static uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
static int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
static int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
static int ice_if_suspend(if_ctx_t ctx);
static int ice_if_resume(if_ctx_t ctx);

static int ice_msix_que(void *arg);
static int ice_msix_admin(void *arg);

/*
 * Helper function prototypes
 */
static int ice_pci_mapping(struct ice_softc *sc);
static void ice_free_pci_mapping(struct ice_softc *sc);
static void ice_update_link_status(struct ice_softc *sc, bool update_media);
static void ice_init_device_features(struct ice_softc *sc);
static void ice_init_tx_tracking(struct ice_vsi *vsi);
static void ice_handle_reset_event(struct ice_softc *sc);
static void ice_handle_pf_reset_request(struct ice_softc *sc);
static void ice_prepare_for_reset(struct ice_softc *sc);
static int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
static void ice_rebuild(struct ice_softc *sc);
static void ice_rebuild_recovery_mode(struct ice_softc *sc);
static void ice_free_irqvs(struct ice_softc *sc);
static void ice_update_rx_mbuf_sz(struct ice_softc *sc);
static void ice_poll_for_media_avail(struct ice_softc *sc);
static void ice_setup_scctx(struct ice_softc *sc);
static int ice_allocate_msix(struct ice_softc *sc);
static void ice_admin_timer(void *arg);
static void ice_transition_recovery_mode(struct ice_softc *sc);
static void ice_transition_safe_mode(struct ice_softc *sc);

/*
 * Device Interface Declaration
 */

/**
 * @var ice_methods
 * @brief ice driver method entry points
 *
 * List of device methods implementing the generic device interface used by
 * the device stack to interact with the ice driver. Since this is an iflib
 * driver, most of the methods point to the generic iflib implementation.
 */
static device_method_t ice_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register, ice_register),
	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
	DEVMETHOD(device_attach,   iflib_device_attach),
	DEVMETHOD(device_detach,   iflib_device_detach),
	DEVMETHOD(device_shutdown, iflib_device_shutdown),
	DEVMETHOD(device_suspend,  iflib_device_suspend),
	DEVMETHOD(device_resume,   iflib_device_resume),
	DEVMETHOD_END
};

/**
 * @var ice_iflib_methods
 * @brief iflib method entry points
 *
 * List of device methods used by the iflib stack to interact with this
 * driver. These are the real main entry points used to interact with this
 * driver.
 */
static device_method_t ice_iflib_methods[] = {
	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
	DEVMETHOD(ifdi_detach, ice_if_detach),
	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
	DEVMETHOD(ifdi_media_status, ice_if_media_status),
	DEVMETHOD(ifdi_media_change, ice_if_media_change),
	DEVMETHOD(ifdi_init, ice_if_init),
	DEVMETHOD(ifdi_stop, ice_if_stop),
	DEVMETHOD(ifdi_timer, ice_if_timer),
	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
	DEVMETHOD(ifdi_suspend, ice_if_suspend),
	DEVMETHOD(ifdi_resume, ice_if_resume),
	DEVMETHOD_END
};

/**
 * @var ice_driver
 * @brief driver structure for the generic device stack
 *
 * driver_t definition used to setup the generic device methods.
 */
static driver_t ice_driver = {
	.name = "ice",
	.methods = ice_methods,
	.size = sizeof(struct ice_softc),
};

/**
 * @var ice_iflib_driver
 * @brief driver structure for the iflib stack
 *
 * driver_t definition used to setup the iflib device methods.
 */
static driver_t ice_iflib_driver = {
	.name = "ice",
	.methods = ice_iflib_methods,
	.size = sizeof(struct ice_softc),
};

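/*
 * Tx/Rx descriptor handling entry points, declared here and defined elsewhere
 * in the driver. ice_recovery_txrx is the alternate set installed while the
 * device is in firmware recovery mode, when normal Tx/Rx functionality is
 * disabled (see ice_setup_scctx() below).
 */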
extern struct if_txrx ice_txrx;
extern struct if_txrx ice_recovery_txrx;

/**
 * @var ice_sctx
 * @brief ice driver shared context
 *
 * Structure defining shared values (context) that are used by all instances of
 * the device. Primarily used to setup details about how the iflib stack
 * should treat this driver. Also defines the default, minimum, and maximum
 * number of descriptors in each ring.
 */
static struct if_shared_ctx ice_sctx = {
	.isc_magic = IFLIB_MAGIC,
	.isc_q_align = PAGE_SIZE,

	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
	 * that doesn't make sense since that would be larger than the maximum
	 * size of a single packet.
	 */
	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,

	/* XXX: This is only used by iflib to ensure that
	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
	 */
	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
	/* XXX: This is used by iflib to set the number of segments in the TSO
	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
	 * related ifnet parameter.
	 */
	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,

	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,

	.isc_nfl = 1,
	.isc_ntxqs = 1,
	.isc_nrxqs = 1,

	.isc_admin_intrcnt = 1,
	.isc_vendor_info = ice_vendor_info_array,
	.isc_driver_version = __DECONST(char *, ice_driver_version),
	.isc_driver = &ice_iflib_driver,

	/*
	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
	 * for hardware checksum offload
	 *
	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
	 * IP sum field, required by our hardware to calculate valid TSO
	 * checksums.
	 *
	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
	 * even when the interface is down.
	 *
	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
	 * vectors manually instead of relying on iflib code to do this.
	 */
	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,

	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
};

/**
 * @var ice_devclass
 * @brief ice driver device class
 *
 * device class used to setup the ice driver module kobject class.
 */
devclass_t ice_devclass;
DRIVER_MODULE(ice, pci, ice_driver, ice_devclass, ice_module_event_handler, 0);

MODULE_VERSION(ice, 1);
MODULE_DEPEND(ice, pci, 1, 1, 1);
MODULE_DEPEND(ice, ether, 1, 1, 1);
MODULE_DEPEND(ice, iflib, 1, 1, 1);

IFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
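/*
 * With the module metadata above, the driver can be loaded statically or at
 * runtime; for example (assuming the module ships as if_ice.ko, which
 * depends on the build):
 *
 *	kldload if_ice
 *
 * The MODULE_DEPEND() entries ensure that the pci, ether, and iflib modules
 * are present before this driver is loaded.
 */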

/* Static driver-wide sysctls */
#include "ice_iflib_sysctls.h"

/**
 * ice_pci_mapping - Map PCI BAR memory
 * @sc: device private softc
 *
 * Map PCI BAR 0 for device operation.
 */
static int
ice_pci_mapping(struct ice_softc *sc)
{
	int rc;

	/* Map BAR0 */
	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
	if (rc)
		return rc;

	return 0;
}

/**
 * ice_free_pci_mapping - Release PCI BAR memory
 * @sc: device private softc
 *
 * Release PCI BARs which were previously mapped by ice_pci_mapping().
 */
static void
ice_free_pci_mapping(struct ice_softc *sc)
{
	/* Free BAR0 */
	ice_free_bar(sc->dev, &sc->bar0);
}

/*
 * Device methods
 */

/**
 * ice_register - register device method callback
 * @dev: the device being registered
 *
 * Returns a pointer to the shared context structure, which is used by iflib.
 */
static void *
ice_register(device_t dev __unused)
{
	return &ice_sctx;
} /* ice_register */

/**
 * ice_setup_scctx - Setup the iflib softc context structure
 * @sc: the device private structure
 *
 * Setup the parameters in the if_softc_ctx_t structure used by the iflib stack
 * when loading.
 */
static void
ice_setup_scctx(struct ice_softc *sc)
{
	if_softc_ctx_t scctx = sc->scctx;
	struct ice_hw *hw = &sc->hw;
	bool safe_mode, recovery_mode;

	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);

	/*
	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
	 * a single queue pair.
	 */
	if (safe_mode || recovery_mode) {
		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
		scctx->isc_ntxqsets_max = 1;
		scctx->isc_nrxqsets_max = 1;
	} else {
		/*
		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
		 * the values of the override sysctls. Cache these initial
		 * values so that the driver can be aware of what the iflib
		 * sysctl value is when setting up MSI-X vectors.
		 */
		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;

		if (scctx->isc_ntxqsets == 0)
			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
		if (scctx->isc_nrxqsets == 0)
			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;

		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;

		/*
		 * Sanity check that the iflib sysctl values are within the
		 * maximum supported range.
		 */
		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
	}

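	/* Illustrative sizing (assumed values, not taken from this file):
	 * with 16-byte Tx descriptors, a 128-byte DBA_ALIGN, and a ring of
	 * 1024 descriptors, the Tx ring occupies 1024 * 16 = 16384 bytes,
	 * already a multiple of 128, so roundup2() leaves it unchanged.
	 */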
	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);

	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;

	scctx->isc_msix_bar = PCIR_BAR(ICE_MSIX_BAR);
	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;

	/*
	 * If the driver loads in recovery mode, disable Tx/Rx functionality
	 */
	if (recovery_mode)
		scctx->isc_txrx = &ice_recovery_txrx;
	else
		scctx->isc_txrx = &ice_txrx;

	/*
	 * If the driver loads in Safe mode or Recovery mode, disable
	 * advanced features including hardware offloads.
	 */
	if (safe_mode || recovery_mode) {
		scctx->isc_capenable = ICE_SAFE_CAPS;
		scctx->isc_tx_csum_flags = 0;
	} else {
		scctx->isc_capenable = ICE_FULL_CAPS;
		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
	}

	scctx->isc_capabilities = scctx->isc_capenable;
} /* ice_setup_scctx */

/**
 * ice_if_attach_pre - Early device attach logic
 * @ctx: the iflib context structure
 *
 * Called by iflib during the attach process. Earliest main driver entry
 * point which performs necessary hardware and driver initialization. Called
 * before the Tx and Rx queues are allocated.
 */
static int
ice_if_attach_pre(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	enum ice_fw_modes fw_mode;
	enum ice_status status;
	if_softc_ctx_t scctx;
	struct ice_hw *hw;
	device_t dev;
	int err;

	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");

	sc->ctx = ctx;
	sc->media = iflib_get_media(ctx);
	sc->sctx = iflib_get_sctx(ctx);
	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);

	dev = sc->dev = iflib_get_dev(ctx);
	scctx = sc->scctx = iflib_get_softc_ctx(ctx);

	hw = &sc->hw;
	hw->back = sc;

	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
		 "%s:admin", device_get_nameunit(dev));
	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);

	ASSERT_CTX_LOCKED(sc);

	if (ice_pci_mapping(sc)) {
		err = (ENXIO);
		goto destroy_admin_timer;
	}

	/* Save off the PCI information */
	ice_save_pci_info(hw, dev);

	/* create tunables as early as possible */
	ice_add_device_tunables(sc);

	/* Setup ControlQ lengths */
	ice_set_ctrlq_len(hw);

	fw_mode = ice_get_fw_mode(hw);
	if (fw_mode == ICE_FW_MODE_REC) {
		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");

		err = ice_attach_pre_recovery_mode(sc);
		if (err)
			goto free_pci_mapping;

		return (0);
	}

	/* Initialize the hw data structure */
	status = ice_init_hw(hw);
	if (status) {
		if (status == ICE_ERR_FW_API_VER) {
			/* Enter recovery mode, so that the driver remains
			 * loaded. This way, if the system administrator
			 * cannot update the driver, they may still attempt to
			 * downgrade the NVM.
			 */
			err = ice_attach_pre_recovery_mode(sc);
			if (err)
				goto free_pci_mapping;

			return (0);
		} else {
			err = EIO;
			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
				      ice_status_str(status),
				      ice_aq_str(hw->adminq.sq_last_status));
		}
		goto free_pci_mapping;
	}

	/* Notify firmware of the device driver version */
	err = ice_send_version(sc);
	if (err)
		goto deinit_hw;

	ice_load_pkg_file(sc);

	err = ice_init_link_events(sc);
	if (err) {
		device_printf(dev, "ice_init_link_events failed: %s\n",
			      ice_err_str(err));
		goto deinit_hw;
	}

	ice_print_nvm_version(sc);

	ice_init_device_features(sc);

	/* Setup the MAC address */
	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);

	/* Setup the iflib softc context structure */
	ice_setup_scctx(sc);

	/* Initialize the Tx queue manager */
	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
	if (err) {
		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
			      ice_err_str(err));
		goto deinit_hw;
	}

	/* Initialize the Rx queue manager */
	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
	if (err) {
		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
			      ice_err_str(err));
		goto free_tx_qmgr;
	}

	/* Initialize the interrupt resource manager */
	err = ice_alloc_intr_tracking(sc);
	if (err)
		/* Errors are already printed */
		goto free_rx_qmgr;

	/* Determine maximum number of VSIs we'll prepare for */
	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
				    hw->func_caps.guar_num_vsi);

	if (!sc->num_available_vsi) {
		err = EIO;
		device_printf(dev, "No VSIs allocated to host\n");
		goto free_intr_tracking;
	}

	/* Allocate storage for the VSI pointers */
	sc->all_vsi = (struct ice_vsi **)
		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
		       M_ICE, M_WAITOK | M_ZERO);
	if (!sc->all_vsi) {
		err = ENOMEM;
		device_printf(dev, "Unable to allocate VSI array\n");
		goto free_intr_tracking;
	}

	/*
	 * Prepare the statically allocated primary PF VSI in the softc
	 * structure. Other VSIs will be dynamically allocated as needed.
	 */
	ice_setup_pf_vsi(sc);

	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
	    scctx->isc_nrxqsets_max);
	if (err) {
		device_printf(dev, "Unable to allocate VSI Queue maps\n");
		goto free_main_vsi;
	}

	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
	err = ice_allocate_msix(sc);
	if (err)
		goto free_main_vsi;

	return 0;

free_main_vsi:
	/* ice_release_vsi will free the queue maps if they were allocated */
	ice_release_vsi(&sc->pf_vsi);
	free(sc->all_vsi, M_ICE);
	sc->all_vsi = NULL;
free_intr_tracking:
	ice_free_intr_tracking(sc);
free_rx_qmgr:
	ice_resmgr_destroy(&sc->rx_qmgr);
free_tx_qmgr:
	ice_resmgr_destroy(&sc->tx_qmgr);
deinit_hw:
	ice_deinit_hw(hw);
free_pci_mapping:
	ice_free_pci_mapping(sc);
destroy_admin_timer:
	mtx_lock(&sc->admin_mtx);
	callout_stop(&sc->admin_timer);
	mtx_unlock(&sc->admin_mtx);
	mtx_destroy(&sc->admin_mtx);
	return err;
} /* ice_if_attach_pre */

/**
 * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
 * @sc: the device private softc
 *
 * Loads the device driver in limited Firmware Recovery mode, intended to
 * allow users to update the firmware to attempt to recover the device.
 *
 * @remark We may enter recovery mode in case either (a) the firmware is
 * detected to be in an invalid state and must be re-programmed, or (b) the
 * driver detects that the loaded firmware has a non-compatible API version
 * that the driver cannot operate with.
 */
static int
ice_attach_pre_recovery_mode(struct ice_softc *sc)
{
	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);

	/* Setup the iflib softc context */
	ice_setup_scctx(sc);

	/* Setup the PF VSI back pointer */
	sc->pf_vsi.sc = sc;

	/*
	 * We still need to allocate MSI-X vectors since we need one vector to
	 * run the admin interrupt.
	 */
	return ice_allocate_msix(sc);
}

/**
 * ice_update_link_status - notify OS of link state change
 * @sc: device private softc structure
 * @update_media: true if we should update media even if link didn't change
 *
 * Called to notify iflib core of link status changes. Should be called once
 * during attach_post, and whenever link status changes during runtime.
 *
 * This call only updates the currently supported media types if the link
 * status changed, or if update_media is set to true.
 */
static void
ice_update_link_status(struct ice_softc *sc, bool update_media)
{
	struct ice_hw *hw = &sc->hw;
	enum ice_status status;

	/* Never report link up when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	/* Report link status to iflib only once each time it changes */
	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
		if (sc->link_up) { /* link is up */
			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);

			ice_set_default_local_lldp_mib(sc);

			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);

			ice_link_up_msg(sc);

			update_media = true;
		} else { /* link is down */
			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);

			update_media = true;
		}
	}

	/* Update the supported media types */
	if (update_media) {
		status = ice_add_media_types(sc, sc->media);
		if (status)
			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
				      ice_status_str(status),
				      ice_aq_str(hw->adminq.sq_last_status));
	}

	/* TODO: notify VFs of link state change */
}

/**
 * ice_if_attach_post - Late device attach logic
 * @ctx: the iflib context structure
 *
 * Called by iflib to finish up attaching the device. Performs any attach
 * logic which must wait until after the Tx and Rx queues have been
 * allocated.
 */
static int
ice_if_attach_post(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	if_t ifp = iflib_get_ifp(ctx);
	int err;

	ASSERT_CTX_LOCKED(sc);

	/* We don't yet support loading if MSI-X is not supported */
	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
		return (ENOTSUP);
	}

	/* The ifnet structure hasn't yet been initialized when the attach_pre
	 * handler is called, so wait until attach_post to setup the
	 * isc_max_frame_size.
	 */

	sc->ifp = ifp;
	sc->scctx->isc_max_frame_size = ifp->if_mtu +
		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;

	/*
	 * If we are in recovery mode, only perform a limited subset of
	 * initialization to support NVM recovery.
	 */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
		ice_attach_post_recovery_mode(sc);
		return (0);
	}

	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;

	err = ice_initialize_vsi(&sc->pf_vsi);
	if (err) {
		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
			      ice_err_str(err));
		return err;
	}

	/* Configure the main PF VSI for RSS */
	err = ice_config_rss(&sc->pf_vsi);
	if (err) {
		device_printf(sc->dev,
			      "Unable to configure RSS for the main VSI, err %s\n",
			      ice_err_str(err));
		return err;
	}

	/* Configure switch to drop transmitted LLDP and PAUSE frames */
	err = ice_cfg_pf_ethertype_filters(sc);
	if (err)
		return err;

	ice_get_and_print_bus_info(sc);

	ice_set_link_management_mode(sc);

	ice_init_saved_phy_cfg(sc);

	ice_add_device_sysctls(sc);

	/* Get DCBX/LLDP state and start DCBX agent */
	ice_init_dcb_setup(sc);

	/* Setup link configuration parameters */
	ice_init_link_configuration(sc);
	ice_update_link_status(sc, true);

	/* Configure interrupt causes for the administrative interrupt */
	ice_configure_misc_interrupts(sc);

	/* Enable ITR 0 right away, so that we can handle admin interrupts */
	ice_enable_intr(&sc->hw, sc->irqvs[0].me);

	/* Start the admin timer */
	mtx_lock(&sc->admin_mtx);
	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
	mtx_unlock(&sc->admin_mtx);

	return 0;
} /* ice_if_attach_post */

/**
 * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
 * @sc: the device private softc
 *
 * Performs minimal work to prepare the driver to recover an NVM in case the
 * firmware is in recovery mode.
 */
static void
ice_attach_post_recovery_mode(struct ice_softc *sc)
{
	/* Configure interrupt causes for the administrative interrupt */
	ice_configure_misc_interrupts(sc);

	/* Enable ITR 0 right away, so that we can handle admin interrupts */
	ice_enable_intr(&sc->hw, sc->irqvs[0].me);

	/* Start the admin timer */
	mtx_lock(&sc->admin_mtx);
	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
	mtx_unlock(&sc->admin_mtx);
}

/**
 * ice_free_irqvs - Free IRQ vector memory
 * @sc: the device private softc structure
 *
 * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
 */
static void
ice_free_irqvs(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	if_ctx_t ctx = sc->ctx;
	int i;

	/* If the irqvs array is NULL, then there are no vectors to free */
	if (sc->irqvs == NULL)
		return;

	/* Free the IRQ vectors */
	for (i = 0; i < sc->num_irq_vectors; i++)
		iflib_irq_free(ctx, &sc->irqvs[i].irq);

	/* Clear the irqv pointers */
	for (i = 0; i < vsi->num_rx_queues; i++)
		vsi->rx_queues[i].irqv = NULL;

	for (i = 0; i < vsi->num_tx_queues; i++)
		vsi->tx_queues[i].irqv = NULL;

	/* Release the vector array memory */
	free(sc->irqvs, M_ICE);
	sc->irqvs = NULL;
	sc->num_irq_vectors = 0;
}

/**
 * ice_if_detach - Device driver detach logic
 * @ctx: iflib context structure
 *
 * Perform device shutdown logic to detach the device driver.
 *
 * Note that there is no guarantee of the ordering of ice_if_queues_free() and
 * ice_if_detach(). It is possible for the functions to be called in either
 * order, and they must not assume to have a strict ordering.
 */
static int
ice_if_detach(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	int i;

	ASSERT_CTX_LOCKED(sc);

	/* Indicate that we're detaching */
	ice_set_state(&sc->state, ICE_STATE_DETACHING);

	/* Stop the admin timer */
	mtx_lock(&sc->admin_mtx);
	callout_stop(&sc->admin_timer);
	mtx_unlock(&sc->admin_mtx);
	mtx_destroy(&sc->admin_mtx);

	/* Free allocated media types */
	ifmedia_removeall(sc->media);

	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
	 * pointers. Note, the calls here and those in ice_if_queues_free()
	 * are *BOTH* necessary, as we cannot guarantee which path will be
	 * run first.
	 */
	ice_vsi_del_txqs_ctx(vsi);
	ice_vsi_del_rxqs_ctx(vsi);

	/* Release MSI-X resources */
	ice_free_irqvs(sc);

	for (i = 0; i < sc->num_available_vsi; i++) {
		if (sc->all_vsi[i])
			ice_release_vsi(sc->all_vsi[i]);
	}

	if (sc->all_vsi) {
		free(sc->all_vsi, M_ICE);
		sc->all_vsi = NULL;
	}

	/* Release MSI-X memory */
	pci_release_msi(sc->dev);

	if (sc->msix_table != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY,
				     rman_get_rid(sc->msix_table),
				     sc->msix_table);
		sc->msix_table = NULL;
	}

	ice_free_intr_tracking(sc);

	/* Destroy the queue managers */
	ice_resmgr_destroy(&sc->tx_qmgr);
	ice_resmgr_destroy(&sc->rx_qmgr);

	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		ice_deinit_hw(&sc->hw);

	ice_free_pci_mapping(sc);

	return 0;
} /* ice_if_detach */

/**
 * ice_if_tx_queues_alloc - Allocate Tx queue memory
 * @ctx: iflib context structure
 * @vaddrs: virtual addresses for the queue memory
 * @paddrs: physical addresses for the queue memory
 * @ntxqs: the number of Tx queues per set (should always be 1)
 * @ntxqsets: the number of Tx queue sets to allocate
 *
 * Called by iflib to allocate Tx queues for the device. Allocates driver
 * memory to track each queue, the status arrays used for descriptor
 * status reporting, and Tx queue sysctls.
 */
static int
ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
		       int __invariant_only ntxqs, int ntxqsets)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_tx_queue *txq;
	int err, i, j;

	MPASS(ntxqs == 1);
	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
	ASSERT_CTX_LOCKED(sc);

	/* Do not bother allocating queues if we're in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	/* Allocate queue structure memory */
	if (!(vsi->tx_queues =
	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_WAITOK | M_ZERO))) {
		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
		return (ENOMEM);
	}

	/* Allocate report status arrays */
	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
		if (!(txq->tx_rsq =
		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_WAITOK))) {
			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
			err = ENOMEM;
			goto free_tx_queues;
		}
		/* Initialize report status array */
		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
			txq->tx_rsq[j] = QIDX_INVALID;
	}

	/* Assign queues from PF space to the main VSI */
	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
	if (err) {
		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
			      ice_err_str(err));
		goto free_tx_queues;
	}
	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;

	/* Add Tx queue sysctls context */
	ice_vsi_add_txqs_ctx(vsi);

	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
		txq->me = i;
		txq->vsi = vsi;

		/* store the queue size for easier access */
		txq->desc_count = sc->scctx->isc_ntxd[0];

		/* get the virtual and physical address of the hardware queues */
		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
		txq->tx_paddr = paddrs[i];

		ice_add_txq_sysctls(txq);
	}

	vsi->num_tx_queues = ntxqsets;

	return (0);

free_tx_queues:
	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
		if (txq->tx_rsq != NULL) {
			free(txq->tx_rsq, M_ICE);
			txq->tx_rsq = NULL;
		}
	}
	free(vsi->tx_queues, M_ICE);
	vsi->tx_queues = NULL;
	return err;
}

/**
 * ice_if_rx_queues_alloc - Allocate Rx queue memory
 * @ctx: iflib context structure
 * @vaddrs: virtual addresses for the queue memory
 * @paddrs: physical addresses for the queue memory
 * @nrxqs: number of Rx queues per set (should always be 1)
 * @nrxqsets: number of Rx queue sets to allocate
 *
 * Called by iflib to allocate Rx queues for the device. Allocates driver
 * memory to track each queue, as well as sets up the Rx queue sysctls.
 */
static int
ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
		       int __invariant_only nrxqs, int nrxqsets)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_rx_queue *rxq;
	int err, i;

	MPASS(nrxqs == 1);
	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
	ASSERT_CTX_LOCKED(sc);

	/* Do not bother allocating queues if we're in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	/* Allocate queue structure memory */
	if (!(vsi->rx_queues =
	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_WAITOK | M_ZERO))) {
		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
		return (ENOMEM);
	}

	/* Assign queues from PF space to the main VSI */
	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
	if (err) {
		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
			      ice_err_str(err));
		goto free_rx_queues;
	}
	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;

	/* Add Rx queue sysctls context */
	ice_vsi_add_rxqs_ctx(vsi);

	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
		rxq->me = i;
		rxq->vsi = vsi;

		/* store the queue size for easier access */
		rxq->desc_count = sc->scctx->isc_nrxd[0];

		/* get the virtual and physical address of the hardware queues */
		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
		rxq->rx_paddr = paddrs[i];

		ice_add_rxq_sysctls(rxq);
	}

	vsi->num_rx_queues = nrxqsets;

	return (0);

free_rx_queues:
	free(vsi->rx_queues, M_ICE);
	vsi->rx_queues = NULL;
	return err;
}

/**
 * ice_if_queues_free - Free queue memory
 * @ctx: the iflib context structure
 *
 * Free queue memory allocated by ice_if_tx_queues_alloc() and
 * ice_if_rx_queues_alloc().
 *
 * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
 * called in the same order. It's possible for ice_if_queues_free() to be
 * called prior to ice_if_detach(), and vice versa.
 *
 * For this reason, the main VSI is a static member of the ice_softc, which is
 * not free'd until after iflib finishes calling both of these functions.
 *
 * Thus, care must be taken in how we manage the memory being freed by this
 * function, and in what tasks it can and must perform.
 */
static void
ice_if_queues_free(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_tx_queue *txq;
	int i;

	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
	 * pointers. Note, the calls here and those in ice_if_detach()
	 * are *BOTH* necessary, as we cannot guarantee which path will be
	 * run first.
	 */
	ice_vsi_del_txqs_ctx(vsi);
	ice_vsi_del_rxqs_ctx(vsi);

	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
	ice_free_irqvs(sc);

	if (vsi->tx_queues != NULL) {
		/* free the tx_rsq arrays */
		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
			if (txq->tx_rsq != NULL) {
				free(txq->tx_rsq, M_ICE);
				txq->tx_rsq = NULL;
			}
		}
		free(vsi->tx_queues, M_ICE);
		vsi->tx_queues = NULL;
		vsi->num_tx_queues = 0;
	}
	if (vsi->rx_queues != NULL) {
		free(vsi->rx_queues, M_ICE);
		vsi->rx_queues = NULL;
		vsi->num_rx_queues = 0;
	}
}

/**
 * ice_msix_que - Fast interrupt handler for MSI-X receive queues
 * @arg: The Rx queue memory
 *
 * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
 * an MSI-X interrupt for a given queue is triggered. Currently this just asks
 * iflib to schedule the main Rx thread.
 */
static int
ice_msix_que(void *arg)
{
	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;

	/* TODO: dynamic ITR algorithm?? */

	return (FILTER_SCHEDULE_THREAD);
}

/**
 * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
 * @arg: pointer to device softc memory
 *
 * Called by iflib when an administrative interrupt occurs. Should perform any
 * fast logic for handling the interrupt cause, and then indicate whether the
 * admin task needs to be queued.
 */
static int
ice_msix_admin(void *arg)
{
	struct ice_softc *sc = (struct ice_softc *)arg;
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	u32 oicr;

	/* There is no safe way to modify the enabled miscellaneous causes of
	 * the OICR vector at runtime, as doing so would be prone to race
	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
	 * causes and allow future interrupts to occur. The admin interrupt
	 * vector will not be re-enabled until after we exit this function,
	 * but any delayed tasks must be resilient against possible "late
	 * arrival" interrupts that occur while we're already handling the
	 * task. This is done by using state bits and serializing these
	 * delayed tasks via the admin status task function.
	 */
	oicr = rd32(hw, PFINT_OICR);

	/* Processing multiple controlq interrupts on a single vector does not
	 * provide an indication of which controlq triggered the interrupt.
	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
	 * registers. However, the INTEVENT bit is not guaranteed to be set as
	 * it gets automatically cleared when the hardware acknowledges the
	 * interrupt.
	 *
	 * This means we don't really have a good indication of which controlq
	 * (if any) triggered this interrupt. We'll just notify the
	 * admin task that it should check all the controlqs.
	 */
	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);

	if (oicr & PFINT_OICR_VFLR_M) {
		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
	}

	if (oicr & PFINT_OICR_MAL_DETECT_M) {
		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
	}

	if (oicr & PFINT_OICR_GRST_M) {
		u32 reset;

		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
			GLGEN_RSTAT_RESET_TYPE_S;

		if (reset == ICE_RESET_CORER)
			sc->soft_stats.corer_count++;
		else if (reset == ICE_RESET_GLOBR)
			sc->soft_stats.globr_count++;
		else
			sc->soft_stats.empr_count++;

		/* There are a couple of bits at play for handling resets.
		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
		 * indicate that the driver has received an OICR with a reset
		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
		 * happen. Second, we set hw->reset_ongoing to indicate that
		 * the hardware is in reset. We will set this back to false as
		 * soon as the driver has determined that the hardware is out
		 * of reset.
		 *
		 * If the driver wishes to trigger a reset, it can set one of
		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
		 * correct type of reset.
		 */
		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
			hw->reset_ongoing = true;
	}

	if (oicr & PFINT_OICR_ECC_ERR_M) {
		device_printf(dev, "ECC Error detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

	if (oicr & PFINT_OICR_PE_CRITERR_M) {
		device_printf(dev, "Critical Protocol Engine Error detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
		device_printf(dev, "PCI Exception detected!\n");
		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
	}

	if (oicr & PFINT_OICR_HMC_ERR_M) {
		/* Log the HMC errors, but don't disable the interrupt cause */
		ice_log_hmc_error(hw, dev);
	}

	return (FILTER_SCHEDULE_THREAD);
}

/**
 * ice_allocate_msix - Allocate MSI-X vectors for the interface
 * @sc: the device private softc
 *
 * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
 *
 * First, determine a suitable total number of vectors based on the number
 * of CPUs, RSS buckets, the administrative vector, and other demands such as
 * RDMA.
 *
 * Request the desired amount of vectors, and see how many we obtain. If we
 * don't obtain as many as desired, reduce the demands by lowering the number
 * of requested queues or reducing the demand from other features such as
 * RDMA.
 *
 * @remark This function is required because the driver sets the
 * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
 * manually.
 *
 * @remark This driver will only use MSI-X vectors. If this is not possible,
 * neither MSI nor legacy interrupts will be tried.
 *
 * @post on success this function must set the following scctx parameters:
 * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
 *
 * @returns zero on success or an error code on failure.
 */
static int
ice_allocate_msix(struct ice_softc *sc)
{
	bool iflib_override_queue_count = false;
	if_softc_ctx_t scctx = sc->scctx;
	device_t dev = sc->dev;
	cpuset_t cpus;
	int bar, queues, vectors, requested;
	int err = 0;

	/* Allocate the MSI-X bar */
	bar = scctx->isc_msix_bar;
	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
	if (!sc->msix_table) {
		device_printf(dev, "Unable to map MSI-X table\n");
		return (ENOMEM);
	}

	/* Check if the iflib queue count sysctls have been set */
	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
		iflib_override_queue_count = true;

	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
	if (err) {
		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
			      __func__, ice_err_str(err));
		CPU_COPY(&all_cpus, &cpus);
	}

	/* Attempt to mimic behavior of iflib_msix_init */
	if (iflib_override_queue_count) {
		/*
		 * If the override sysctls have been set, limit the queues to
		 * the number of logical CPUs.
		 */
		queues = mp_ncpus;
	} else {
		/*
		 * Otherwise, limit the queue count to the CPUs associated
		 * with the NUMA node the device is associated with.
		 */
		queues = CPU_COUNT(&cpus);
	}

	/* Clamp to the number of RSS buckets */
	queues = imin(queues, rss_getnumbuckets());

	/*
	 * Clamp the number of queue pairs to the minimum of the requested Tx
	 * and Rx queues.
	 */
	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);

	/*
	 * Determine the number of vectors to request. Note that we also need
	 * to allocate one vector for administrative tasks.
	 */
	requested = queues + 1;
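	/* Illustrative example: with 8 usable queues, 9 vectors are
	 * requested; if the OS grants only 6, the fallback below reduces the
	 * queue count by the shortfall (3), leaving 5 queue pairs plus the
	 * admin vector.
	 */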

	vectors = requested;

	err = pci_alloc_msix(dev, &vectors);
	if (err) {
		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
			      vectors, ice_err_str(err));
		goto err_free_msix_table;
	}

	/* If we don't receive enough vectors, reduce demands */
	if (vectors < requested) {
		int diff = requested - vectors;

		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
			      requested, vectors);

		/*
		 * If we still have a difference, we need to reduce the number
		 * of queue pairs.
		 *
		 * However, we still need at least one vector for the admin
		 * interrupt and one queue pair.
		 */
		if (queues <= diff) {
			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
			err = (ERANGE);
			goto err_pci_release_msi;
		}

		queues -= diff;
	}

	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
		      vectors);

	scctx->isc_vectors = vectors;
	scctx->isc_nrxqsets = queues;
	scctx->isc_ntxqsets = queues;
	scctx->isc_intr = IFLIB_INTR_MSIX;

	/* Interrupt allocation tracking isn't required in recovery mode,
	 * since neither RDMA nor VFs are enabled.
	 */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

	/* Keep track of which interrupt indices are being used for what */
	sc->lan_vectors = vectors;
	err = ice_resmgr_assign_contiguous(&sc->imgr, sc->pf_imap, sc->lan_vectors);
	if (err) {
		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
			      ice_err_str(err));
		goto err_pci_release_msi;
	}

	return (0);

err_pci_release_msi:
	pci_release_msi(dev);
err_free_msix_table:
	if (sc->msix_table != NULL) {
		bus_release_resource(sc->dev, SYS_RES_MEMORY,
				rman_get_rid(sc->msix_table),
				sc->msix_table);
		sc->msix_table = NULL;
	}

	return (err);
}

/**
 * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
 * @ctx: the iflib context structure
 * @msix: the number of vectors we were assigned
 *
 * Called by iflib to assign MSI-X vectors to queues. Currently requires that
 * we get at least the same number of vectors as we have queues, and that we
 * always have the same number of Tx and Rx queues.
 *
 * Tx queues use a softirq instead of using their own hardware interrupt.
 */
static int
ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	int err, i, vector;

	ASSERT_CTX_LOCKED(sc);

	if (vsi->num_rx_queues != vsi->num_tx_queues) {
		device_printf(sc->dev,
			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
			      vsi->num_tx_queues, vsi->num_rx_queues);
		return (EOPNOTSUPP);
	}

	if (msix < (vsi->num_rx_queues + 1)) {
		device_printf(sc->dev,
			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
		return (EOPNOTSUPP);
	}

	/* Save the number of vectors for future use */
	sc->num_irq_vectors = vsi->num_rx_queues + 1;

	/* Allocate space to store the IRQ vector data */
	if (!(sc->irqvs =
	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
					       M_ICE, M_NOWAIT))) {
		device_printf(sc->dev,
			      "Unable to allocate irqv memory\n");
		return (ENOMEM);
	}

	/* Administrative interrupt events will use vector 0 */
	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
				      ice_msix_admin, sc, 0, "admin");
	if (err) {
		device_printf(sc->dev,
			      "Failed to register Admin queue handler: %s\n",
			      ice_err_str(err));
		goto free_irqvs;
	}
	sc->irqvs[0].me = 0;

	/* Do not allocate queue interrupts when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (0);

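	/* Queue vectors start at index 1. The MSI-X resource id (rid) passed
	 * to iflib is vector + 1, since rid 1 was already consumed by the
	 * admin interrupt registered above.
	 */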
	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
		struct ice_tx_queue *txq = &vsi->tx_queues[i];
		int rid = vector + 1;
		char irq_name[16];

		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
					      IFLIB_INTR_RX, ice_msix_que,
					      rxq, rxq->me, irq_name);
		if (err) {
			device_printf(sc->dev,
				      "Failed to allocate q int %d err: %s\n",
				      i, ice_err_str(err));
			vector--;
			i--;
			goto fail;
		}
		sc->irqvs[vector].me = vector;
		rxq->irqv = &sc->irqvs[vector];

		bzero(irq_name, sizeof(irq_name));

		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
					    IFLIB_INTR_TX, txq,
					    txq->me, irq_name);
		txq->irqv = &sc->irqvs[vector];
	}

	return (0);
fail:
	for (; i >= 0; i--, vector--)
		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
	iflib_irq_free(ctx, &sc->irqvs[0].irq);
free_irqvs:
	free(sc->irqvs, M_ICE);
	sc->irqvs = NULL;
	return err;
}

/**
 * ice_if_mtu_set - Set the device MTU
 * @ctx: iflib context structure
 * @mtu: the MTU requested
 *
 * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static int
ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	ASSERT_CTX_LOCKED(sc);

	/* Do not support configuration when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (ENOSYS);

	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
		return (EINVAL);

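	/* The programmed frame size covers the Ethernet header (14 bytes),
	 * CRC (4 bytes), and a single VLAN tag (4 bytes); e.g. the standard
	 * 1500 byte MTU results in a 1522 byte maximum frame size.
	 */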
	sc->scctx->isc_max_frame_size = mtu +
		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;

	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;

	return (0);
}

/**
 * ice_if_intr_enable - Enable device interrupts
 * @ctx: iflib context structure
 *
 * Called by iflib to request enabling device interrupts.
 */
static void
ice_if_intr_enable(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_hw *hw = &sc->hw;

	ASSERT_CTX_LOCKED(sc);

	/* Enable ITR 0 */
	ice_enable_intr(hw, sc->irqvs[0].me);

	/* Do not enable queue interrupts in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	/* Enable all queue interrupts */
	for (int i = 0; i < vsi->num_rx_queues; i++)
		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
}

/**
 * ice_if_intr_disable - Disable device interrupts
 * @ctx: iflib context structure
 *
 * Called by iflib to request disabling device interrupts.
 */
static void
ice_if_intr_disable(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_hw *hw = &sc->hw;
	unsigned int i;

	ASSERT_CTX_LOCKED(sc);

	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
	 * assigned to queues. Instead of assuming that the interrupt
	 * assignment in the rx_queues structure is valid, just disable all
	 * possible interrupts.
1606	 *
1607	 * Note that we choose not to disable ITR 0 because this handles the
1608	 * AdminQ interrupts, and we want to keep processing these even when
1609	 * the interface is offline.
1610	 */
1611	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1612		ice_disable_intr(hw, i);
1613}
1614
1615/**
1616 * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1617 * @ctx: iflib context structure
1618 * @rxqid: the Rx queue to enable
1619 *
1620 * Enable a specific Rx queue interrupt.
1621 *
1622 * This function is not protected by the iflib CTX lock.
1623 */
1624static int
1625ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1626{
1627	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1628	struct ice_vsi *vsi = &sc->pf_vsi;
1629	struct ice_hw *hw = &sc->hw;
1630
1631	/* Do not enable queue interrupts in recovery mode */
1632	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1633		return (ENOSYS);
1634
1635	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1636	return (0);
1637}
1638
1639/**
1640 * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1641 * @ctx: iflib context structure
1642 * @txqid: the Tx queue to enable
1643 *
1644 * Enable a specific Tx queue interrupt.
1645 *
1646 * This function is not protected by the iflib CTX lock.
1647 */
1648static int
1649ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1650{
1651	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1652	struct ice_vsi *vsi = &sc->pf_vsi;
1653	struct ice_hw *hw = &sc->hw;
1654
1655	/* Do not enable queue interrupts in recovery mode */
1656	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1657		return (ENOSYS);
1658
1659	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1660	return (0);
1661}
1662
/**
 * ice_if_promisc_set - Set device promiscuous mode
 * @ctx: iflib context structure
 * @flags: promiscuous flags to configure
 *
 * Called by iflib to configure device promiscuous mode.
 *
 * @remark Calls to this function will always overwrite the previous setting.
 */
static int
ice_if_promisc_set(if_ctx_t ctx, int flags)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	enum ice_status status;
	bool promisc_enable = flags & IFF_PROMISC;
	bool multi_enable = flags & IFF_ALLMULTI;

	/* Do not support configuration when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return (ENOSYS);

	if (multi_enable)
		return (EOPNOTSUPP);

	if (promisc_enable) {
		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
					     ICE_VSI_PROMISC_MASK, 0);
		if (status && status != ICE_ERR_ALREADY_EXISTS) {
			device_printf(dev,
				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
				      ice_status_str(status),
				      ice_aq_str(hw->adminq.sq_last_status));
			return (EIO);
		}
	} else {
		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
					       ICE_VSI_PROMISC_MASK, 0);
		if (status) {
			device_printf(dev,
				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
				      ice_status_str(status),
				      ice_aq_str(hw->adminq.sq_last_status));
			return (EIO);
		}
	}

	return (0);
}

/**
 * ice_if_media_change - Change device media
 * @ctx: device ctx structure
 *
 * Called by iflib when a media change is requested. This operation is not
 * supported by the hardware, so we just return an error code.
 */
static int
ice_if_media_change(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	device_printf(sc->dev, "Media change is not supported.\n");
	return (ENODEV);
}

/**
 * ice_if_media_status - Report current device media
 * @ctx: iflib context structure
 * @ifmr: ifmedia request structure to update
 *
 * Updates the provided ifmr with current device media status, including link
 * status and media type.
 */
static void
ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	/* Never report link up or media types when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	if (!sc->link_up)
		return;

	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_FDX;

	if (li->phy_type_low)
		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
	else if (li->phy_type_high)
		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
	else
		ifmr->ifm_active |= IFM_UNKNOWN;

	/* Report flow control status as well */
	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
}

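/*
 * For reference, a fully-populated media report for a hypothetical
 * 25GBase-SR link with symmetric flow control would come out of the
 * callback above looking like the sketch below. The IFM_25G_SR value is
 * chosen purely for illustration; the real media word depends on what
 * ice_get_phy_type_low()/_high() derive from the PHY type bits.
 */
#if 0
static void
example_media_report(struct ifmediareq *ifmr)
{
	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
	ifmr->ifm_active = IFM_ETHER | IFM_FDX | IFM_25G_SR |
			   IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE;
}
#endif
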
/**
 * ice_init_tx_tracking - Initialize Tx queue software tracking values
 * @vsi: the VSI to initialize
 *
 * Initialize Tx queue software tracking values, including the Report Status
 * ring and the related completion tracking indices.
 */
static void
ice_init_tx_tracking(struct ice_vsi *vsi)
{
	struct ice_tx_queue *txq;
	size_t j;
	int i;

	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;

		/* Initialize the last processed descriptor to be the end of
		 * the ring, rather than the start, so that we avoid an
		 * off-by-one error in ice_ift_txd_credits_update for the
		 * first packet.
		 */
		txq->tx_cidx_processed = txq->desc_count - 1;

		for (j = 0; j < txq->desc_count; j++)
			txq->tx_rsq[j] = QIDX_INVALID;
	}
}

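/*
 * The off-by-one note above is easiest to see with the modular arithmetic
 * written out. This sketch mirrors the credit computation performed by
 * ice_ift_txd_credits_update (an assumption: the real routine is taken to
 * be equivalent to this; the helper here is hypothetical and not compiled).
 */
#if 0
static int
example_tx_credits(uint16_t cidx_processed, uint16_t cidx_new,
		   uint16_t desc_count)
{
	int credits = (int)cidx_new - (int)cidx_processed;

	if (credits <= 0)
		credits += desc_count;

	/* With tx_cidx_processed seeded to desc_count - 1, a first
	 * completion at descriptor N yields N + 1 credits, matching the
	 * N + 1 descriptors (0 through N) that were consumed. Seeding it
	 * to 0 instead would under-report the first batch by one.
	 */
	return (credits);
}
#endif
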
/**
 * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
 * @sc: the device softc
 *
 * Called to update the Rx queue mbuf_sz parameter for configuring the receive
 * buffer sizes when programming hardware.
 */
static void
ice_update_rx_mbuf_sz(struct ice_softc *sc)
{
	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
	struct ice_vsi *vsi = &sc->pf_vsi;

	MPASS(mbuf_sz <= UINT16_MAX);
	vsi->mbuf_sz = mbuf_sz;
}

/**
 * ice_if_init - Initialize the device
 * @ctx: iflib ctx structure
 *
 * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
 * device filters and prepares the Tx and Rx engines.
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static void
ice_if_init(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	device_t dev = sc->dev;
	int err;

	ASSERT_CTX_LOCKED(sc);

	/*
	 * We've seen an issue with 11.3/12.1 where sideband routines are
	 * called after detach is called. This would call routines after
	 * if_stop, causing issues with the teardown process. This has
	 * seemingly been fixed in STABLE snapshots, but it seems like a
	 * good idea to have this guard here regardless.
	 */
	if (ice_driver_is_detaching(sc))
		return;

	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
		return;
	}

	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
		return;
	}

	ice_update_rx_mbuf_sz(sc);

	/* Update the MAC address... User might use a LAA */
	err = ice_update_laa_mac(sc);
	if (err) {
		device_printf(dev,
			      "LAA address change failed, err %s\n",
			      ice_err_str(err));
		return;
	}

	/* Initialize software Tx tracking values */
	ice_init_tx_tracking(&sc->pf_vsi);

	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
	if (err) {
		device_printf(dev,
			      "Unable to configure the main VSI for Tx: %s\n",
			      ice_err_str(err));
		return;
	}

	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
	if (err) {
		device_printf(dev,
			      "Unable to configure the main VSI for Rx: %s\n",
			      ice_err_str(err));
		goto err_cleanup_tx;
	}

	err = ice_control_rx_queues(&sc->pf_vsi, true);
	if (err) {
		device_printf(dev,
			      "Unable to enable Rx rings: %s\n",
			      ice_err_str(err));
		goto err_cleanup_tx;
	}

	err = ice_cfg_pf_default_mac_filters(sc);
	if (err) {
		device_printf(dev,
			      "Unable to configure default MAC filters: %s\n",
			      ice_err_str(err));
		goto err_stop_rx;
	}

	/* We use software interrupts for Tx, so we only program the hardware
	 * interrupts for Rx.
	 */
	ice_configure_rxq_interrupts(&sc->pf_vsi);
	ice_configure_rx_itr(&sc->pf_vsi);

	/* Configure promiscuous mode */
	ice_if_promisc_set(ctx, if_getflags(sc->ifp));

	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
	return;

err_stop_rx:
	ice_control_rx_queues(&sc->pf_vsi, false);
err_cleanup_tx:
	ice_vsi_disable_tx(&sc->pf_vsi);
}

/**
 * ice_poll_for_media_avail - Re-enable link if media is detected
 * @sc: device private structure
 *
 * Intended to be called from the driver's timer function, this function
 * sends the Get Link Status AQ command and re-enables HW link if the
 * command says that media is available.
 *
 * If the driver doesn't have the "NO_MEDIA" state set, this function does
 * nothing, since media availability changes are expected to be reported to
 * the driver through link status events.
 */
static void
ice_poll_for_media_avail(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;
	struct ice_port_info *pi = hw->port_info;

	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
		pi->phy.get_link_info = true;
		ice_get_link_status(pi, &sc->link_up);

		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
			enum ice_status status;

			/* Re-enable link and re-apply user link settings */
			ice_apply_saved_phy_cfg(sc);

			/* Update the OS about changes in media capability */
			status = ice_add_media_types(sc, sc->media);
			if (status)
				device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
					      ice_status_str(status),
					      ice_aq_str(hw->adminq.sq_last_status));

			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
		}
	}
}

/**
 * ice_if_timer - called by iflib periodically
 * @ctx: iflib ctx structure
 * @qid: the queue this timer was called for
 *
 * This callback is triggered by iflib periodically. We use it to update the
 * hw statistics.
 *
 * @remark this function is not protected by the iflib CTX lock.
 */
static void
ice_if_timer(if_ctx_t ctx, uint16_t qid)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;

	if (qid != 0)
		return;

	/* Do not attempt to update stats when in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	/* Update device statistics */
	ice_update_pf_stats(sc);

	/*
	 * For proper watchdog management, the iflib stack needs to know if
	 * we've been paused during the last interval. Check if the
	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
	 */
	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
		sc->scctx->isc_pause_frames = 1;

	/* Update the primary VSI stats */
	ice_update_vsi_hw_stats(&sc->pf_vsi);
}

/**
 * ice_admin_timer - called periodically to trigger the admin task
 * @arg: callout(9) argument pointing to the device private softc structure
 *
 * Timer function used as part of a callout(9) timer that will periodically
 * trigger the admin task, even when the interface is down.
 *
 * @remark this function is not called by iflib and is not protected by the
 * iflib CTX lock.
 *
 * @remark because this is a callout function, it cannot sleep and should not
 * attempt taking the iflib CTX lock.
 */
static void
ice_admin_timer(void *arg)
{
	struct ice_softc *sc = (struct ice_softc *)arg;

	/* Fire off the admin task */
	iflib_admin_intr_deferred(sc->ctx);

	/* Reschedule the admin timer */
	callout_schedule(&sc->admin_timer, hz/2);
}

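/*
 * A self-rearming callout like ice_admin_timer() only ticks once it has
 * been initialized and kicked off, which happens in the attach path (not
 * shown in this section). A minimal sketch of that arming sequence,
 * assuming an MPSAFE callout and the same half-second period, might look
 * like the hypothetical helper below:
 */
#if 0
static void
example_start_admin_timer(struct ice_softc *sc)
{
	/* Mark the callout MPSAFE so it runs without Giant */
	callout_init(&sc->admin_timer, 1);

	/* First expiry in hz/2 ticks; ice_admin_timer() reschedules itself */
	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
}
#endif
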
/**
 * ice_transition_recovery_mode - Transition to recovery mode
 * @sc: the device private softc
 *
 * Called when the driver detects that the firmware has entered recovery mode
 * at run time.
 */
static void
ice_transition_recovery_mode(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	int i;

	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");

	/* Tell the stack that the link has gone down */
	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);

	/* Request that the device be re-initialized */
	ice_request_stack_reinit(sc);

	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);

	ice_vsi_del_txqs_ctx(vsi);
	ice_vsi_del_rxqs_ctx(vsi);

	for (i = 0; i < sc->num_available_vsi; i++) {
		if (sc->all_vsi[i])
			ice_release_vsi(sc->all_vsi[i]);
	}
	sc->num_available_vsi = 0;

	if (sc->all_vsi) {
		free(sc->all_vsi, M_ICE);
		sc->all_vsi = NULL;
	}

	/* Destroy the interrupt manager */
	ice_resmgr_destroy(&sc->imgr);
	/* Destroy the queue managers */
	ice_resmgr_destroy(&sc->tx_qmgr);
	ice_resmgr_destroy(&sc->rx_qmgr);

	ice_deinit_hw(&sc->hw);
}

/**
 * ice_transition_safe_mode - Transition to safe mode
 * @sc: the device private softc
 *
 * Called when the driver attempts to reload the DDP package during a device
 * reset, and the new download fails. If so, we must transition to safe mode
 * at run time.
 *
 * @remark although safe mode normally allocates only a single queue, we can't
 * change the number of queues dynamically when using iflib. Due to this, we
 * do not attempt to reduce the number of queues.
 */
static void
ice_transition_safe_mode(struct ice_softc *sc)
{
	/* Indicate that we are in Safe mode */
	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);

	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);

	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
}

/**
 * ice_if_update_admin_status - update admin status
 * @ctx: iflib ctx structure
 *
 * Called by iflib to update the admin status. For our purposes, this means
 * check the adminq, and update the link status. It's ultimately triggered by
 * our admin interrupt, or by the ice_if_timer periodically.
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static void
ice_if_update_admin_status(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	enum ice_fw_modes fw_mode;
	bool reschedule = false;
	u16 pending = 0;

	ASSERT_CTX_LOCKED(sc);

	/* Check if the firmware entered recovery mode at run time */
	fw_mode = ice_get_fw_mode(&sc->hw);
	if (fw_mode == ICE_FW_MODE_REC) {
		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
			/* If we just entered recovery mode, log a warning to
			 * the system administrator and deinit driver state
			 * that is no longer functional.
			 */
			ice_transition_recovery_mode(sc);
		}
	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
			/* Rollback mode isn't fatal, but we don't want to
			 * repeatedly post a message about it.
			 */
			ice_print_rollback_msg(&sc->hw);
		}
	}

	/* Handle global reset events */
	ice_handle_reset_event(sc);

	/* Handle PF reset requests */
	ice_handle_pf_reset_request(sc);

	/* Handle MDD events */
	ice_handle_mdd_event(sc);

	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
		/*
		 * If we know the control queues are disabled, skip processing
		 * the control queues entirely.
		 */
		;
	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
		if (pending > 0)
			reschedule = true;

		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
		if (pending > 0)
			reschedule = true;
	}

	/* Poll for link up */
	ice_poll_for_media_avail(sc);

	/* Check and update link status */
	ice_update_link_status(sc, false);

	/*
	 * If there are still messages to process, we need to reschedule
	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
	 * woken up at the next interrupt or timer event.
	 */
	if (reschedule) {
		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
		iflib_admin_intr_deferred(ctx);
	} else {
		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
	}
}

/**
 * ice_prepare_for_reset - Prepare device for an impending reset
 * @sc: The device private softc
 *
 * Prepare the driver for an impending reset, shutting down VSIs, clearing the
 * scheduler setup, and shutting down controlqs. Uses the
 * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
 * driver for reset or not.
 */
static void
ice_prepare_for_reset(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;

	/* If we're already prepared, there's nothing to do */
	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
		return;

	log(LOG_INFO, "%s: preparing to reset device logic\n", sc->ifp->if_xname);

	/* In recovery mode, hardware is not initialized */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	/* Release the main PF VSI queue mappings */
	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
				    sc->pf_vsi.num_tx_queues);
	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
				    sc->pf_vsi.num_rx_queues);

	ice_clear_hw_tbls(hw);

	if (hw->port_info)
		ice_sched_clear_port(hw->port_info);

	ice_shutdown_all_ctrlq(hw);
}

/**
 * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
 * @sc: the device softc pointer
 *
 * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
 * mapping after a reset occurred.
 */
static int
ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
{
	struct ice_vsi *vsi = &sc->pf_vsi;
	struct ice_tx_queue *txq;
	struct ice_rx_queue *rxq;
	int err, i;

	/* Re-assign Tx queues from PF space to the main VSI */
	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
					    vsi->num_tx_queues);
	if (err) {
		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
			      ice_err_str(err));
		return (err);
	}

	/* Re-assign Rx queues from PF space to this VSI */
	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
					    vsi->num_rx_queues);
	if (err) {
		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
			      ice_err_str(err));
		goto err_release_tx_queues;
	}

	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;

	/* Re-assign Tx queue tail pointers */
	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);

	/* Re-assign Rx queue tail pointers */
	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);

	return (0);

err_release_tx_queues:
	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
				   sc->pf_vsi.num_tx_queues);

	return (err);
}

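/*
 * ice_rebuild_pf_vsi_qmap() relies on ICE_RESMGR_ALLOC_CONTIGUOUS meaning
 * that the resource manager hands back a linear run of queue indices.
 * Under that assumption (illustrative only; the real allocator lives
 * elsewhere in the driver), the resulting map is equivalent to:
 */
#if 0
static void
example_contiguous_qmap(uint16_t *qmap, uint16_t first_queue, int nqueues)
{
	int i;

	/* qmap[i] = first_queue + i, so the tail register lookups above
	 * reduce to a simple linear mapping from VSI-relative queue index
	 * to absolute PF queue index.
	 */
	for (i = 0; i < nqueues; i++)
		qmap[i] = first_queue + i;
}
#endif
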
/* determine if the iflib context is active */
#define CTX_ACTIVE(ctx) ((if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING))

/**
 * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
 * @sc: The device private softc
 *
 * Handle a driver rebuild while in recovery mode. This will only rebuild the
 * limited functionality supported while in recovery mode.
 */
static void
ice_rebuild_recovery_mode(struct ice_softc *sc)
{
	device_t dev = sc->dev;

	/* enable PCIe bus master */
	pci_enable_busmaster(dev);

	/* Configure interrupt causes for the administrative interrupt */
	ice_configure_misc_interrupts(sc);

	/* Enable ITR 0 right away, so that we can handle admin interrupts */
	ice_enable_intr(&sc->hw, sc->irqvs[0].me);

	/* Now that the rebuild is finished, we're no longer prepared to reset */
	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);

	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);

	/* In order to completely restore device functionality, the iflib core
	 * needs to be reset. We need to request an iflib reset. Additionally,
	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
	 * the iflib core, we also want to re-run the admin task so that iflib
	 * resets immediately instead of waiting for the next interrupt.
	 */
	ice_request_stack_reinit(sc);

	return;
}

/**
 * ice_rebuild - Rebuild driver state post reset
 * @sc: The device private softc
 *
 * Restore driver state after a reset occurred. Restart the controlqs, setup
 * the hardware port, and re-enable the VSIs.
 */
static void
ice_rebuild(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;
	device_t dev = sc->dev;
	enum ice_status status;
	int err;

	sc->rebuild_ticks = ticks;

	/* If we're rebuilding, then a reset has succeeded. */
	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);

	/*
	 * If the firmware is in recovery mode, only restore the limited
	 * functionality supported by recovery mode.
	 */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
		ice_rebuild_recovery_mode(sc);
		return;
	}

	/* enable PCIe bus master */
	pci_enable_busmaster(dev);

	status = ice_init_all_ctrlq(hw);
	if (status) {
		device_printf(dev, "failed to re-init controlqs, err %s\n",
			      ice_status_str(status));
		goto err_shutdown_ctrlq;
	}

	/* Query the allocated resources for Tx scheduler */
	status = ice_sched_query_res_alloc(hw);
	if (status) {
		device_printf(dev,
			      "Failed to query scheduler resources, err %s aq_err %s\n",
			      ice_status_str(status),
			      ice_aq_str(hw->adminq.sq_last_status));
		goto err_shutdown_ctrlq;
	}

	err = ice_send_version(sc);
	if (err)
		goto err_shutdown_ctrlq;

	err = ice_init_link_events(sc);
	if (err) {
		device_printf(dev, "ice_init_link_events failed: %s\n",
			      ice_err_str(err));
		goto err_shutdown_ctrlq;
	}

	status = ice_clear_pf_cfg(hw);
	if (status) {
		device_printf(dev, "failed to clear PF configuration, err %s\n",
			      ice_status_str(status));
		goto err_shutdown_ctrlq;
	}

	ice_clear_pxe_mode(hw);

	status = ice_get_caps(hw);
	if (status) {
		device_printf(dev, "failed to get capabilities, err %s\n",
			      ice_status_str(status));
		goto err_shutdown_ctrlq;
	}

	status = ice_sched_init_port(hw->port_info);
	if (status) {
		device_printf(dev, "failed to initialize port, err %s\n",
			      ice_status_str(status));
		goto err_sched_cleanup;
	}

	/* If we previously loaded the package, it needs to be reloaded now */
	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
		status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
		if (status) {
			ice_log_pkg_init(sc, &status);

			ice_transition_safe_mode(sc);
		}
	}

	ice_reset_pf_stats(sc);

	err = ice_rebuild_pf_vsi_qmap(sc);
	if (err) {
		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
			      ice_err_str(err));
		goto err_sched_cleanup;
	}
	err = ice_initialize_vsi(&sc->pf_vsi);
	if (err) {
		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
			      ice_err_str(err));
		goto err_release_queue_allocations;
	}

	/* Replay all VSI configuration */
	err = ice_replay_all_vsi_cfg(sc);
	if (err)
		goto err_deinit_pf_vsi;

	/* Reconfigure the main PF VSI for RSS */
	err = ice_config_rss(&sc->pf_vsi);
	if (err) {
		device_printf(sc->dev,
			      "Unable to reconfigure RSS for the main VSI, err %s\n",
			      ice_err_str(err));
		goto err_deinit_pf_vsi;
	}

	/* Refresh link status */
	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
	sc->hw.port_info->phy.get_link_info = true;
	ice_get_link_status(sc->hw.port_info, &sc->link_up);
	ice_update_link_status(sc, true);

	/* Configure interrupt causes for the administrative interrupt */
	ice_configure_misc_interrupts(sc);

	/* Enable ITR 0 right away, so that we can handle admin interrupts */
	ice_enable_intr(&sc->hw, sc->irqvs[0].me);

	/* Now that the rebuild is finished, we're no longer prepared to reset */
	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);

	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);

	/* In order to completely restore device functionality, the iflib core
	 * needs to be reset. We need to request an iflib reset. Additionally,
	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
	 * the iflib core, we also want to re-run the admin task so that iflib
	 * resets immediately instead of waiting for the next interrupt.
	 */
	ice_request_stack_reinit(sc);

	return;

err_deinit_pf_vsi:
	ice_deinit_vsi(&sc->pf_vsi);
err_release_queue_allocations:
	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
				    sc->pf_vsi.num_tx_queues);
	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
				    sc->pf_vsi.num_rx_queues);
err_sched_cleanup:
	ice_sched_cleanup_all(hw);
err_shutdown_ctrlq:
	ice_shutdown_all_ctrlq(hw);
	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
}

/**
 * ice_handle_reset_event - Handle reset events triggered by OICR
 * @sc: The device private softc
 *
 * Handle reset events triggered by an OICR notification. This includes CORER,
 * GLOBR, and EMPR resets triggered by software on this or any other PF or by
 * firmware.
 *
 * @pre assumes the iflib context lock is held, and will unlock it while
 * waiting for the hardware to finish reset.
 */
static void
ice_handle_reset_event(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;
	enum ice_status status;
	device_t dev = sc->dev;

	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
	 * trigger an OICR interrupt. Our OICR handler will determine when
	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
	 * appropriate.
	 */
	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
		return;

	ice_prepare_for_reset(sc);

	/*
	 * Release the iflib context lock and wait for the device to finish
	 * resetting.
	 */
	IFLIB_CTX_UNLOCK(sc);
	status = ice_check_reset(hw);
	IFLIB_CTX_LOCK(sc);
	if (status) {
		device_printf(dev, "Device never came out of reset, err %s\n",
			      ice_status_str(status));
		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
		return;
	}

	/* We're done with the reset, so we can rebuild driver state */
	sc->hw.reset_ongoing = false;
	ice_rebuild(sc);

	/* In the unlikely event that a PF reset request occurs at the same
	 * time as a global reset, clear the request now. This avoids
	 * resetting a second time right after we reset due to a global event.
	 */
	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
}

/**
 * ice_handle_pf_reset_request - Initiate PF reset requested by software
 * @sc: The device private softc
 *
 * Initiate a PF reset requested by software. We handle this in the admin task
 * so that only one thread actually handles driver preparation and cleanup,
 * rather than having multiple threads possibly attempt to run this code
 * simultaneously.
 *
 * @pre assumes the iflib context lock is held and will unlock it while
 * waiting for the PF reset to complete.
 */
static void
ice_handle_pf_reset_request(struct ice_softc *sc)
{
	struct ice_hw *hw = &sc->hw;
	enum ice_status status;

	/* Check for PF reset requests */
	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
		return;

	/* Make sure we're prepared for reset */
	ice_prepare_for_reset(sc);

	/*
	 * Release the iflib context lock and wait for the device to finish
	 * resetting.
	 */
	IFLIB_CTX_UNLOCK(sc);
	status = ice_reset(hw, ICE_RESET_PFR);
	IFLIB_CTX_LOCK(sc);
	if (status) {
		device_printf(sc->dev, "device PF reset failed, err %s\n",
			      ice_status_str(status));
		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
		return;
	}

	sc->soft_stats.pfr_count++;
	ice_rebuild(sc);
}

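/*
 * Nothing in this section sets ICE_STATE_RESET_PFR_REQ directly; other
 * driver code requests a PF reset by setting the bit and then kicking the
 * admin task, which funnels the work into the handler above. A hedged
 * sketch of that request side (the helper name is hypothetical and the
 * block is not compiled):
 */
#if 0
static void
example_request_pfr(struct ice_softc *sc)
{
	/* Record the request; the admin task owns the actual reset work */
	ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);

	/* Wake the admin task so the reset is handled promptly */
	iflib_admin_intr_deferred(sc->ctx);
}
#endif
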
/**
 * ice_init_device_features - Init device driver features
 * @sc: driver softc structure
 *
 * @pre assumes that the function capabilities bits have been set up by
 * ice_init_hw().
 */
static void
ice_init_device_features(struct ice_softc *sc)
{
	/*
	 * A failed pkg file download triggers safe mode, disabling advanced
	 * device feature support
	 */
	if (ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE))
		return;

	/* Set capabilities that all devices support */
	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
	ice_set_bit(ICE_FEATURE_DEFAULT_OVERRIDE, sc->feat_cap);

	/* Disable features due to hardware limitations... */
	if (!sc->hw.func_caps.common_cap.rss_table_size)
		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);

	/* Disable capabilities not supported by the OS */
	ice_disable_unsupported_features(sc->feat_cap);

	/* RSS is always enabled for iflib */
	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
}

/**
 * ice_if_multi_set - Callback to update Multicast filters in HW
 * @ctx: iflib ctx structure
 *
 * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
 * the if_multiaddrs list and determine which filters have been added or
 * removed from the list, and update HW programming to reflect the new list.
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static void
ice_if_multi_set(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	int err;

	ASSERT_CTX_LOCKED(sc);

	/* Do not handle multicast configuration in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	err = ice_sync_multicast_filters(sc);
	if (err) {
		device_printf(sc->dev,
			      "Failed to synchronize multicast filter list: %s\n",
			      ice_err_str(err));
		return;
	}
}

/**
 * ice_if_vlan_register - Register a VLAN with the hardware
 * @ctx: iflib ctx pointer
 * @vtag: VLAN to add
 *
 * Programs the main PF VSI with a hardware filter for the given VLAN.
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static void
ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	enum ice_status status;

	ASSERT_CTX_LOCKED(sc);

	/* Do not handle VLAN configuration in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
	if (status) {
		device_printf(sc->dev,
			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
			      vtag, ice_status_str(status),
			      ice_aq_str(sc->hw.adminq.sq_last_status));
	}
}

/**
 * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
 * @ctx: iflib ctx pointer
 * @vtag: VLAN to remove
 *
 * Removes the previously programmed VLAN filter from the main PF VSI.
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static void
ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	enum ice_status status;

	ASSERT_CTX_LOCKED(sc);

	/* Do not handle VLAN configuration in recovery mode */
	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
		return;

	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
	if (status) {
		device_printf(sc->dev,
			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
			      vtag, ice_status_str(status),
			      ice_aq_str(sc->hw.adminq.sq_last_status));
	}
}

/**
 * ice_if_stop - Stop the device
 * @ctx: iflib context structure
 *
 * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
 * down)
 *
 * @pre assumes the caller holds the iflib CTX lock
 */
static void
ice_if_stop(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	ASSERT_CTX_LOCKED(sc);

	/*
	 * The iflib core may call IFDI_STOP prior to the first call to
	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
	 * don't have, and disable Tx queues which aren't yet configured.
	 * Although it is likely these extra operations are harmless, they do
	 * cause spurious warning messages to be displayed, which may confuse
	 * users.
	 *
	 * To avoid these messages, we use a state bit indicating if we've
	 * been initialized. It will be set when ice_if_init is called, and
	 * cleared here in ice_if_stop.
	 */
	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
		return;

	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
		return;
	}

	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
		return;
	}

	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
	 * return of these functions because there's nothing we can really do
	 * if they fail, and the functions already print error messages.
	 * Just try to shut down as much as we can.
	 */
	ice_rm_pf_default_mac_filters(sc);

	/* Dissociate the Tx and Rx queues from the interrupts */
	ice_flush_txq_interrupts(&sc->pf_vsi);
	ice_flush_rxq_interrupts(&sc->pf_vsi);

	/* Disable the Tx and Rx queues */
	ice_vsi_disable_tx(&sc->pf_vsi);
	ice_control_rx_queues(&sc->pf_vsi, false);
}

/**
 * ice_if_get_counter - Get current value of an ifnet statistic
 * @ctx: iflib context pointer
 * @counter: ifnet counter to read
 *
 * Reads the current value of an ifnet counter for the device.
 *
 * This function is not protected by the iflib CTX lock.
 */
static uint64_t
ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	/* Return the counter for the main PF VSI */
	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
}

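/*
 * The heavy lifting for ice_if_get_counter() happens inside
 * ice_get_ifnet_counter(). As a rough sketch of the dispatch involved
 * (illustrative only; the field names below are assumptions for the
 * example, not the driver's actual statistics layout):
 */
#if 0
static uint64_t
example_vsi_counter(struct ice_vsi *vsi, ift_counter counter)
{
	switch (counter) {
	case IFCOUNTER_IPACKETS:
		/* Hypothetical aggregate of the VSI's Rx packet counters */
		return (vsi->example_stats.rx_unicast +
			vsi->example_stats.rx_multicast +
			vsi->example_stats.rx_broadcast);
	case IFCOUNTER_IERRORS:
		return (vsi->example_stats.rx_errors);
	default:
		return (0);
	}
}
#endif
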
/**
 * ice_request_stack_reinit - Request that iflib re-initialize
 * @sc: the device private softc
 *
 * Request that the device be brought down and up, to re-initialize. For
 * example, this may be called when a device reset occurs, or when Tx and Rx
 * queues need to be re-initialized.
 *
 * This is required because the iflib state is outside the driver, and must be
 * re-initialized if we need to restart Tx and Rx queues.
 */
void
ice_request_stack_reinit(struct ice_softc *sc)
{
	if (CTX_ACTIVE(sc->ctx)) {
		iflib_request_reset(sc->ctx);
		iflib_admin_intr_deferred(sc->ctx);
	}
}

/**
 * ice_driver_is_detaching - Check if the driver is detaching/unloading
 * @sc: device private softc
 *
 * Returns true if the driver is detaching, false otherwise.
 *
 * @remark on newer kernels, take advantage of iflib_in_detach in order to
 * report detachment correctly as early as possible.
 *
 * @remark this function is used by various code paths that want to avoid
 * running if the driver is about to be removed. This includes sysctls and
 * other driver access points. Note that it does not fully resolve
 * detach-based race conditions as it is possible for a thread to race with
 * iflib_in_detach.
 */
bool
ice_driver_is_detaching(struct ice_softc *sc)
{
	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
		iflib_in_detach(sc->ctx));
}

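/*
 * A typical use of ice_driver_is_detaching() is an early-out guard in a
 * sysctl handler, so a sysctl racing with detach bails out instead of
 * touching state that is being torn down. Sketch (the handler name is
 * hypothetical; the block is not compiled):
 */
#if 0
static int
example_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct ice_softc *sc = (struct ice_softc *)arg1;

	if (ice_driver_is_detaching(sc))
		return (ESHUTDOWN);

	/* ... normal handler body would follow here ... */
	return (0);
}
#endif
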
/**
 * ice_if_priv_ioctl - Device private ioctl handler
 * @ctx: iflib context pointer
 * @command: The ioctl command issued
 * @data: ioctl specific data
 *
 * iflib callback for handling custom driver specific ioctls.
 *
 * @pre Assumes that the iflib context lock is held.
 */
static int
ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
	struct ifdrv *ifd;
	device_t dev = sc->dev;

	if (data == NULL)
		return (EINVAL);

	ASSERT_CTX_LOCKED(sc);

	/* Make sure the command type is valid */
	switch (command) {
	case SIOCSDRVSPEC:
	case SIOCGDRVSPEC:
		/* Accepted commands */
		break;
	case SIOCGPRIVATE_0:
		/*
		 * Although we do not support this ioctl command, it's
		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
		 * handler. Do not print a message in this case.
		 */
		return (ENOTSUP);
	default:
		/*
		 * If we get a different command for this function, it's
		 * definitely unexpected, so log a message indicating what
		 * command we got for debugging purposes.
		 */
		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
			      __func__, command);
		return (EINVAL);
	}

	ifd = (struct ifdrv *)data;

	switch (ifd->ifd_cmd) {
	case ICE_NVM_ACCESS:
		return ice_handle_nvm_access_ioctl(sc, ifd);
	default:
		return (EINVAL);
	}
}

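/*
 * From userland, the private ioctl above is reached through SIOCGDRVSPEC
 * with a struct ifdrv describing the command. A hedged sketch of that
 * call sequence (the buffer contents for ICE_NVM_ACCESS are defined by
 * the NVM access interface and are treated as opaque here):
 *
 *	struct ifdrv ifd;
 *
 *	memset(&ifd, 0, sizeof(ifd));
 *	strlcpy(ifd.ifd_name, "ice0", sizeof(ifd.ifd_name));
 *	ifd.ifd_cmd = ICE_NVM_ACCESS;
 *	ifd.ifd_len = sizeof(buf);
 *	ifd.ifd_data = buf;
 *	if (ioctl(sock_fd, SIOCGDRVSPEC, &ifd) == -1)
 *		err(1, "SIOCGDRVSPEC");
 */
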
/**
 * ice_if_i2c_req - I2C request handler for iflib
 * @ctx: iflib context pointer
 * @req: The I2C parameters to use
 *
 * Read from the port's I2C eeprom using the parameters from the ioctl.
 *
 * @remark The iflib-only part is pretty simple.
 */
static int
ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	return ice_handle_i2c_req(sc, req);
}

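/*
 * The ifi2creq passed in originates from a SIOCGI2C ioctl. As an
 * illustrative sketch, reading the SFF-8024 identifier byte from a
 * module EEPROM at I2C address 0xA0 would populate the request like so
 * (userland side, error handling omitted):
 *
 *	struct ifi2creq req;
 *
 *	memset(&req, 0, sizeof(req));
 *	req.dev_addr = 0xA0;	// module EEPROM address
 *	req.offset = 0;		// SFF-8024 identifier byte
 *	req.len = 1;
 *	// submit via SIOCGI2C through the ifreq for "ice0";
 *	// req.data[0] then holds the identifier
 */
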
/**
 * ice_if_suspend - PCI device suspend handler for iflib
 * @ctx: iflib context pointer
 *
 * Deinitializes the driver and clears HW resources in preparation for
 * suspend or an FLR.
 *
 * @returns 0; this return value is ignored
 */
static int
ice_if_suspend(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	/* At least a PFR is always going to happen after this;
	 * either via FLR or during the D3->D0 transition.
	 */
	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);

	ice_prepare_for_reset(sc);

	return (0);
}

/**
 * ice_if_resume - PCI device resume handler for iflib
 * @ctx: iflib context pointer
 *
 * Reinitializes the driver and the HW after PCI resume or after
 * an FLR. An init is performed by iflib after this function is finished.
 *
 * @returns 0; this return value is ignored
 */
static int
ice_if_resume(if_ctx_t ctx)
{
	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);

	ice_rebuild(sc);

	return (0);
}