1139749Simp/* SPDX-License-Identifier: BSD-3-Clause */
2113584Ssimokawa/*  Copyright (c) 2024, Intel Corporation
3103285Sikob *  All rights reserved.
4103285Sikob *
5103285Sikob *  Redistribution and use in source and binary forms, with or without
6103285Sikob *  modification, are permitted provided that the following conditions are met:
7103285Sikob *
8103285Sikob *   1. Redistributions of source code must retain the above copyright notice,
9103285Sikob *      this list of conditions and the following disclaimer.
10103285Sikob *
11103285Sikob *   2. Redistributions in binary form must reproduce the above copyright
12103285Sikob *      notice, this list of conditions and the following disclaimer in the
13103285Sikob *      documentation and/or other materials provided with the distribution.
14103285Sikob *
15103285Sikob *   3. Neither the name of the Intel Corporation nor the names of its
16103285Sikob *      contributors may be used to endorse or promote products derived from
17103285Sikob *      this software without specific prior written permission.
18103285Sikob *
19103285Sikob *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20103285Sikob *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21103285Sikob *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22103285Sikob *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23103285Sikob *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24103285Sikob *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25103285Sikob *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26103285Sikob *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27103285Sikob *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28103285Sikob *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29103285Sikob *  POSSIBILITY OF SUCH DAMAGE.
30103285Sikob */
31103285Sikob
32103285Sikob/**
33103285Sikob * @file if_ice_iflib.c
34103285Sikob * @brief iflib driver implementation
35103285Sikob *
36103285Sikob * Contains the main entry point for the iflib driver implementation. It
37103285Sikob * implements the various ifdi driver methods, and sets up the module and
38103285Sikob * driver values to load an iflib driver.
39103285Sikob */
40103285Sikob
41103285Sikob#include "ice_iflib.h"
42103285Sikob#include "ice_drv_info.h"
43103285Sikob#include "ice_switch.h"
44103285Sikob#include "ice_sched.h"
45103285Sikob
46103285Sikob#include <sys/module.h>
47103285Sikob#include <sys/sockio.h>
48103285Sikob#include <sys/smp.h>
49103285Sikob#include <dev/pci/pcivar.h>
50118293Ssimokawa#include <dev/pci/pcireg.h>
51103285Sikob
52103285Sikob/*
53103285Sikob * Device method prototypes
54103285Sikob */
55103285Sikob
56103285Sikobstatic void *ice_register(device_t);
57109801Ssimokawastatic int  ice_if_attach_pre(if_ctx_t);
58129585Sdfrstatic int  ice_attach_pre_recovery_mode(struct ice_softc *sc);
59129585Sdfrstatic int  ice_if_attach_post(if_ctx_t);
60103285Sikobstatic void ice_attach_post_recovery_mode(struct ice_softc *sc);
61103285Sikobstatic int  ice_if_detach(if_ctx_t);
62103285Sikobstatic int  ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets);
63103285Sikobstatic int  ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nqs, int nqsets);
64103285Sikobstatic int ice_if_msix_intr_assign(if_ctx_t ctx, int msix);
65103285Sikobstatic void ice_if_queues_free(if_ctx_t ctx);
66103285Sikobstatic int ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu);
67109801Ssimokawastatic void ice_if_intr_enable(if_ctx_t ctx);
68129585Sdfrstatic void ice_if_intr_disable(if_ctx_t ctx);
69129585Sdfrstatic int ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
70103285Sikobstatic int ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
71103285Sikobstatic int ice_if_promisc_set(if_ctx_t ctx, int flags);
72108701Ssimokawastatic void ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
73103285Sikobstatic int ice_if_media_change(if_ctx_t ctx);
74122216Ssimokawastatic void ice_if_init(if_ctx_t ctx);
75103285Sikobstatic void ice_if_timer(if_ctx_t ctx, uint16_t qid);
76103285Sikobstatic void ice_if_update_admin_status(if_ctx_t ctx);
77103285Sikobstatic void ice_if_multi_set(if_ctx_t ctx);
78103285Sikobstatic void ice_if_vlan_register(if_ctx_t ctx, u16 vtag);
79103285Sikobstatic void ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
80103285Sikobstatic void ice_if_stop(if_ctx_t ctx);
81103285Sikobstatic uint64_t ice_if_get_counter(if_ctx_t ctx, ift_counter counter);
82103285Sikobstatic int ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
83103285Sikobstatic int ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
84103285Sikobstatic int ice_if_suspend(if_ctx_t ctx);
85103285Sikobstatic int ice_if_resume(if_ctx_t ctx);
86103285Sikobstatic bool ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event);
87103285Sikobstatic int ice_setup_mirror_vsi(struct ice_mirr_if *mif);
88103285Sikobstatic int ice_wire_mirror_intrs(struct ice_mirr_if *mif);
89103285Sikobstatic void ice_free_irqvs_subif(struct ice_mirr_if *mif);
90103285Sikobstatic void *ice_subif_register(device_t);
91103285Sikobstatic void ice_subif_setup_scctx(struct ice_mirr_if *mif);
92103285Sikobstatic int ice_subif_rebuild(struct ice_softc *sc);
93103285Sikobstatic int ice_subif_rebuild_vsi_qmap(struct ice_softc *sc);
94103285Sikob
95103285Sikob/* Iflib API */
96103285Sikobstatic int ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
97103285Sikob    uint64_t *paddrs, int ntxqs, int ntxqsets);
98103285Sikobstatic int ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
99103285Sikob    uint64_t *paddrs, int nrxqs, int nrxqsets);
100103285Sikobstatic int ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid);
101103285Sikobstatic int ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid);
102188508Ssbrunostatic void ice_subif_if_intr_enable(if_ctx_t ctx);
103188508Ssbrunostatic int ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix);
104188508Ssbrunostatic void ice_subif_if_init(if_ctx_t ctx);
105188508Ssbrunostatic void ice_subif_if_stop(if_ctx_t ctx);
106103285Sikobstatic void ice_subif_if_queues_free(if_ctx_t ctx);
107103285Sikobstatic int ice_subif_if_attach_pre(if_ctx_t);
108103285Sikobstatic int ice_subif_if_attach_post(if_ctx_t);
109188508Ssbrunostatic void ice_subif_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr);
110188508Ssbrunostatic int ice_subif_if_promisc_set(if_ctx_t ctx, int flags);
111188508Ssbruno
112103285Sikobstatic int ice_msix_que(void *arg);
113103285Sikobstatic int ice_msix_admin(void *arg);
114103285Sikob
115109801Ssimokawa/*
116129585Sdfr * Helper function prototypes
117103285Sikob */
118109801Ssimokawastatic int ice_pci_mapping(struct ice_softc *sc);
119109801Ssimokawastatic void ice_free_pci_mapping(struct ice_softc *sc);
120129585Sdfrstatic void ice_update_link_status(struct ice_softc *sc, bool update_media);
121103285Sikobstatic void ice_init_device_features(struct ice_softc *sc);
122109801Ssimokawastatic void ice_init_tx_tracking(struct ice_vsi *vsi);
123113584Ssimokawastatic void ice_handle_reset_event(struct ice_softc *sc);
124129585Sdfrstatic void ice_handle_pf_reset_request(struct ice_softc *sc);
125129585Sdfrstatic void ice_prepare_for_reset(struct ice_softc *sc);
126113584Ssimokawastatic int ice_rebuild_pf_vsi_qmap(struct ice_softc *sc);
127129585Sdfrstatic void ice_rebuild(struct ice_softc *sc);
128129585Sdfrstatic void ice_rebuild_recovery_mode(struct ice_softc *sc);
129109801Ssimokawastatic void ice_free_irqvs(struct ice_softc *sc);
130109801Ssimokawastatic void ice_update_rx_mbuf_sz(struct ice_softc *sc);
131113584Ssimokawastatic void ice_poll_for_media_avail(struct ice_softc *sc);
132113584Ssimokawastatic void ice_setup_scctx(struct ice_softc *sc);
133129585Sdfrstatic int ice_allocate_msix(struct ice_softc *sc);
134129585Sdfrstatic void ice_admin_timer(void *arg);
135113584Ssimokawastatic void ice_transition_recovery_mode(struct ice_softc *sc);
136129585Sdfrstatic void ice_transition_safe_mode(struct ice_softc *sc);
137129585Sdfrstatic void ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask);
138113584Ssimokawa
139113584Ssimokawa/*
140109801Ssimokawa * Device Interface Declaration
141109801Ssimokawa */
142129585Sdfr
143103285Sikob/**
144113584Ssimokawa * @var ice_methods
145109801Ssimokawa * @brief ice driver method entry points
146103285Sikob *
147113584Ssimokawa * List of device methods implementing the generic device interface used by
148129585Sdfr * the device stack to interact with the ice driver. Since this is an iflib
149109801Ssimokawa * driver, most of the methods point to the generic iflib implementation.
150103285Sikob */
151113584Ssimokawastatic device_method_t ice_methods[] = {
152113584Ssimokawa	/* Device interface */
153109801Ssimokawa	DEVMETHOD(device_register, ice_register),
154103285Sikob	DEVMETHOD(device_probe,    iflib_device_probe_vendor),
155113584Ssimokawa	DEVMETHOD(device_attach,   iflib_device_attach),
156113584Ssimokawa	DEVMETHOD(device_detach,   iflib_device_detach),
157129585Sdfr	DEVMETHOD(device_shutdown, iflib_device_shutdown),
158109801Ssimokawa	DEVMETHOD(device_suspend,  iflib_device_suspend),
159103285Sikob	DEVMETHOD(device_resume,   iflib_device_resume),
160113584Ssimokawa	DEVMETHOD_END
161113584Ssimokawa};
162129585Sdfr
163109801Ssimokawa/**
164103285Sikob * @var ice_iflib_methods
165113584Ssimokawa * @brief iflib method entry points
166113584Ssimokawa *
167129585Sdfr * List of device methods used by the iflib stack to interact with this
168113584Ssimokawa * driver. These are the real main entry points used to interact with this
169109801Ssimokawa * driver.
170103285Sikob */
171113584Ssimokawastatic device_method_t ice_iflib_methods[] = {
172113584Ssimokawa	DEVMETHOD(ifdi_attach_pre, ice_if_attach_pre),
173129585Sdfr	DEVMETHOD(ifdi_attach_post, ice_if_attach_post),
174129585Sdfr	DEVMETHOD(ifdi_detach, ice_if_detach),
175109801Ssimokawa	DEVMETHOD(ifdi_tx_queues_alloc, ice_if_tx_queues_alloc),
176103285Sikob	DEVMETHOD(ifdi_rx_queues_alloc, ice_if_rx_queues_alloc),
177113584Ssimokawa	DEVMETHOD(ifdi_msix_intr_assign, ice_if_msix_intr_assign),
178113584Ssimokawa	DEVMETHOD(ifdi_queues_free, ice_if_queues_free),
179129585Sdfr	DEVMETHOD(ifdi_mtu_set, ice_if_mtu_set),
180129585Sdfr	DEVMETHOD(ifdi_intr_enable, ice_if_intr_enable),
181109801Ssimokawa	DEVMETHOD(ifdi_intr_disable, ice_if_intr_disable),
182103285Sikob	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_if_rx_queue_intr_enable),
183113584Ssimokawa	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_if_tx_queue_intr_enable),
184113584Ssimokawa	DEVMETHOD(ifdi_promisc_set, ice_if_promisc_set),
185129585Sdfr	DEVMETHOD(ifdi_media_status, ice_if_media_status),
186129585Sdfr	DEVMETHOD(ifdi_media_change, ice_if_media_change),
187109801Ssimokawa	DEVMETHOD(ifdi_init, ice_if_init),
188103285Sikob	DEVMETHOD(ifdi_stop, ice_if_stop),
189113584Ssimokawa	DEVMETHOD(ifdi_timer, ice_if_timer),
190113584Ssimokawa	DEVMETHOD(ifdi_update_admin_status, ice_if_update_admin_status),
191129585Sdfr	DEVMETHOD(ifdi_multi_set, ice_if_multi_set),
192113584Ssimokawa	DEVMETHOD(ifdi_vlan_register, ice_if_vlan_register),
193129585Sdfr	DEVMETHOD(ifdi_vlan_unregister, ice_if_vlan_unregister),
194109801Ssimokawa	DEVMETHOD(ifdi_get_counter, ice_if_get_counter),
195103285Sikob	DEVMETHOD(ifdi_priv_ioctl, ice_if_priv_ioctl),
196113584Ssimokawa	DEVMETHOD(ifdi_i2c_req, ice_if_i2c_req),
197113584Ssimokawa	DEVMETHOD(ifdi_suspend, ice_if_suspend),
198129585Sdfr	DEVMETHOD(ifdi_resume, ice_if_resume),
199113584Ssimokawa	DEVMETHOD(ifdi_needs_restart, ice_if_needs_restart),
200129585Sdfr	DEVMETHOD_END
201109801Ssimokawa};
202103285Sikob
203113584Ssimokawa/**
204113584Ssimokawa * @var ice_driver
205129585Sdfr * @brief driver structure for the generic device stack
206113584Ssimokawa *
207129585Sdfr * driver_t definition used to setup the generic device methods.
208109801Ssimokawa */
209103285Sikobstatic driver_t ice_driver = {
210113584Ssimokawa	.name = "ice",
211113584Ssimokawa	.methods = ice_methods,
212129585Sdfr	.size = sizeof(struct ice_softc),
213113584Ssimokawa};
214129585Sdfr
215109801Ssimokawa/**
216109801Ssimokawa * @var ice_iflib_driver
217103285Sikob * @brief driver structure for the iflib stack
218109801Ssimokawa *
219120660Ssimokawa * driver_t definition used to setup the iflib device methods.
220120660Ssimokawa */
221120660Ssimokawastatic driver_t ice_iflib_driver = {
222120660Ssimokawa	.name = "ice",
223120660Ssimokawa	.methods = ice_iflib_methods,
224120660Ssimokawa	.size = sizeof(struct ice_softc),
225120660Ssimokawa};
226120660Ssimokawa
/*
 * iflib Tx/Rx operation tables selected by ice_setup_scctx(): ice_txrx for
 * normal operation and ice_recovery_txrx when the driver is in recovery mode.
 */
extern struct if_txrx ice_txrx, ice_recovery_txrx;
229120660Ssimokawa
230120660Ssimokawa/**
231120660Ssimokawa * @var ice_sctx
232120660Ssimokawa * @brief ice driver shared context
233120660Ssimokawa *
234120660Ssimokawa * Structure defining shared values (context) that is used by all instances of
235120660Ssimokawa * the device. Primarily used to setup details about how the iflib stack
236120660Ssimokawa * should treat this driver. Also defines the default, minimum, and maximum
237120660Ssimokawa * number of descriptors in each ring.
238120660Ssimokawa */
239120660Ssimokawastatic struct if_shared_ctx ice_sctx = {
240120660Ssimokawa	.isc_magic = IFLIB_MAGIC,
241120660Ssimokawa	.isc_q_align = PAGE_SIZE,
242120660Ssimokawa
243120660Ssimokawa	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
244120660Ssimokawa	/* We could technically set this as high as ICE_MAX_DMA_SEG_SIZE, but
245120660Ssimokawa	 * that doesn't make sense since that would be larger than the maximum
246103285Sikob	 * size of a single packet.
247129585Sdfr	 */
248103285Sikob	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
249109814Ssimokawa
250109814Ssimokawa	/* XXX: This is only used by iflib to ensure that
251109814Ssimokawa	 * scctx->isc_tx_tso_size_max + the VLAN header is a valid size.
252109814Ssimokawa	 */
253109814Ssimokawa	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
254110193Ssimokawa	/* XXX: This is used by iflib to set the number of segments in the TSO
255110193Ssimokawa	 * DMA tag. However, scctx->isc_tx_tso_segsize_max is used to set the
256109801Ssimokawa	 * related ifnet parameter.
257103285Sikob	 */
258103285Sikob	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
259103285Sikob
260103285Sikob	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
261103285Sikob	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
262103285Sikob	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
263103285Sikob
264103285Sikob	.isc_nfl = 1,
265103285Sikob	.isc_ntxqs = 1,
266103285Sikob	.isc_nrxqs = 1,
267103285Sikob
268103285Sikob	.isc_admin_intrcnt = 1,
269103285Sikob	.isc_vendor_info = ice_vendor_info_array,
270103285Sikob	.isc_driver_version = __DECONST(char *, ice_driver_version),
271129585Sdfr	.isc_driver = &ice_iflib_driver,
272103285Sikob
273109801Ssimokawa	/*
274109814Ssimokawa	 * IFLIB_NEED_SCRATCH ensures that mbufs have scratch space available
275109814Ssimokawa	 * for hardware checksum offload
276129585Sdfr	 *
277129585Sdfr	 * IFLIB_TSO_INIT_IP ensures that the TSO packets have zeroed out the
278109814Ssimokawa	 * IP sum field, required by our hardware to calculate valid TSO
279109814Ssimokawa	 * checksums.
280109814Ssimokawa	 *
281103285Sikob	 * IFLIB_ADMIN_ALWAYS_RUN ensures that the administrative task runs
282129585Sdfr	 * even when the interface is down.
283129585Sdfr	 *
284109814Ssimokawa	 * IFLIB_SKIP_MSIX allows the driver to handle allocating MSI-X
285103285Sikob	 * vectors manually instead of relying on iflib code to do this.
286109801Ssimokawa	 */
287188585Ssbruno	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
288188585Ssbruno		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX,
289188585Ssbruno
290188585Ssbruno	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
291103285Sikob	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
292103285Sikob	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
293103285Sikob	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
294103285Sikob	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
295188726Ssbruno	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
296188726Ssbruno};
297188726Ssbruno
298188726SsbrunoDRIVER_MODULE(ice, pci, ice_driver, ice_module_event_handler, NULL);
299113584Ssimokawa
300103285SikobMODULE_VERSION(ice, 1);
301103285SikobMODULE_DEPEND(ice, pci, 1, 1, 1);
302129585SdfrMODULE_DEPEND(ice, ether, 1, 1, 1);
303113584SsimokawaMODULE_DEPEND(ice, iflib, 1, 1, 1);
304113584Ssimokawa
305113584SsimokawaIFLIB_PNP_INFO(pci, ice, ice_vendor_info_array);
306113584Ssimokawa
307113584Ssimokawa/* Static driver-wide sysctls */
308188726Ssbruno#include "ice_iflib_sysctls.h"
309113584Ssimokawa
310113584Ssimokawa/**
311113584Ssimokawa * ice_pci_mapping - Map PCI BAR memory
312113584Ssimokawa * @sc: device private softc
313113584Ssimokawa *
314113584Ssimokawa * Map PCI BAR 0 for device operation.
315113584Ssimokawa */
316113584Ssimokawastatic int
317113584Ssimokawaice_pci_mapping(struct ice_softc *sc)
318129585Sdfr{
319113584Ssimokawa	int rc;
320113584Ssimokawa
321113584Ssimokawa	/* Map BAR0 */
322113584Ssimokawa	rc = ice_map_bar(sc->dev, &sc->bar0, 0);
323188585Ssbruno	if (rc)
324188585Ssbruno		return rc;
325188585Ssbruno
326188585Ssbruno	return 0;
327188585Ssbruno}
328188585Ssbruno
329188585Ssbruno/**
330188585Ssbruno * ice_free_pci_mapping - Release PCI BAR memory
331188585Ssbruno * @sc: device private softc
332188585Ssbruno *
333113584Ssimokawa * Release PCI BARs which were previously mapped by ice_pci_mapping().
334113584Ssimokawa */
335188585Ssbrunostatic void
336188585Ssbrunoice_free_pci_mapping(struct ice_softc *sc)
337188585Ssbruno{
338188585Ssbruno	/* Free BAR0 */
339188585Ssbruno	ice_free_bar(sc->dev, &sc->bar0);
340188585Ssbruno}
341188585Ssbruno
342188585Ssbruno/*
343188585Ssbruno * Device methods
344188585Ssbruno */
345188585Ssbruno
346188585Ssbruno/**
347188585Ssbruno * ice_register - register device method callback
348188585Ssbruno * @dev: the device being registered
349113584Ssimokawa *
350113584Ssimokawa * Returns a pointer to the shared context structure, which is used by iflib.
351113584Ssimokawa */
352113584Ssimokawastatic void *
353129585Sdfrice_register(device_t dev __unused)
354103285Sikob{
355103285Sikob	return &ice_sctx;
356103285Sikob} /* ice_register */
357103285Sikob
358103285Sikob/**
359103285Sikob * ice_setup_scctx - Setup the iflib softc context structure
360188726Ssbruno * @sc: the device private structure
361103285Sikob *
362103285Sikob * Setup the parameters in if_softc_ctx_t structure used by the iflib stack
363103285Sikob * when loading.
364103285Sikob */
365103285Sikobstatic void
366103285Sikobice_setup_scctx(struct ice_softc *sc)
367103285Sikob{
368103285Sikob	if_softc_ctx_t scctx = sc->scctx;
369129585Sdfr	struct ice_hw *hw = &sc->hw;
370103285Sikob	device_t dev = sc->dev;
371188585Ssbruno	bool safe_mode, recovery_mode;
372188585Ssbruno
373188585Ssbruno	safe_mode = ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE);
374188585Ssbruno	recovery_mode = ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE);
375188585Ssbruno
376188585Ssbruno	/*
377188585Ssbruno	 * If the driver loads in Safe mode or Recovery mode, limit iflib to
378188585Ssbruno	 * a single queue pair.
379103285Sikob	 */
380103285Sikob	if (safe_mode || recovery_mode) {
381103285Sikob		scctx->isc_ntxqsets = scctx->isc_nrxqsets = 1;
382103285Sikob		scctx->isc_ntxqsets_max = 1;
383103285Sikob		scctx->isc_nrxqsets_max = 1;
384103285Sikob	} else {
385188585Ssbruno		/*
386188585Ssbruno		 * iflib initially sets the isc_ntxqsets and isc_nrxqsets to
387188585Ssbruno		 * the values of the override sysctls. Cache these initial
388188585Ssbruno		 * values so that the driver can be aware of what the iflib
389188585Ssbruno		 * sysctl value is when setting up MSI-X vectors.
390188585Ssbruno		 */
391188585Ssbruno		sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets;
392188585Ssbruno		sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets;
393188585Ssbruno
394188585Ssbruno		if (scctx->isc_ntxqsets == 0)
395188585Ssbruno			scctx->isc_ntxqsets = hw->func_caps.common_cap.rss_table_size;
396188585Ssbruno		if (scctx->isc_nrxqsets == 0)
397188585Ssbruno			scctx->isc_nrxqsets = hw->func_caps.common_cap.rss_table_size;
398188585Ssbruno
399103285Sikob		scctx->isc_ntxqsets_max = hw->func_caps.common_cap.num_txq;
400109801Ssimokawa		scctx->isc_nrxqsets_max = hw->func_caps.common_cap.num_rxq;
401109801Ssimokawa
402109801Ssimokawa		/*
403103285Sikob		 * Sanity check that the iflib sysctl values are within the
404129585Sdfr		 * maximum supported range.
405129585Sdfr		 */
406129585Sdfr		if (sc->ifc_sysctl_ntxqs > scctx->isc_ntxqsets_max)
407129585Sdfr			sc->ifc_sysctl_ntxqs = scctx->isc_ntxqsets_max;
408129585Sdfr		if (sc->ifc_sysctl_nrxqs > scctx->isc_nrxqsets_max)
409103285Sikob			sc->ifc_sysctl_nrxqs = scctx->isc_nrxqsets_max;
410103285Sikob	}
411109801Ssimokawa
412103285Sikob	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
413129585Sdfr	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
414129585Sdfr	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
415129585Sdfr	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
416129585Sdfr
417103285Sikob	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
418109801Ssimokawa	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
419103285Sikob	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
420103285Sikob	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
421169019Ssimokawa
422103285Sikob	scctx->isc_msix_bar = pci_msix_table_bar(dev);
423103285Sikob	scctx->isc_rss_table_size = hw->func_caps.common_cap.rss_table_size;
424109801Ssimokawa
425103285Sikob	/*
426108281Ssimokawa	 * If the driver loads in recovery mode, disable Tx/Rx functionality
427103285Sikob	 */
428103285Sikob	if (recovery_mode)
429103285Sikob		scctx->isc_txrx = &ice_recovery_txrx;
430103285Sikob	else
431103285Sikob		scctx->isc_txrx = &ice_txrx;
432103285Sikob
433103285Sikob	/*
434103285Sikob	 * If the driver loads in Safe mode or Recovery mode, disable
435103285Sikob	 * advanced features including hardware offloads.
436103285Sikob	 */
437103285Sikob	if (safe_mode || recovery_mode) {
438103285Sikob		scctx->isc_capenable = ICE_SAFE_CAPS;
439103285Sikob		scctx->isc_tx_csum_flags = 0;
440103285Sikob	} else {
441103285Sikob		scctx->isc_capenable = ICE_FULL_CAPS;
442103285Sikob		scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
443110582Ssimokawa	}
444110582Ssimokawa
445110582Ssimokawa	scctx->isc_capabilities = scctx->isc_capenable;
446103285Sikob} /* ice_setup_scctx */
447103285Sikob
448119118Ssimokawa/**
449119118Ssimokawa * ice_if_attach_pre - Early device attach logic
450103285Sikob * @ctx: the iflib context structure
451129585Sdfr *
452103285Sikob * Called by iflib during the attach process. Earliest main driver entry
453103285Sikob * point which performs necessary hardware and driver initialization. Called
454103285Sikob * before the Tx and Rx queues are allocated.
455103285Sikob */
456103285Sikobstatic int
457127468Ssimokawaice_if_attach_pre(if_ctx_t ctx)
458103285Sikob{
459103285Sikob	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
460103285Sikob	enum ice_fw_modes fw_mode;
461103285Sikob	enum ice_status status;
462118455Ssimokawa	if_softc_ctx_t scctx;
463183397Sed	struct ice_hw *hw;
464103285Sikob	device_t dev;
465118293Ssimokawa	int err;
466103285Sikob
467103285Sikob	device_printf(iflib_get_dev(ctx), "Loading the iflib ice driver\n");
468103285Sikob
469103285Sikob	ice_set_state(&sc->state, ICE_STATE_ATTACHING);
470103285Sikob
471	sc->ctx = ctx;
472	sc->media = iflib_get_media(ctx);
473	sc->sctx = iflib_get_sctx(ctx);
474	sc->iflib_ctx_lock = iflib_ctx_lock_get(ctx);
475
476	dev = sc->dev = iflib_get_dev(ctx);
477	scctx = sc->scctx = iflib_get_softc_ctx(ctx);
478
479	hw = &sc->hw;
480	hw->back = sc;
481
482	snprintf(sc->admin_mtx_name, sizeof(sc->admin_mtx_name),
483		 "%s:admin", device_get_nameunit(dev));
484	mtx_init(&sc->admin_mtx, sc->admin_mtx_name, NULL, MTX_DEF);
485	callout_init_mtx(&sc->admin_timer, &sc->admin_mtx, 0);
486
487	ASSERT_CTX_LOCKED(sc);
488
489	if (ice_pci_mapping(sc)) {
490		err = (ENXIO);
491		goto destroy_admin_timer;
492	}
493
494	/* Save off the PCI information */
495	ice_save_pci_info(hw, dev);
496
497	/* create tunables as early as possible */
498	ice_add_device_tunables(sc);
499
500	/* Setup ControlQ lengths */
501	ice_set_ctrlq_len(hw);
502
503reinit_hw:
504
505	fw_mode = ice_get_fw_mode(hw);
506	if (fw_mode == ICE_FW_MODE_REC) {
507		device_printf(dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
508
509		err = ice_attach_pre_recovery_mode(sc);
510		if (err)
511			goto free_pci_mapping;
512
513		return (0);
514	}
515
516	/* Initialize the hw data structure */
517	status = ice_init_hw(hw);
518	if (status) {
519		if (status == ICE_ERR_FW_API_VER) {
520			/* Enter recovery mode, so that the driver remains
521			 * loaded. This way, if the system administrator
522			 * cannot update the driver, they may still attempt to
523			 * downgrade the NVM.
524			 */
525			err = ice_attach_pre_recovery_mode(sc);
526			if (err)
527				goto free_pci_mapping;
528
529			return (0);
530		} else {
531			err = EIO;
532			device_printf(dev, "Unable to initialize hw, err %s aq_err %s\n",
533				      ice_status_str(status),
534				      ice_aq_str(hw->adminq.sq_last_status));
535		}
536		goto free_pci_mapping;
537	}
538
539	ice_init_device_features(sc);
540
541	/* Keep flag set by default */
542	ice_set_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN);
543
544	/* Notify firmware of the device driver version */
545	err = ice_send_version(sc);
546	if (err)
547		goto deinit_hw;
548
549	/*
550	 * Success indicates a change was made that requires a reinitialization
551	 * of the hardware
552	 */
553	err = ice_load_pkg_file(sc);
554	if (err == ICE_SUCCESS) {
555		ice_deinit_hw(hw);
556		goto reinit_hw;
557	}
558
559	err = ice_init_link_events(sc);
560	if (err) {
561		device_printf(dev, "ice_init_link_events failed: %s\n",
562			      ice_err_str(err));
563		goto deinit_hw;
564	}
565
566	/* Initialize VLAN mode in FW; if dual VLAN mode is supported by the package
567	 * and firmware, this will force them to use single VLAN mode.
568	 */
569	status = ice_set_vlan_mode(hw);
570	if (status) {
571		err = EIO;
572		device_printf(dev, "Unable to initialize VLAN mode, err %s aq_err %s\n",
573			      ice_status_str(status),
574			      ice_aq_str(hw->adminq.sq_last_status));
575		goto deinit_hw;
576	}
577
578	ice_print_nvm_version(sc);
579
580	/* Setup the MAC address */
581	iflib_set_mac(ctx, hw->port_info->mac.lan_addr);
582
583	/* Setup the iflib softc context structure */
584	ice_setup_scctx(sc);
585
586	/* Initialize the Tx queue manager */
587	err = ice_resmgr_init(&sc->tx_qmgr, hw->func_caps.common_cap.num_txq);
588	if (err) {
589		device_printf(dev, "Unable to initialize Tx queue manager: %s\n",
590			      ice_err_str(err));
591		goto deinit_hw;
592	}
593
594	/* Initialize the Rx queue manager */
595	err = ice_resmgr_init(&sc->rx_qmgr, hw->func_caps.common_cap.num_rxq);
596	if (err) {
597		device_printf(dev, "Unable to initialize Rx queue manager: %s\n",
598			      ice_err_str(err));
599		goto free_tx_qmgr;
600	}
601
602	/* Initialize the PF device interrupt resource manager */
603	err = ice_alloc_intr_tracking(sc);
604	if (err)
605		/* Errors are already printed */
606		goto free_rx_qmgr;
607
608	/* Determine maximum number of VSIs we'll prepare for */
609	sc->num_available_vsi = min(ICE_MAX_VSI_AVAILABLE,
610				    hw->func_caps.guar_num_vsi);
611
612	if (!sc->num_available_vsi) {
613		err = EIO;
614		device_printf(dev, "No VSIs allocated to host\n");
615		goto free_intr_tracking;
616	}
617
618	/* Allocate storage for the VSI pointers */
619	sc->all_vsi = (struct ice_vsi **)
620		malloc(sizeof(struct ice_vsi *) * sc->num_available_vsi,
621		       M_ICE, M_WAITOK | M_ZERO);
622	if (!sc->all_vsi) {
623		err = ENOMEM;
624		device_printf(dev, "Unable to allocate VSI array\n");
625		goto free_intr_tracking;
626	}
627
628	/*
629	 * Prepare the statically allocated primary PF VSI in the softc
630	 * structure. Other VSIs will be dynamically allocated as needed.
631	 */
632	ice_setup_pf_vsi(sc);
633
634	err = ice_alloc_vsi_qmap(&sc->pf_vsi, scctx->isc_ntxqsets_max,
635	    scctx->isc_nrxqsets_max);
636	if (err) {
637		device_printf(dev, "Unable to allocate VSI Queue maps\n");
638		goto free_main_vsi;
639	}
640
641	/* Allocate MSI-X vectors (due to isc_flags IFLIB_SKIP_MSIX) */
642	err = ice_allocate_msix(sc);
643	if (err)
644		goto free_main_vsi;
645
646	return 0;
647
648free_main_vsi:
649	/* ice_release_vsi will free the queue maps if they were allocated */
650	ice_release_vsi(&sc->pf_vsi);
651	free(sc->all_vsi, M_ICE);
652	sc->all_vsi = NULL;
653free_intr_tracking:
654	ice_free_intr_tracking(sc);
655free_rx_qmgr:
656	ice_resmgr_destroy(&sc->rx_qmgr);
657free_tx_qmgr:
658	ice_resmgr_destroy(&sc->tx_qmgr);
659deinit_hw:
660	ice_deinit_hw(hw);
661free_pci_mapping:
662	ice_free_pci_mapping(sc);
663destroy_admin_timer:
664	mtx_lock(&sc->admin_mtx);
665	callout_stop(&sc->admin_timer);
666	mtx_unlock(&sc->admin_mtx);
667	mtx_destroy(&sc->admin_mtx);
668	return err;
669} /* ice_if_attach_pre */
670
671/**
672 * ice_attach_pre_recovery_mode - Limited driver attach_pre for FW recovery
673 * @sc: the device private softc
674 *
675 * Loads the device driver in limited Firmware Recovery mode, intended to
676 * allow users to update the firmware to attempt to recover the device.
677 *
678 * @remark We may enter recovery mode in case either (a) the firmware is
679 * detected to be in an invalid state and must be re-programmed, or (b) the
680 * driver detects that the loaded firmware has a non-compatible API version
681 * that the driver cannot operate with.
682 */
683static int
684ice_attach_pre_recovery_mode(struct ice_softc *sc)
685{
686	ice_set_state(&sc->state, ICE_STATE_RECOVERY_MODE);
687
688	/* Setup the iflib softc context */
689	ice_setup_scctx(sc);
690
691	/* Setup the PF VSI back pointer */
692	sc->pf_vsi.sc = sc;
693
694	/*
695	 * We still need to allocate MSI-X vectors since we need one vector to
696	 * run the administrative admin interrupt
697	 */
698	return ice_allocate_msix(sc);
699}
700
701/**
702 * ice_update_link_status - notify OS of link state change
703 * @sc: device private softc structure
704 * @update_media: true if we should update media even if link didn't change
705 *
706 * Called to notify iflib core of link status changes. Should be called once
707 * during attach_post, and whenever link status changes during runtime.
708 *
709 * This call only updates the currently supported media types if the link
710 * status changed, or if update_media is set to true.
711 */
712static void
713ice_update_link_status(struct ice_softc *sc, bool update_media)
714{
715	struct ice_hw *hw = &sc->hw;
716	enum ice_status status;
717
718	/* Never report link up when in recovery mode */
719	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
720		return;
721
722	/* Report link status to iflib only once each time it changes */
723	if (!ice_testandset_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED)) {
724		if (sc->link_up) { /* link is up */
725			uint64_t baudrate = ice_aq_speed_to_rate(sc->hw.port_info);
726
727			if (!(hw->port_info->phy.link_info_old.link_info & ICE_AQ_LINK_UP))
728				ice_set_default_local_lldp_mib(sc);
729
730			iflib_link_state_change(sc->ctx, LINK_STATE_UP, baudrate);
731			ice_rdma_link_change(sc, LINK_STATE_UP, baudrate);
732
733			ice_link_up_msg(sc);
734		} else { /* link is down */
735			iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
736			ice_rdma_link_change(sc, LINK_STATE_DOWN, 0);
737		}
738		update_media = true;
739	}
740
741	/* Update the supported media types */
742	if (update_media && !ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
743		status = ice_add_media_types(sc, sc->media);
744		if (status)
745			device_printf(sc->dev, "Error adding device media types: %s aq_err %s\n",
746				      ice_status_str(status),
747				      ice_aq_str(hw->adminq.sq_last_status));
748	}
749}
750
751/**
752 * ice_if_attach_post - Late device attach logic
753 * @ctx: the iflib context structure
754 *
755 * Called by iflib to finish up attaching the device. Performs any attach
756 * logic which must wait until after the Tx and Rx queues have been
757 * allocated.
758 */
759static int
760ice_if_attach_post(if_ctx_t ctx)
761{
762	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
763	if_t ifp = iflib_get_ifp(ctx);
764	enum ice_status status;
765	int err;
766
767	ASSERT_CTX_LOCKED(sc);
768
769	/* We don't yet support loading if MSI-X is not supported */
770	if (sc->scctx->isc_intr != IFLIB_INTR_MSIX) {
771		device_printf(sc->dev, "The ice driver does not support loading without MSI-X\n");
772		return (ENOTSUP);
773	}
774
775	/* The ifnet structure hasn't yet been initialized when the attach_pre
776	 * handler is called, so wait until attach_post to setup the
777	 * isc_max_frame_size.
778	 */
779
780	sc->ifp = ifp;
781	sc->scctx->isc_max_frame_size = if_getmtu(ifp) +
782		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
783
784	/*
785	 * If we are in recovery mode, only perform a limited subset of
786	 * initialization to support NVM recovery.
787	 */
788	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
789		ice_attach_post_recovery_mode(sc);
790		return (0);
791	}
792
793	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
794
795	err = ice_initialize_vsi(&sc->pf_vsi);
796	if (err) {
797		device_printf(sc->dev, "Unable to initialize Main VSI: %s\n",
798			      ice_err_str(err));
799		return err;
800	}
801
802	/* Enable FW health event reporting */
803	ice_init_health_events(sc);
804
805	/* Configure the main PF VSI for RSS */
806	err = ice_config_rss(&sc->pf_vsi);
807	if (err) {
808		device_printf(sc->dev,
809			      "Unable to configure RSS for the main VSI, err %s\n",
810			      ice_err_str(err));
811		return err;
812	}
813
814	/* Configure switch to drop transmitted LLDP and PAUSE frames */
815	err = ice_cfg_pf_ethertype_filters(sc);
816	if (err)
817		return err;
818
819	ice_get_and_print_bus_info(sc);
820
821	ice_set_link_management_mode(sc);
822
823	ice_init_saved_phy_cfg(sc);
824
825	ice_cfg_pba_num(sc);
826
827	/* Set a default value for PFC mode on attach since the FW state is unknown
828	 * before sysctl tunables are executed and it can't be queried. This fixes an
829	 * issue when loading the driver with the FW LLDP agent enabled but the FW
830	 * was previously in DSCP PFC mode.
831	 */
832	status = ice_aq_set_pfc_mode(&sc->hw, ICE_AQC_PFC_VLAN_BASED_PFC, NULL);
833	if (status != ICE_SUCCESS)
834		device_printf(sc->dev, "Setting pfc mode failed, status %s\n", ice_status_str(status));
835
836	ice_add_device_sysctls(sc);
837
838	/* Get DCBX/LLDP state and start DCBX agent */
839	ice_init_dcb_setup(sc);
840
841	/* Setup link configuration parameters */
842	ice_init_link_configuration(sc);
843	ice_update_link_status(sc, true);
844
845	/* Configure interrupt causes for the administrative interrupt */
846	ice_configure_misc_interrupts(sc);
847
848	/* Enable ITR 0 right away, so that we can handle admin interrupts */
849	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
850
851	err = ice_rdma_pf_attach(sc);
852	if (err)
853		return (err);
854
855	/* Start the admin timer */
856	mtx_lock(&sc->admin_mtx);
857	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
858	mtx_unlock(&sc->admin_mtx);
859
860	if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
861		 !ice_test_state(&sc->state, ICE_STATE_NO_MEDIA))
862		ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
863
864	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
865
866	return 0;
867} /* ice_if_attach_post */
868
869/**
870 * ice_attach_post_recovery_mode - Limited driver attach_post for FW recovery
871 * @sc: the device private softc
872 *
873 * Performs minimal work to prepare the driver to recover an NVM in case the
874 * firmware is in recovery mode.
875 */
876static void
877ice_attach_post_recovery_mode(struct ice_softc *sc)
878{
879	/* Configure interrupt causes for the administrative interrupt */
880	ice_configure_misc_interrupts(sc);
881
882	/* Enable ITR 0 right away, so that we can handle admin interrupts */
883	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
884
885	/* Start the admin timer */
886	mtx_lock(&sc->admin_mtx);
887	callout_reset(&sc->admin_timer, hz/2, ice_admin_timer, sc);
888	mtx_unlock(&sc->admin_mtx);
889
890	ice_clear_state(&sc->state, ICE_STATE_ATTACHING);
891}
892
893/**
894 * ice_free_irqvs - Free IRQ vector memory
895 * @sc: the device private softc structure
896 *
897 * Free IRQ vector memory allocated during ice_if_msix_intr_assign.
898 */
899static void
900ice_free_irqvs(struct ice_softc *sc)
901{
902	struct ice_vsi *vsi = &sc->pf_vsi;
903	if_ctx_t ctx = sc->ctx;
904	int i;
905
906	/* If the irqvs array is NULL, then there are no vectors to free */
907	if (sc->irqvs == NULL)
908		return;
909
910	/* Free the IRQ vectors */
911	for (i = 0; i < sc->num_irq_vectors; i++)
912		iflib_irq_free(ctx, &sc->irqvs[i].irq);
913
914	/* Clear the irqv pointers */
915	for (i = 0; i < vsi->num_rx_queues; i++)
916		vsi->rx_queues[i].irqv = NULL;
917
918	for (i = 0; i < vsi->num_tx_queues; i++)
919		vsi->tx_queues[i].irqv = NULL;
920
921	/* Release the vector array memory */
922	free(sc->irqvs, M_ICE);
923	sc->irqvs = NULL;
924	sc->num_irq_vectors = 0;
925}
926
927/**
928 * ice_if_detach - Device driver detach logic
929 * @ctx: iflib context structure
930 *
931 * Perform device shutdown logic to detach the device driver.
932 *
933 * Note that there is no guarantee of the ordering of ice_if_queues_free() and
934 * ice_if_detach(). It is possible for the functions to be called in either
935 * order, and they must not assume to have a strict ordering.
936 */
937static int
938ice_if_detach(if_ctx_t ctx)
939{
940	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
941	struct ice_vsi *vsi = &sc->pf_vsi;
942	enum ice_status status;
943	int i;
944
945	ASSERT_CTX_LOCKED(sc);
946
947	/* Indicate that we're detaching */
948	ice_set_state(&sc->state, ICE_STATE_DETACHING);
949
950	/* Stop the admin timer */
951	mtx_lock(&sc->admin_mtx);
952	callout_stop(&sc->admin_timer);
953	mtx_unlock(&sc->admin_mtx);
954	mtx_destroy(&sc->admin_mtx);
955
956	/* Remove additional interfaces if they exist */
957	if (sc->mirr_if)
958		ice_destroy_mirror_interface(sc);
959	ice_rdma_pf_detach(sc);
960
961	/* Free allocated media types */
962	ifmedia_removeall(sc->media);
963
964	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
965	 * pointers. Note, the calls here and those in ice_if_queues_free()
966	 * are *BOTH* necessary, as we cannot guarantee which path will be
967	 * run first
968	 */
969	ice_vsi_del_txqs_ctx(vsi);
970	ice_vsi_del_rxqs_ctx(vsi);
971
972	/* Release MSI-X resources */
973	ice_free_irqvs(sc);
974
975	for (i = 0; i < sc->num_available_vsi; i++) {
976		if (sc->all_vsi[i])
977			ice_release_vsi(sc->all_vsi[i]);
978	}
979
980	if (sc->all_vsi) {
981		free(sc->all_vsi, M_ICE);
982		sc->all_vsi = NULL;
983	}
984
985	/* Release MSI-X memory */
986	pci_release_msi(sc->dev);
987
988	if (sc->msix_table != NULL) {
989		bus_release_resource(sc->dev, SYS_RES_MEMORY,
990				     rman_get_rid(sc->msix_table),
991				     sc->msix_table);
992		sc->msix_table = NULL;
993	}
994
995	ice_free_intr_tracking(sc);
996
997	/* Destroy the queue managers */
998	ice_resmgr_destroy(&sc->tx_qmgr);
999	ice_resmgr_destroy(&sc->rx_qmgr);
1000
1001	if (!ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1002		ice_deinit_hw(&sc->hw);
1003
1004	IFLIB_CTX_UNLOCK(sc);
1005	status = ice_reset(&sc->hw, ICE_RESET_PFR);
1006	IFLIB_CTX_LOCK(sc);
1007	if (status) {
1008		device_printf(sc->dev, "device PF reset failed, err %s\n",
1009			      ice_status_str(status));
1010	}
1011
1012	ice_free_pci_mapping(sc);
1013
1014	return 0;
1015} /* ice_if_detach */
1016
1017/**
1018 * ice_if_tx_queues_alloc - Allocate Tx queue memory
1019 * @ctx: iflib context structure
1020 * @vaddrs: virtual addresses for the queue memory
1021 * @paddrs: physical addresses for the queue memory
1022 * @ntxqs: the number of Tx queues per set (should always be 1)
1023 * @ntxqsets: the number of Tx queue sets to allocate
1024 *
1025 * Called by iflib to allocate Tx queues for the device. Allocates driver
1026 * memory to track each queue, the status arrays used for descriptor
1027 * status reporting, and Tx queue sysctls.
1028 */
1029static int
1030ice_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1031		       int __invariant_only ntxqs, int ntxqsets)
1032{
1033	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1034	struct ice_vsi *vsi = &sc->pf_vsi;
1035	struct ice_tx_queue *txq;
1036	int err, i, j;
1037
1038	MPASS(ntxqs == 1);
1039	MPASS(sc->scctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
1040	ASSERT_CTX_LOCKED(sc);
1041
1042	/* Do not bother allocating queues if we're in recovery mode */
1043	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1044		return (0);
1045
1046	/* Allocate queue structure memory */
1047	if (!(vsi->tx_queues =
1048	      (struct ice_tx_queue *) malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1049		device_printf(sc->dev, "Unable to allocate Tx queue memory\n");
1050		return (ENOMEM);
1051	}
1052
1053	/* Allocate report status arrays */
1054	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1055		if (!(txq->tx_rsq =
1056		      (uint16_t *) malloc(sizeof(uint16_t) * sc->scctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
1057			device_printf(sc->dev, "Unable to allocate tx_rsq memory\n");
1058			err = ENOMEM;
1059			goto free_tx_queues;
1060		}
1061		/* Initialize report status array */
1062		for (j = 0; j < sc->scctx->isc_ntxd[0]; j++)
1063			txq->tx_rsq[j] = QIDX_INVALID;
1064	}
1065
1066	/* Assign queues from PF space to the main VSI */
1067	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap, ntxqsets);
1068	if (err) {
1069		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1070			      ice_err_str(err));
1071		goto free_tx_queues;
1072	}
1073	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1074
1075	/* Add Tx queue sysctls context */
1076	ice_vsi_add_txqs_ctx(vsi);
1077
1078	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1079		/* q_handle == me when only one TC */
1080		txq->me = txq->q_handle = i;
1081		txq->vsi = vsi;
1082
1083		/* store the queue size for easier access */
1084		txq->desc_count = sc->scctx->isc_ntxd[0];
1085
1086		/* get the virtual and physical address of the hardware queues */
1087		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
1088		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
1089		txq->tx_paddr = paddrs[i];
1090
1091		ice_add_txq_sysctls(txq);
1092	}
1093
1094	vsi->num_tx_queues = ntxqsets;
1095
1096	return (0);
1097
1098free_tx_queues:
1099	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
1100		if (txq->tx_rsq != NULL) {
1101			free(txq->tx_rsq, M_ICE);
1102			txq->tx_rsq = NULL;
1103		}
1104	}
1105	free(vsi->tx_queues, M_ICE);
1106	vsi->tx_queues = NULL;
1107	return err;
1108}
1109
1110/**
1111 * ice_if_rx_queues_alloc - Allocate Rx queue memory
1112 * @ctx: iflib context structure
1113 * @vaddrs: virtual addresses for the queue memory
1114 * @paddrs: physical addresses for the queue memory
1115 * @nrxqs: number of Rx queues per set (should always be 1)
1116 * @nrxqsets: number of Rx queue sets to allocate
1117 *
1118 * Called by iflib to allocate Rx queues for the device. Allocates driver
1119 * memory to track each queue, as well as sets up the Rx queue sysctls.
1120 */
1121static int
1122ice_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
1123		       int __invariant_only nrxqs, int nrxqsets)
1124{
1125	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1126	struct ice_vsi *vsi = &sc->pf_vsi;
1127	struct ice_rx_queue *rxq;
1128	int err, i;
1129
1130	MPASS(nrxqs == 1);
1131	MPASS(sc->scctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
1132	ASSERT_CTX_LOCKED(sc);
1133
1134	/* Do not bother allocating queues if we're in recovery mode */
1135	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1136		return (0);
1137
1138	/* Allocate queue structure memory */
1139	if (!(vsi->rx_queues =
1140	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
1141		device_printf(sc->dev, "Unable to allocate Rx queue memory\n");
1142		return (ENOMEM);
1143	}
1144
1145	/* Assign queues from PF space to the main VSI */
1146	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap, nrxqsets);
1147	if (err) {
1148		device_printf(sc->dev, "Unable to assign PF queues: %s\n",
1149			      ice_err_str(err));
1150		goto free_rx_queues;
1151	}
1152	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
1153
1154	/* Add Rx queue sysctls context */
1155	ice_vsi_add_rxqs_ctx(vsi);
1156
1157	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
1158		rxq->me = i;
1159		rxq->vsi = vsi;
1160
1161		/* store the queue size for easier access */
1162		rxq->desc_count = sc->scctx->isc_nrxd[0];
1163
1164		/* get the virtual and physical address of the hardware queues */
1165		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
1166		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
1167		rxq->rx_paddr = paddrs[i];
1168
1169		ice_add_rxq_sysctls(rxq);
1170	}
1171
1172	vsi->num_rx_queues = nrxqsets;
1173
1174	return (0);
1175
1176free_rx_queues:
1177	free(vsi->rx_queues, M_ICE);
1178	vsi->rx_queues = NULL;
1179	return err;
1180}
1181
1182/**
1183 * ice_if_queues_free - Free queue memory
1184 * @ctx: the iflib context structure
1185 *
1186 * Free queue memory allocated by ice_if_tx_queues_alloc() and
1187 * ice_if_rx_queues_alloc().
1188 *
1189 * There is no guarantee that ice_if_queues_free() and ice_if_detach() will be
1190 * called in the same order. It's possible for ice_if_queues_free() to be
1191 * called prior to ice_if_detach(), and vice versa.
1192 *
1193 * For this reason, the main VSI is a static member of the ice_softc, which is
1194 * not free'd until after iflib finishes calling both of these functions.
1195 *
1196 * Thus, care must be taken in how we manage the memory being freed by this
1197 * function, and in what tasks it can and must perform.
1198 */
1199static void
1200ice_if_queues_free(if_ctx_t ctx)
1201{
1202	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1203	struct ice_vsi *vsi = &sc->pf_vsi;
1204	struct ice_tx_queue *txq;
1205	int i;
1206
1207	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
1208	 * pointers. Note, the calls here and those in ice_if_detach()
1209	 * are *BOTH* necessary, as we cannot guarantee which path will be
1210	 * run first
1211	 */
1212	ice_vsi_del_txqs_ctx(vsi);
1213	ice_vsi_del_rxqs_ctx(vsi);
1214
1215	/* Release MSI-X IRQ vectors, if not yet released in ice_if_detach */
1216	ice_free_irqvs(sc);
1217
1218	if (vsi->tx_queues != NULL) {
1219		/* free the tx_rsq arrays */
1220		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1221			if (txq->tx_rsq != NULL) {
1222				free(txq->tx_rsq, M_ICE);
1223				txq->tx_rsq = NULL;
1224			}
1225		}
1226		free(vsi->tx_queues, M_ICE);
1227		vsi->tx_queues = NULL;
1228		vsi->num_tx_queues = 0;
1229	}
1230	if (vsi->rx_queues != NULL) {
1231		free(vsi->rx_queues, M_ICE);
1232		vsi->rx_queues = NULL;
1233		vsi->num_rx_queues = 0;
1234	}
1235}
1236
1237/**
1238 * ice_msix_que - Fast interrupt handler for MSI-X receive queues
1239 * @arg: The Rx queue memory
1240 *
1241 * Interrupt filter function for iflib MSI-X interrupts. Called by iflib when
1242 * an MSI-X interrupt for a given queue is triggered. Currently this just asks
1243 * iflib to schedule the main Rx thread.
1244 */
1245static int
1246ice_msix_que(void *arg)
1247{
1248	struct ice_rx_queue __unused *rxq = (struct ice_rx_queue *)arg;
1249
1250	/* TODO: dynamic ITR algorithm?? */
1251
1252	return (FILTER_SCHEDULE_THREAD);
1253}
1254
1255/**
1256 * ice_msix_admin - Fast interrupt handler for MSI-X admin interrupt
1257 * @arg: pointer to device softc memory
1258 *
1259 * Called by iflib when an administrative interrupt occurs. Should perform any
1260 * fast logic for handling the interrupt cause, and then indicate whether the
1261 * admin task needs to be queued.
1262 */
1263static int
1264ice_msix_admin(void *arg)
1265{
1266	struct ice_softc *sc = (struct ice_softc *)arg;
1267	struct ice_hw *hw = &sc->hw;
1268	device_t dev = sc->dev;
1269	u32 oicr;
1270
1271	/* There is no safe way to modify the enabled miscellaneous causes of
1272	 * the OICR vector at runtime, as doing so would be prone to race
1273	 * conditions. Reading PFINT_OICR will unmask the associated interrupt
1274	 * causes and allow future interrupts to occur. The admin interrupt
1275	 * vector will not be re-enabled until after we exit this function,
1276	 * but any delayed tasks must be resilient against possible "late
1277	 * arrival" interrupts that occur while we're already handling the
1278	 * task. This is done by using state bits and serializing these
1279	 * delayed tasks via the admin status task function.
1280	 */
1281	oicr = rd32(hw, PFINT_OICR);
1282
1283	/* Processing multiple controlq interrupts on a single vector does not
1284	 * provide an indication of which controlq triggered the interrupt.
1285	 * We might try reading the INTEVENT bit of the respective PFINT_*_CTL
1286	 * registers. However, the INTEVENT bit is not guaranteed to be set as
1287	 * it gets automatically cleared when the hardware acknowledges the
1288	 * interrupt.
1289	 *
1290	 * This means we don't really have a good indication of whether or
1291	 * which controlq triggered this interrupt. We'll just notify the
1292	 * admin task that it should check all the controlqs.
1293	 */
1294	ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
1295
1296	if (oicr & PFINT_OICR_VFLR_M) {
1297		ice_set_state(&sc->state, ICE_STATE_VFLR_PENDING);
1298	}
1299
1300	if (oicr & PFINT_OICR_MAL_DETECT_M) {
1301		ice_set_state(&sc->state, ICE_STATE_MDD_PENDING);
1302	}
1303
1304	if (oicr & PFINT_OICR_GRST_M) {
1305		u32 reset;
1306
1307		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
1308			GLGEN_RSTAT_RESET_TYPE_S;
1309
1310		if (reset == ICE_RESET_CORER)
1311			sc->soft_stats.corer_count++;
1312		else if (reset == ICE_RESET_GLOBR)
1313			sc->soft_stats.globr_count++;
1314		else
1315			sc->soft_stats.empr_count++;
1316
1317		/* There are a couple of bits at play for handling resets.
1318		 * First, the ICE_STATE_RESET_OICR_RECV bit is used to
1319		 * indicate that the driver has received an OICR with a reset
1320		 * bit active, indicating that a CORER/GLOBR/EMPR is about to
1321		 * happen. Second, we set hw->reset_ongoing to indicate that
1322		 * the hardware is in reset. We will set this back to false as
1323		 * soon as the driver has determined that the hardware is out
1324		 * of reset.
1325		 *
1326		 * If the driver wishes to trigger a request, it can set one of
1327		 * the ICE_STATE_RESET_*_REQ bits, which will trigger the
1328		 * correct type of reset.
1329		 */
1330		if (!ice_testandset_state(&sc->state, ICE_STATE_RESET_OICR_RECV)) {
1331			hw->reset_ongoing = true;
1332			/*
1333			 * During the NVM update process, there is a driver reset and link
1334			 * goes down and then up. The below if-statement prevents a second
1335			 * link flap from occurring in ice_if_init().
1336			 */
1337			if (if_getflags(sc->ifp) & IFF_UP)
1338				ice_set_state(&sc->state, ICE_STATE_FIRST_INIT_LINK);
1339		}
1340	}
1341
1342	if (oicr & PFINT_OICR_ECC_ERR_M) {
1343		device_printf(dev, "ECC Error detected!\n");
1344		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1345	}
1346
1347	if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M)) {
1348		if (oicr & PFINT_OICR_HMC_ERR_M)
1349			/* Log the HMC errors */
1350			ice_log_hmc_error(hw, dev);
1351		ice_rdma_notify_pe_intr(sc, oicr);
1352	}
1353
1354	if (oicr & PFINT_OICR_PCI_EXCEPTION_M) {
1355		device_printf(dev, "PCI Exception detected!\n");
1356		ice_set_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
1357	}
1358
1359	return (FILTER_SCHEDULE_THREAD);
1360}
1361
1362/**
1363 * ice_allocate_msix - Allocate MSI-X vectors for the interface
1364 * @sc: the device private softc
1365 *
1366 * Map the MSI-X bar, and then request MSI-X vectors in a two-stage process.
1367 *
1368 * First, determine a suitable total number of vectors based on the number
1369 * of CPUs, RSS buckets, the administrative vector, and other demands such as
1370 * RDMA.
1371 *
1372 * Request the desired amount of vectors, and see how many we obtain. If we
1373 * don't obtain as many as desired, reduce the demands by lowering the number
1374 * of requested queues or reducing the demand from other features such as
1375 * RDMA.
1376 *
1377 * @remark This function is required because the driver sets the
1378 * IFLIB_SKIP_MSIX flag indicating that the driver will manage MSI-X vectors
1379 * manually.
1380 *
1381 * @remark This driver will only use MSI-X vectors. If this is not possible,
1382 * neither MSI or legacy interrupts will be tried.
1383 *
1384 * @remark if it exists, os_imgr is initialized here for keeping track of
1385 * the assignments of extra MSIX vectors.
1386 *
1387 * @post on success this function must set the following scctx parameters:
1388 * isc_vectors, isc_nrxqsets, isc_ntxqsets, and isc_intr.
1389 *
1390 * @returns zero on success or an error code on failure.
1391 */
1392static int
1393ice_allocate_msix(struct ice_softc *sc)
1394{
1395	bool iflib_override_queue_count = false;
1396	if_softc_ctx_t scctx = sc->scctx;
1397	device_t dev = sc->dev;
1398	cpuset_t cpus;
1399	int bar, queues, vectors, requested;
1400	int err = 0;
1401	int rdma;
1402
1403	/* Allocate the MSI-X bar */
1404	bar = scctx->isc_msix_bar;
1405	sc->msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE);
1406	if (!sc->msix_table) {
1407		device_printf(dev, "Unable to map MSI-X table\n");
1408		return (ENOMEM);
1409	}
1410
1411	/* Check if the iflib queue count sysctls have been set */
1412	if (sc->ifc_sysctl_ntxqs || sc->ifc_sysctl_nrxqs)
1413		iflib_override_queue_count = true;
1414
1415	err = bus_get_cpus(dev, INTR_CPUS, sizeof(cpus), &cpus);
1416	if (err) {
1417		device_printf(dev, "%s: Unable to fetch the CPU list: %s\n",
1418			      __func__, ice_err_str(err));
1419		CPU_COPY(&all_cpus, &cpus);
1420	}
1421
1422	/* Attempt to mimic behavior of iflib_msix_init */
1423	if (iflib_override_queue_count) {
1424		/*
1425		 * If the override sysctls have been set, limit the queues to
1426		 * the number of logical CPUs.
1427		 */
1428		queues = mp_ncpus;
1429	} else {
1430		/*
1431		 * Otherwise, limit the queue count to the CPUs associated
1432		 * with the NUMA node the device is associated with.
1433		 */
1434		queues = CPU_COUNT(&cpus);
1435	}
1436
1437	/* Clamp to the number of RSS buckets */
1438	queues = imin(queues, rss_getnumbuckets());
1439
1440	/*
1441	 * Clamp the number of queue pairs to the minimum of the requested Tx
1442	 * and Rx queues.
1443	 */
1444	queues = imin(queues, sc->ifc_sysctl_ntxqs ?: scctx->isc_ntxqsets);
1445	queues = imin(queues, sc->ifc_sysctl_nrxqs ?: scctx->isc_nrxqsets);
1446
1447	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RDMA)) {
1448		/*
1449		 * Choose a number of RDMA vectors based on the number of CPUs
1450		 * up to a maximum
1451		 */
1452		rdma = min(CPU_COUNT(&cpus), ICE_RDMA_MAX_MSIX);
1453
1454		/* Further limit by the user configurable tunable */
1455		rdma = min(rdma, ice_rdma_max_msix);
1456	} else {
1457		rdma = 0;
1458	}
1459
1460	/*
1461	 * Determine the number of vectors to request. Note that we also need
1462	 * to allocate one vector for administrative tasks.
1463	 */
1464	requested = rdma + queues + 1;
1465	/* Add extra vectors requested by the user for later subinterface
1466	 * creation.
1467	 */
1468	if_ctx_t ctx = sc->ctx;
1469	u32 extra_vectors = iflib_get_extra_msix_vectors_sysctl(ctx);
1470	requested += extra_vectors;
1471
1472	vectors = requested;
1473	err = pci_alloc_msix(dev, &vectors);
1474	if (err) {
1475		device_printf(dev, "Failed to allocate %d MSI-X vectors, err %s\n",
1476			      vectors, ice_err_str(err));
1477		goto err_free_msix_table;
1478	}
1479
1480	/* If we don't receive enough vectors, reduce demands */
1481	if (vectors < requested) {
1482		int diff = requested - vectors;
1483
1484		device_printf(dev, "Requested %d MSI-X vectors, but got only %d\n",
1485			      requested, vectors);
1486
1487		diff += extra_vectors;
1488		extra_vectors = 0;
1489		/*
1490		 * The OS didn't grant us the requested number of vectors.
1491		 * Check to see if we can reduce demands by limiting the
1492		 * number of vectors allocated to certain features.
1493		 */
1494
1495		if (rdma >= diff) {
1496			/* Reduce the number of RDMA vectors we reserve */
1497			rdma -= diff;
1498			diff = 0;
1499		} else {
1500			/* Disable RDMA and reduce the difference */
1501			ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
1502			diff -= rdma;
1503			rdma = 0;
1504		}
1505
1506		/*
1507		 * If we still have a difference, we need to reduce the number
1508		 * of queue pairs.
1509		 *
1510		 * However, we still need at least one vector for the admin
1511		 * interrupt and one queue pair.
1512		 */
1513		if (queues <= diff) {
1514			device_printf(dev, "Unable to allocate sufficient MSI-X vectors\n");
1515			err = (ERANGE);
1516			goto err_pci_release_msi;
1517		}
1518
1519		queues -= diff;
1520	}
1521
1522	device_printf(dev, "Using %d Tx and Rx queues\n", queues);
1523	if (rdma)
1524		device_printf(dev, "Reserving %d MSI-X interrupts for iRDMA\n",
1525			      rdma);
1526	device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
1527		      vectors);
1528
1529	/* Split resulting vectors back into requested splits */
1530	scctx->isc_vectors = vectors;
1531	scctx->isc_nrxqsets = queues;
1532	scctx->isc_ntxqsets = queues;
1533	scctx->isc_intr = IFLIB_INTR_MSIX;
1534
1535	sc->irdma_vectors = rdma;
1536
1537	/* Interrupt allocation tracking isn't required in recovery mode,
1538	 * since neither RDMA nor VFs are enabled.
1539	 */
1540	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1541		return (0);
1542
1543	/* Keep track of which interrupt indices are being used for what */
1544	sc->lan_vectors = vectors - rdma;
1545	sc->lan_vectors -= extra_vectors;
1546	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->pf_imap, sc->lan_vectors);
1547	if (err) {
1548		device_printf(dev, "Unable to assign PF interrupt mapping: %s\n",
1549			      ice_err_str(err));
1550		goto err_pci_release_msi;
1551	}
1552	err = ice_resmgr_assign_contiguous(&sc->dev_imgr, sc->rdma_imap, rdma);
1553	if (err) {
1554		device_printf(dev, "Unable to assign PF RDMA interrupt mapping: %s\n",
1555			      ice_err_str(err));
1556		goto err_release_pf_imap;
1557	}
1558	sc->extra_vectors = extra_vectors;
1559	/* Setup another resource manager to track the assignments of extra OS
1560	 * vectors. These OS interrupt allocations don't need to be contiguous,
1561	 * unlike the ones that come from the device.
1562	 */
1563	err = ice_resmgr_init(&sc->os_imgr, sc->extra_vectors);
1564	if (err) {
1565		device_printf(dev, "Unable to initialize OS extra interrupt manager: %s\n",
1566			      ice_err_str(err));
1567		ice_resmgr_release_map(&sc->dev_imgr, sc->rdma_imap,
1568					    rdma);
1569		goto err_release_pf_imap;
1570	}
1571	return (0);
1572
1573err_release_pf_imap:
1574	ice_resmgr_release_map(&sc->dev_imgr, sc->pf_imap,
1575				    sc->lan_vectors);
1576err_pci_release_msi:
1577	pci_release_msi(dev);
1578err_free_msix_table:
1579	if (sc->msix_table != NULL) {
1580		bus_release_resource(sc->dev, SYS_RES_MEMORY,
1581				rman_get_rid(sc->msix_table),
1582				sc->msix_table);
1583		sc->msix_table = NULL;
1584	}
1585
1586	return (err);
1587}
1588
1589/**
1590 * ice_if_msix_intr_assign - Assign MSI-X interrupt vectors to queues
1591 * @ctx: the iflib context structure
1592 * @msix: the number of vectors we were assigned
1593 *
1594 * Called by iflib to assign MSI-X vectors to queues. Currently requires that
1595 * we get at least the same number of vectors as we have queues, and that we
1596 * always have the same number of Tx and Rx queues.
1597 *
1598 * Tx queues use a softirq instead of using their own hardware interrupt.
1599 */
1600static int
1601ice_if_msix_intr_assign(if_ctx_t ctx, int msix)
1602{
1603	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1604	struct ice_vsi *vsi = &sc->pf_vsi;
1605	int err, i, vector;
1606
1607	ASSERT_CTX_LOCKED(sc);
1608
1609	if (vsi->num_rx_queues != vsi->num_tx_queues) {
1610		device_printf(sc->dev,
1611			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
1612			      vsi->num_tx_queues, vsi->num_rx_queues);
1613		return (EOPNOTSUPP);
1614	}
1615
1616	if (msix < (vsi->num_rx_queues + 1)) {
1617		device_printf(sc->dev,
1618			      "Not enough MSI-X vectors to assign one vector to each queue pair\n");
1619		return (EOPNOTSUPP);
1620	}
1621
1622	/* Save the number of vectors for future use */
1623	sc->num_irq_vectors = vsi->num_rx_queues + 1;
1624
1625	/* Allocate space to store the IRQ vector data */
1626	if (!(sc->irqvs =
1627	      (struct ice_irq_vector *) malloc(sizeof(struct ice_irq_vector) * (sc->num_irq_vectors),
1628					       M_ICE, M_NOWAIT))) {
1629		device_printf(sc->dev,
1630			      "Unable to allocate irqv memory\n");
1631		return (ENOMEM);
1632	}
1633
1634	/* Administrative interrupt events will use vector 0 */
1635	err = iflib_irq_alloc_generic(ctx, &sc->irqvs[0].irq, 1, IFLIB_INTR_ADMIN,
1636				      ice_msix_admin, sc, 0, "admin");
1637	if (err) {
1638		device_printf(sc->dev,
1639			      "Failed to register Admin queue handler: %s\n",
1640			      ice_err_str(err));
1641		goto free_irqvs;
1642	}
1643	sc->irqvs[0].me = 0;
1644
1645	/* Do not allocate queue interrupts when in recovery mode */
1646	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1647		return (0);
1648
1649	int rid;
1650	for (i = 0, vector = 1; i < vsi->num_rx_queues; i++, vector++) {
1651		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
1652		struct ice_tx_queue *txq = &vsi->tx_queues[i];
1653		char irq_name[16];
1654
1655		rid = vector + 1;
1656
1657		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
1658		err = iflib_irq_alloc_generic(ctx, &sc->irqvs[vector].irq, rid,
1659					      IFLIB_INTR_RXTX, ice_msix_que,
1660					      rxq, rxq->me, irq_name);
1661		if (err) {
1662			device_printf(sc->dev,
1663				      "Failed to allocate q int %d err: %s\n",
1664				      i, ice_err_str(err));
1665			vector--;
1666			i--;
1667			goto fail;
1668		}
1669		sc->irqvs[vector].me = vector;
1670		rxq->irqv = &sc->irqvs[vector];
1671
1672		bzero(irq_name, sizeof(irq_name));
1673
1674		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
1675		iflib_softirq_alloc_generic(ctx, &sc->irqvs[vector].irq,
1676					    IFLIB_INTR_TX, txq,
1677					    txq->me, irq_name);
1678		txq->irqv = &sc->irqvs[vector];
1679	}
1680
1681	/* For future interrupt assignments */
1682	sc->last_rid = rid + sc->irdma_vectors;
1683
1684	return (0);
1685fail:
1686	for (; i >= 0; i--, vector--)
1687		iflib_irq_free(ctx, &sc->irqvs[vector].irq);
1688	iflib_irq_free(ctx, &sc->irqvs[0].irq);
1689free_irqvs:
1690	free(sc->irqvs, M_ICE);
1691	sc->irqvs = NULL;
1692	return err;
1693}
1694
1695/**
1696 * ice_if_mtu_set - Set the device MTU
1697 * @ctx: iflib context structure
1698 * @mtu: the MTU requested
1699 *
1700 * Called by iflib to configure the device's Maximum Transmission Unit (MTU).
1701 *
1702 * @pre assumes the caller holds the iflib CTX lock
1703 */
1704static int
1705ice_if_mtu_set(if_ctx_t ctx, uint32_t mtu)
1706{
1707	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1708
1709	ASSERT_CTX_LOCKED(sc);
1710
1711	/* Do not support configuration when in recovery mode */
1712	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1713		return (ENOSYS);
1714
1715	if (mtu < ICE_MIN_MTU || mtu > ICE_MAX_MTU)
1716		return (EINVAL);
1717
1718	sc->scctx->isc_max_frame_size = mtu +
1719		ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1720
1721	sc->pf_vsi.max_frame_size = sc->scctx->isc_max_frame_size;
1722
1723	return (0);
1724}
1725
1726/**
1727 * ice_if_intr_enable - Enable device interrupts
1728 * @ctx: iflib context structure
1729 *
1730 * Called by iflib to request enabling device interrupts.
1731 */
1732static void
1733ice_if_intr_enable(if_ctx_t ctx)
1734{
1735	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1736	struct ice_vsi *vsi = &sc->pf_vsi;
1737	struct ice_hw *hw = &sc->hw;
1738
1739	ASSERT_CTX_LOCKED(sc);
1740
1741	/* Enable ITR 0 */
1742	ice_enable_intr(hw, sc->irqvs[0].me);
1743
1744	/* Do not enable queue interrupts in recovery mode */
1745	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1746		return;
1747
1748	/* Enable all queue interrupts */
1749	for (int i = 0; i < vsi->num_rx_queues; i++)
1750		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
1751}
1752
1753/**
1754 * ice_if_intr_disable - Disable device interrupts
1755 * @ctx: iflib context structure
1756 *
1757 * Called by iflib to request disabling device interrupts.
1758 */
1759static void
1760ice_if_intr_disable(if_ctx_t ctx)
1761{
1762	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1763	struct ice_hw *hw = &sc->hw;
1764	unsigned int i;
1765
1766	ASSERT_CTX_LOCKED(sc);
1767
1768	/* IFDI_INTR_DISABLE may be called prior to interrupts actually being
1769	 * assigned to queues. Instead of assuming that the interrupt
1770	 * assignment in the rx_queues structure is valid, just disable all
1771	 * possible interrupts
1772	 *
1773	 * Note that we choose not to disable ITR 0 because this handles the
1774	 * AdminQ interrupts, and we want to keep processing these even when
1775	 * the interface is offline.
1776	 */
1777	for (i = 1; i < hw->func_caps.common_cap.num_msix_vectors; i++)
1778		ice_disable_intr(hw, i);
1779}
1780
1781/**
1782 * ice_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
1783 * @ctx: iflib context structure
1784 * @rxqid: the Rx queue to enable
1785 *
1786 * Enable a specific Rx queue interrupt.
1787 *
1788 * This function is not protected by the iflib CTX lock.
1789 */
1790static int
1791ice_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
1792{
1793	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1794	struct ice_vsi *vsi = &sc->pf_vsi;
1795	struct ice_hw *hw = &sc->hw;
1796
1797	/* Do not enable queue interrupts in recovery mode */
1798	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1799		return (ENOSYS);
1800
1801	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
1802	return (0);
1803}
1804
1805/**
1806 * ice_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
1807 * @ctx: iflib context structure
1808 * @txqid: the Tx queue to enable
1809 *
1810 * Enable a specific Tx queue interrupt.
1811 *
1812 * This function is not protected by the iflib CTX lock.
1813 */
1814static int
1815ice_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
1816{
1817	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1818	struct ice_vsi *vsi = &sc->pf_vsi;
1819	struct ice_hw *hw = &sc->hw;
1820
1821	/* Do not enable queue interrupts in recovery mode */
1822	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1823		return (ENOSYS);
1824
1825	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
1826	return (0);
1827}
1828
1829/**
1830 * ice_set_default_promisc_mask - Set default config for promisc settings
1831 * @promisc_mask: bitmask to setup
1832 *
1833 * The ice_(set|clear)_vsi_promisc() function expects a mask of promiscuous
1834 * modes to operate on. The mask used in here is the default one for the
1835 * driver, where promiscuous is enabled/disabled for all types of
1836 * non-VLAN-tagged/VLAN 0 traffic.
1837 */
1838static void
1839ice_set_default_promisc_mask(ice_bitmap_t *promisc_mask)
1840{
1841	ice_zero_bitmap(promisc_mask, ICE_PROMISC_MAX);
1842	ice_set_bit(ICE_PROMISC_UCAST_TX, promisc_mask);
1843	ice_set_bit(ICE_PROMISC_UCAST_RX, promisc_mask);
1844	ice_set_bit(ICE_PROMISC_MCAST_TX, promisc_mask);
1845	ice_set_bit(ICE_PROMISC_MCAST_RX, promisc_mask);
1846}
1847
1848/**
1849 * ice_if_promisc_set - Set device promiscuous mode
1850 * @ctx: iflib context structure
1851 * @flags: promiscuous flags to configure
1852 *
1853 * Called by iflib to configure device promiscuous mode.
1854 *
1855 * @remark Calls to this function will always overwrite the previous setting
1856 */
1857static int
1858ice_if_promisc_set(if_ctx_t ctx, int flags)
1859{
1860	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1861	struct ice_hw *hw = &sc->hw;
1862	device_t dev = sc->dev;
1863	enum ice_status status;
1864	bool promisc_enable = flags & IFF_PROMISC;
1865	bool multi_enable = flags & IFF_ALLMULTI;
1866	ice_declare_bitmap(promisc_mask, ICE_PROMISC_MAX);
1867
1868	/* Do not support configuration when in recovery mode */
1869	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1870		return (ENOSYS);
1871
1872	ice_set_default_promisc_mask(promisc_mask);
1873
1874	if (multi_enable)
1875		return (EOPNOTSUPP);
1876
1877	if (promisc_enable) {
1878		status = ice_set_vsi_promisc(hw, sc->pf_vsi.idx,
1879					     promisc_mask, 0);
1880		if (status && status != ICE_ERR_ALREADY_EXISTS) {
1881			device_printf(dev,
1882				      "Failed to enable promiscuous mode for PF VSI, err %s aq_err %s\n",
1883				      ice_status_str(status),
1884				      ice_aq_str(hw->adminq.sq_last_status));
1885			return (EIO);
1886		}
1887	} else {
1888		status = ice_clear_vsi_promisc(hw, sc->pf_vsi.idx,
1889					       promisc_mask, 0);
1890		if (status) {
1891			device_printf(dev,
1892				      "Failed to disable promiscuous mode for PF VSI, err %s aq_err %s\n",
1893				      ice_status_str(status),
1894				      ice_aq_str(hw->adminq.sq_last_status));
1895			return (EIO);
1896		}
1897	}
1898
1899	return (0);
1900}
1901
1902/**
1903 * ice_if_media_change - Change device media
1904 * @ctx: device ctx structure
1905 *
1906 * Called by iflib when a media change is requested. This operation is not
1907 * supported by the hardware, so we just return an error code.
1908 */
1909static int
1910ice_if_media_change(if_ctx_t ctx)
1911{
1912	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1913
1914	device_printf(sc->dev, "Media change is not supported.\n");
1915	return (ENODEV);
1916}
1917
1918/**
1919 * ice_if_media_status - Report current device media
1920 * @ctx: iflib context structure
1921 * @ifmr: ifmedia request structure to update
1922 *
1923 * Updates the provided ifmr with current device media status, including link
1924 * status and media type.
1925 */
1926static void
1927ice_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1928{
1929	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
1930	struct ice_link_status *li = &sc->hw.port_info->phy.link_info;
1931
1932	ifmr->ifm_status = IFM_AVALID;
1933	ifmr->ifm_active = IFM_ETHER;
1934
1935	/* Never report link up or media types when in recovery mode */
1936	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
1937		return;
1938
1939	if (!sc->link_up)
1940		return;
1941
1942	ifmr->ifm_status |= IFM_ACTIVE;
1943	ifmr->ifm_active |= IFM_FDX;
1944
1945	if (li->phy_type_low)
1946		ifmr->ifm_active |= ice_get_phy_type_low(li->phy_type_low);
1947	else if (li->phy_type_high)
1948		ifmr->ifm_active |= ice_get_phy_type_high(li->phy_type_high);
1949	else
1950		ifmr->ifm_active |= IFM_UNKNOWN;
1951
1952	/* Report flow control status as well */
1953	if (li->an_info & ICE_AQ_LINK_PAUSE_TX)
1954		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1955	if (li->an_info & ICE_AQ_LINK_PAUSE_RX)
1956		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1957}
1958
1959/**
1960 * ice_init_tx_tracking - Initialize Tx queue software tracking values
1961 * @vsi: the VSI to initialize
1962 *
1963 * Initialize Tx queue software tracking values, including the Report Status
1964 * queue, and related software tracking values.
1965 */
1966static void
1967ice_init_tx_tracking(struct ice_vsi *vsi)
1968{
1969	struct ice_tx_queue *txq;
1970	size_t j;
1971	int i;
1972
1973	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
1974
1975		txq->tx_rs_cidx = txq->tx_rs_pidx = 0;
1976
1977		/* Initialize the last processed descriptor to be the end of
1978		 * the ring, rather than the start, so that we avoid an
1979		 * off-by-one error in ice_ift_txd_credits_update for the
1980		 * first packet.
1981		 */
1982		txq->tx_cidx_processed = txq->desc_count - 1;
1983
1984		for (j = 0; j < txq->desc_count; j++)
1985			txq->tx_rsq[j] = QIDX_INVALID;
1986	}
1987}
1988
1989/**
1990 * ice_update_rx_mbuf_sz - Update the Rx buffer size for all queues
1991 * @sc: the device softc
1992 *
1993 * Called to update the Rx queue mbuf_sz parameter for configuring the receive
1994 * buffer sizes when programming hardware.
1995 */
1996static void
1997ice_update_rx_mbuf_sz(struct ice_softc *sc)
1998{
1999	uint32_t mbuf_sz = iflib_get_rx_mbuf_sz(sc->ctx);
2000	struct ice_vsi *vsi = &sc->pf_vsi;
2001
2002	MPASS(mbuf_sz <= UINT16_MAX);
2003	vsi->mbuf_sz = mbuf_sz;
2004}
2005
2006/**
2007 * ice_if_init - Initialize the device
2008 * @ctx: iflib ctx structure
2009 *
2010 * Called by iflib to bring the device up, i.e. ifconfig ice0 up. Initializes
2011 * device filters and prepares the Tx and Rx engines.
2012 *
2013 * @pre assumes the caller holds the iflib CTX lock
2014 */
2015static void
2016ice_if_init(if_ctx_t ctx)
2017{
2018	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
2019	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2020	device_t dev = sc->dev;
2021	int err;
2022
2023	ASSERT_CTX_LOCKED(sc);
2024
2025	/*
2026	 * We've seen an issue with 11.3/12.1 where sideband routines are
2027	 * called after detach is called.  This would call routines after
2028	 * if_stop, causing issues with the teardown process.  This has
2029	 * seemingly been fixed in STABLE snapshots, but it seems like a
2030	 * good idea to have this guard here regardless.
2031	 */
2032	if (ice_driver_is_detaching(sc))
2033		return;
2034
2035	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2036		return;
2037
2038	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
2039		device_printf(sc->dev, "request to start interface cannot be completed as the device failed to reset\n");
2040		return;
2041	}
2042
2043	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
2044		device_printf(sc->dev, "request to start interface while device is prepared for impending reset\n");
2045		return;
2046	}
2047
2048	ice_update_rx_mbuf_sz(sc);
2049
2050	/* Update the MAC address... User might use a LAA */
2051	err = ice_update_laa_mac(sc);
2052	if (err) {
2053		device_printf(dev,
2054			      "LAA address change failed, err %s\n",
2055			      ice_err_str(err));
2056		return;
2057	}
2058
2059	/* Initialize software Tx tracking values */
2060	ice_init_tx_tracking(&sc->pf_vsi);
2061
2062	err = ice_cfg_vsi_for_tx(&sc->pf_vsi);
2063	if (err) {
2064		device_printf(dev,
2065			      "Unable to configure the main VSI for Tx: %s\n",
2066			      ice_err_str(err));
2067		return;
2068	}
2069
2070	err = ice_cfg_vsi_for_rx(&sc->pf_vsi);
2071	if (err) {
2072		device_printf(dev,
2073			      "Unable to configure the main VSI for Rx: %s\n",
2074			      ice_err_str(err));
2075		goto err_cleanup_tx;
2076	}
2077
2078	err = ice_control_all_rx_queues(&sc->pf_vsi, true);
2079	if (err) {
2080		device_printf(dev,
2081			      "Unable to enable Rx rings for transmit: %s\n",
2082			      ice_err_str(err));
2083		goto err_cleanup_tx;
2084	}
2085
2086	err = ice_cfg_pf_default_mac_filters(sc);
2087	if (err) {
2088		device_printf(dev,
2089			      "Unable to configure default MAC filters: %s\n",
2090			      ice_err_str(err));
2091		goto err_stop_rx;
2092	}
2093
2094	/* We use software interrupts for Tx, so we only program the hardware
2095	 * interrupts for Rx.
2096	 */
2097	ice_configure_all_rxq_interrupts(&sc->pf_vsi);
2098	ice_configure_rx_itr(&sc->pf_vsi);
2099
2100	/* Configure promiscuous mode */
2101	ice_if_promisc_set(ctx, if_getflags(sc->ifp));
2102
2103	if (!ice_testandclear_state(&sc->state, ICE_STATE_FIRST_INIT_LINK))
2104		if (!sc->link_up && ((if_getflags(sc->ifp) & IFF_UP) ||
2105			 ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN)))
2106			ice_set_link(sc, true);
2107
2108	ice_rdma_pf_init(sc);
2109
2110	ice_set_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED);
2111
2112	if (sc->mirr_if && ice_testandclear_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
2113		ice_clear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
2114		iflib_request_reset(sc->mirr_if->subctx);
2115		iflib_admin_intr_deferred(sc->mirr_if->subctx);
2116	}
2117
2118	return;
2119
2120err_stop_rx:
2121	ice_control_all_rx_queues(&sc->pf_vsi, false);
2122err_cleanup_tx:
2123	ice_vsi_disable_tx(&sc->pf_vsi);
2124}
2125
2126/**
2127 * ice_poll_for_media_avail - Re-enable link if media is detected
2128 * @sc: device private structure
2129 *
2130 * Intended to be called from the driver's timer function, this function
2131 * sends the Get Link Status AQ command and re-enables HW link if the
2132 * command says that media is available.
2133 *
2134 * If the driver doesn't have the "NO_MEDIA" state set, then this does nothing,
2135 * since media removal events are supposed to be sent to the driver through
2136 * a link status event.
2137 */
2138static void
2139ice_poll_for_media_avail(struct ice_softc *sc)
2140{
2141	struct ice_hw *hw = &sc->hw;
2142	struct ice_port_info *pi = hw->port_info;
2143
2144	if (ice_test_state(&sc->state, ICE_STATE_NO_MEDIA)) {
2145		pi->phy.get_link_info = true;
2146		ice_get_link_status(pi, &sc->link_up);
2147
2148		if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2149			enum ice_status status;
2150
2151			/* Re-enable link and re-apply user link settings */
2152			if (ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) ||
2153			    (if_getflags(sc->ifp) & IFF_UP)) {
2154				ice_apply_saved_phy_cfg(sc, ICE_APPLY_LS_FEC_FC);
2155
2156				/* Update the OS about changes in media capability */
2157				status = ice_add_media_types(sc, sc->media);
2158				if (status)
2159					device_printf(sc->dev,
2160					    "Error adding device media types: %s aq_err %s\n",
2161					    ice_status_str(status),
2162					    ice_aq_str(hw->adminq.sq_last_status));
2163			}
2164
2165			ice_clear_state(&sc->state, ICE_STATE_NO_MEDIA);
2166		}
2167	}
2168}
2169
2170/**
2171 * ice_if_timer - called by iflib periodically
2172 * @ctx: iflib ctx structure
2173 * @qid: the queue this timer was called for
2174 *
2175 * This callback is triggered by iflib periodically. We use it to update the
2176 * hw statistics.
2177 *
2178 * @remark this function is not protected by the iflib CTX lock.
2179 */
2180static void
2181ice_if_timer(if_ctx_t ctx, uint16_t qid)
2182{
2183	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2184	uint64_t prev_link_xoff_rx = sc->stats.cur.link_xoff_rx;
2185
2186	if (qid != 0)
2187		return;
2188
2189	/* Do not attempt to update stats when in recovery mode */
2190	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2191		return;
2192
2193	/* Update device statistics */
2194	ice_update_pf_stats(sc);
2195
2196	/*
2197	 * For proper watchdog management, the iflib stack needs to know if
2198	 * we've been paused during the last interval. Check if the
2199	 * link_xoff_rx stat changed, and set the isc_pause_frames, if so.
2200	 */
2201	if (sc->stats.cur.link_xoff_rx != prev_link_xoff_rx)
2202		sc->scctx->isc_pause_frames = 1;
2203
2204	/* Update the primary VSI stats */
2205	ice_update_vsi_hw_stats(&sc->pf_vsi);
2206
2207	/* Update mirror VSI stats */
2208	if (sc->mirr_if && sc->mirr_if->if_attached)
2209		ice_update_vsi_hw_stats(sc->mirr_if->vsi);
2210}
2211
2212/**
2213 * ice_admin_timer - called periodically to trigger the admin task
2214 * @arg: callout(9) argument pointing to the device private softc structure
2215 *
2216 * Timer function used as part of a callout(9) timer that will periodically
2217 * trigger the admin task, even when the interface is down.
2218 *
2219 * @remark this function is not called by iflib and is not protected by the
2220 * iflib CTX lock.
2221 *
2222 * @remark because this is a callout function, it cannot sleep and should not
2223 * attempt taking the iflib CTX lock.
2224 */
2225static void
2226ice_admin_timer(void *arg)
2227{
2228	struct ice_softc *sc = (struct ice_softc *)arg;
2229
2230	/*
2231	 * There is a point where callout routines are no longer
2232	 * cancelable.  So there exists a window of time where the
2233	 * driver enters detach() and tries to cancel the callout, but the
2234	 * callout routine has passed the cancellation point.  The detach()
2235	 * routine is unaware of this and tries to free resources that the
2236	 * callout routine needs.  So we check for the detach state flag to
2237	 * at least shrink the window of opportunity.
2238	 */
2239	if (ice_driver_is_detaching(sc))
2240		return;
2241
2242	/* Fire off the admin task */
2243	iflib_admin_intr_deferred(sc->ctx);
2244
2245	/* Reschedule the admin timer */
2246	callout_schedule(&sc->admin_timer, hz/2);
2247}
2248
2249/**
2250 * ice_transition_recovery_mode - Transition to recovery mode
2251 * @sc: the device private softc
2252 *
2253 * Called when the driver detects that the firmware has entered recovery mode
2254 * at run time.
2255 */
2256static void
2257ice_transition_recovery_mode(struct ice_softc *sc)
2258{
2259	struct ice_vsi *vsi = &sc->pf_vsi;
2260	int i;
2261
2262	device_printf(sc->dev, "Firmware recovery mode detected. Limiting functionality. Refer to Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
2263
2264	/* Tell the stack that the link has gone down */
2265	iflib_link_state_change(sc->ctx, LINK_STATE_DOWN, 0);
2266
2267	/* Request that the device be re-initialized */
2268	ice_request_stack_reinit(sc);
2269
2270	ice_rdma_pf_detach(sc);
2271	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2272
2273	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2274	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2275
2276	ice_vsi_del_txqs_ctx(vsi);
2277	ice_vsi_del_rxqs_ctx(vsi);
2278
2279	for (i = 0; i < sc->num_available_vsi; i++) {
2280		if (sc->all_vsi[i])
2281			ice_release_vsi(sc->all_vsi[i]);
2282	}
2283	sc->num_available_vsi = 0;
2284
2285	if (sc->all_vsi) {
2286		free(sc->all_vsi, M_ICE);
2287		sc->all_vsi = NULL;
2288	}
2289
2290	/* Destroy the interrupt manager */
2291	ice_resmgr_destroy(&sc->dev_imgr);
2292	/* Destroy the queue managers */
2293	ice_resmgr_destroy(&sc->tx_qmgr);
2294	ice_resmgr_destroy(&sc->rx_qmgr);
2295
2296	ice_deinit_hw(&sc->hw);
2297}
2298
2299/**
2300 * ice_transition_safe_mode - Transition to safe mode
2301 * @sc: the device private softc
2302 *
2303 * Called when the driver attempts to reload the DDP package during a device
2304 * reset, and the new download fails. If so, we must transition to safe mode
2305 * at run time.
2306 *
2307 * @remark although safe mode normally allocates only a single queue, we can't
2308 * change the number of queues dynamically when using iflib. Due to this, we
2309 * do not attempt to reduce the number of queues.
2310 */
2311static void
2312ice_transition_safe_mode(struct ice_softc *sc)
2313{
2314	/* Indicate that we are in Safe mode */
2315	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_cap);
2316	ice_set_bit(ICE_FEATURE_SAFE_MODE, sc->feat_en);
2317
2318	ice_rdma_pf_detach(sc);
2319	ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2320
2321	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_en);
2322	ice_clear_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2323
2324	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2325	ice_clear_bit(ICE_FEATURE_RSS, sc->feat_en);
2326}
2327
2328/**
2329 * ice_if_update_admin_status - update admin status
2330 * @ctx: iflib ctx structure
2331 *
2332 * Called by iflib to update the admin status. For our purposes, this means
2333 * check the adminq, and update the link status. It's ultimately triggered by
2334 * our admin interrupt, or by the ice_if_timer periodically.
2335 *
2336 * @pre assumes the caller holds the iflib CTX lock
2337 */
2338static void
2339ice_if_update_admin_status(if_ctx_t ctx)
2340{
2341	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2342	enum ice_fw_modes fw_mode;
2343	bool reschedule = false;
2344	u16 pending = 0;
2345
2346	ASSERT_CTX_LOCKED(sc);
2347
2348	/* Check if the firmware entered recovery mode at run time */
2349	fw_mode = ice_get_fw_mode(&sc->hw);
2350	if (fw_mode == ICE_FW_MODE_REC) {
2351		if (!ice_testandset_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2352			/* If we just entered recovery mode, log a warning to
2353			 * the system administrator and deinit driver state
2354			 * that is no longer functional.
2355			 */
2356			ice_transition_recovery_mode(sc);
2357		}
2358	} else if (fw_mode == ICE_FW_MODE_ROLLBACK) {
2359		if (!ice_testandset_state(&sc->state, ICE_STATE_ROLLBACK_MODE)) {
2360			/* Rollback mode isn't fatal, but we don't want to
2361			 * repeatedly post a message about it.
2362			 */
2363			ice_print_rollback_msg(&sc->hw);
2364		}
2365	}
2366
2367	/* Handle global reset events */
2368	ice_handle_reset_event(sc);
2369
2370	/* Handle PF reset requests */
2371	ice_handle_pf_reset_request(sc);
2372
2373	/* Handle MDD events */
2374	ice_handle_mdd_event(sc);
2375
2376	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED) ||
2377	    ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET) ||
2378	    ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2379		/*
2380		 * If we know the control queues are disabled, skip processing
2381		 * the control queues entirely.
2382		 */
2383		;
2384	} else if (ice_testandclear_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING)) {
2385		ice_process_ctrlq(sc, ICE_CTL_Q_ADMIN, &pending);
2386		if (pending > 0)
2387			reschedule = true;
2388
2389		ice_process_ctrlq(sc, ICE_CTL_Q_MAILBOX, &pending);
2390		if (pending > 0)
2391			reschedule = true;
2392	}
2393
2394	/* Poll for link up */
2395	ice_poll_for_media_avail(sc);
2396
2397	/* Check and update link status */
2398	ice_update_link_status(sc, false);
2399
2400	/*
2401	 * If there are still messages to process, we need to reschedule
2402	 * ourselves. Otherwise, we can just re-enable the interrupt. We'll be
2403	 * woken up at the next interrupt or timer event.
2404	 */
2405	if (reschedule) {
2406		ice_set_state(&sc->state, ICE_STATE_CONTROLQ_EVENT_PENDING);
2407		iflib_admin_intr_deferred(ctx);
2408	} else {
2409		ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2410	}
2411}
2412
2413/**
2414 * ice_prepare_for_reset - Prepare device for an impending reset
2415 * @sc: The device private softc
2416 *
2417 * Prepare the driver for an impending reset, shutting down VSIs, clearing the
2418 * scheduler setup, and shutting down controlqs. Uses the
2419 * ICE_STATE_PREPARED_FOR_RESET to indicate whether we've already prepared the
2420 * driver for reset or not.
2421 */
2422static void
2423ice_prepare_for_reset(struct ice_softc *sc)
2424{
2425	struct ice_hw *hw = &sc->hw;
2426
2427	/* If we're already prepared, there's nothing to do */
2428	if (ice_testandset_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET))
2429		return;
2430
2431	log(LOG_INFO, "%s: preparing to reset device logic\n", if_name(sc->ifp));
2432
2433	/* In recovery mode, hardware is not initialized */
2434	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2435		return;
2436
2437	/* inform the RDMA client */
2438	ice_rdma_notify_reset(sc);
2439	/* stop the RDMA client */
2440	ice_rdma_pf_stop(sc);
2441
2442	/* Release the main PF VSI queue mappings */
2443	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2444				    sc->pf_vsi.num_tx_queues);
2445	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2446				    sc->pf_vsi.num_rx_queues);
2447	if (sc->mirr_if) {
2448		ice_resmgr_release_map(&sc->tx_qmgr, sc->mirr_if->vsi->tx_qmap,
2449		    sc->mirr_if->num_irq_vectors);
2450		ice_resmgr_release_map(&sc->rx_qmgr, sc->mirr_if->vsi->rx_qmap,
2451		    sc->mirr_if->num_irq_vectors);
2452	}
2453
2454	ice_clear_hw_tbls(hw);
2455
2456	if (hw->port_info)
2457		ice_sched_cleanup_all(hw);
2458
2459	ice_shutdown_all_ctrlq(hw, false);
2460}
2461
2462/**
2463 * ice_rebuild_pf_vsi_qmap - Rebuild the main PF VSI queue mapping
2464 * @sc: the device softc pointer
2465 *
2466 * Loops over the Tx and Rx queues for the main PF VSI and reassigns the queue
2467 * mapping after a reset occurred.
2468 */
2469static int
2470ice_rebuild_pf_vsi_qmap(struct ice_softc *sc)
2471{
2472	struct ice_vsi *vsi = &sc->pf_vsi;
2473	struct ice_tx_queue *txq;
2474	struct ice_rx_queue *rxq;
2475	int err, i;
2476
2477	/* Re-assign Tx queues from PF space to the main VSI */
2478	err = ice_resmgr_assign_contiguous(&sc->tx_qmgr, vsi->tx_qmap,
2479					    vsi->num_tx_queues);
2480	if (err) {
2481		device_printf(sc->dev, "Unable to re-assign PF Tx queues: %s\n",
2482			      ice_err_str(err));
2483		return (err);
2484	}
2485
2486	/* Re-assign Rx queues from PF space to this VSI */
2487	err = ice_resmgr_assign_contiguous(&sc->rx_qmgr, vsi->rx_qmap,
2488					    vsi->num_rx_queues);
2489	if (err) {
2490		device_printf(sc->dev, "Unable to re-assign PF Rx queues: %s\n",
2491			      ice_err_str(err));
2492		goto err_release_tx_queues;
2493	}
2494
2495	vsi->qmap_type = ICE_RESMGR_ALLOC_CONTIGUOUS;
2496
2497	/* Re-assign Tx queue tail pointers */
2498	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
2499		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
2500
2501	/* Re-assign Rx queue tail pointers */
2502	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
2503		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
2504
2505	return (0);
2506
2507err_release_tx_queues:
2508	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2509				   sc->pf_vsi.num_tx_queues);
2510
2511	return (err);
2512}
2513
/*
 * Determine if the iflib context is active: evaluates to non-zero when the
 * interface behind ctx has IFF_DRV_RUNNING set in its driver flags.
 */
#define CTX_ACTIVE(ctx) \
	(if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
2516
2517/**
2518 * ice_rebuild_recovery_mode - Rebuild driver state while in recovery mode
2519 * @sc: The device private softc
2520 *
2521 * Handle a driver rebuild while in recovery mode. This will only rebuild the
2522 * limited functionality supported while in recovery mode.
2523 */
2524static void
2525ice_rebuild_recovery_mode(struct ice_softc *sc)
2526{
2527	device_t dev = sc->dev;
2528
2529	/* enable PCIe bus master */
2530	pci_enable_busmaster(dev);
2531
2532	/* Configure interrupt causes for the administrative interrupt */
2533	ice_configure_misc_interrupts(sc);
2534
2535	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2536	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2537
2538	/* Now that the rebuild is finished, we're no longer prepared to reset */
2539	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2540
2541	log(LOG_INFO, "%s: device rebuild successful\n", if_name(sc->ifp));
2542
2543	/* In order to completely restore device functionality, the iflib core
2544	 * needs to be reset. We need to request an iflib reset. Additionally,
2545	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2546	 * the iflib core, we also want re-run the admin task so that iflib
2547	 * resets immediately instead of waiting for the next interrupt.
2548	 */
2549	ice_request_stack_reinit(sc);
2550
2551	return;
2552}
2553
2554/**
2555 * ice_rebuild - Rebuild driver state post reset
2556 * @sc: The device private softc
2557 *
2558 * Restore driver state after a reset occurred. Restart the controlqs, setup
2559 * the hardware port, and re-enable the VSIs.
2560 */
2561static void
2562ice_rebuild(struct ice_softc *sc)
2563{
2564	struct ice_hw *hw = &sc->hw;
2565	device_t dev = sc->dev;
2566	enum ice_ddp_state pkg_state;
2567	enum ice_status status;
2568	int err;
2569
2570	sc->rebuild_ticks = ticks;
2571
2572	/* If we're rebuilding, then a reset has succeeded. */
2573	ice_clear_state(&sc->state, ICE_STATE_RESET_FAILED);
2574
2575	/*
2576	 * If the firmware is in recovery mode, only restore the limited
2577	 * functionality supported by recovery mode.
2578	 */
2579	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE)) {
2580		ice_rebuild_recovery_mode(sc);
2581		return;
2582	}
2583
2584	/* enable PCIe bus master */
2585	pci_enable_busmaster(dev);
2586
2587	status = ice_init_all_ctrlq(hw);
2588	if (status) {
2589		device_printf(dev, "failed to re-init controlqs, err %s\n",
2590			      ice_status_str(status));
2591		goto err_shutdown_ctrlq;
2592	}
2593
2594	/* Query the allocated resources for Tx scheduler */
2595	status = ice_sched_query_res_alloc(hw);
2596	if (status) {
2597		device_printf(dev,
2598			      "Failed to query scheduler resources, err %s aq_err %s\n",
2599			      ice_status_str(status),
2600			      ice_aq_str(hw->adminq.sq_last_status));
2601		goto err_shutdown_ctrlq;
2602	}
2603
2604	/* Re-enable FW logging. Keep going even if this fails */
2605	status = ice_fwlog_set(hw, &hw->fwlog_cfg);
2606	if (!status) {
2607		/*
2608		 * We should have the most updated cached copy of the
2609		 * configuration, regardless of whether we're rebuilding
2610		 * or not.  So we'll simply check to see if logging was
2611		 * enabled pre-rebuild.
2612		 */
2613		if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2614			status = ice_fwlog_register(hw);
2615			if (status)
2616				device_printf(dev, "failed to re-register fw logging, err %s aq_err %s\n",
2617				   ice_status_str(status),
2618				   ice_aq_str(hw->adminq.sq_last_status));
2619		}
2620	} else
2621		device_printf(dev, "failed to rebuild fw logging configuration, err %s aq_err %s\n",
2622		   ice_status_str(status),
2623		   ice_aq_str(hw->adminq.sq_last_status));
2624
2625	err = ice_send_version(sc);
2626	if (err)
2627		goto err_shutdown_ctrlq;
2628
2629	err = ice_init_link_events(sc);
2630	if (err) {
2631		device_printf(dev, "ice_init_link_events failed: %s\n",
2632			      ice_err_str(err));
2633		goto err_shutdown_ctrlq;
2634	}
2635
2636	status = ice_clear_pf_cfg(hw);
2637	if (status) {
2638		device_printf(dev, "failed to clear PF configuration, err %s\n",
2639			      ice_status_str(status));
2640		goto err_shutdown_ctrlq;
2641	}
2642
2643	ice_clean_all_vsi_rss_cfg(sc);
2644
2645	ice_clear_pxe_mode(hw);
2646
2647	status = ice_get_caps(hw);
2648	if (status) {
2649		device_printf(dev, "failed to get capabilities, err %s\n",
2650			      ice_status_str(status));
2651		goto err_shutdown_ctrlq;
2652	}
2653
2654	status = ice_sched_init_port(hw->port_info);
2655	if (status) {
2656		device_printf(dev, "failed to initialize port, err %s\n",
2657			      ice_status_str(status));
2658		goto err_sched_cleanup;
2659	}
2660
2661	/* If we previously loaded the package, it needs to be reloaded now */
2662	if (!ice_is_bit_set(sc->feat_en, ICE_FEATURE_SAFE_MODE)) {
2663		pkg_state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
2664		if (!ice_is_init_pkg_successful(pkg_state)) {
2665			ice_log_pkg_init(sc, pkg_state);
2666			ice_transition_safe_mode(sc);
2667		}
2668	}
2669
2670	ice_reset_pf_stats(sc);
2671
2672	err = ice_rebuild_pf_vsi_qmap(sc);
2673	if (err) {
2674		device_printf(sc->dev, "Unable to re-assign main VSI queues, err %s\n",
2675			      ice_err_str(err));
2676		goto err_sched_cleanup;
2677	}
2678	err = ice_initialize_vsi(&sc->pf_vsi);
2679	if (err) {
2680		device_printf(sc->dev, "Unable to re-initialize Main VSI, err %s\n",
2681			      ice_err_str(err));
2682		goto err_release_queue_allocations;
2683	}
2684
2685	/* Replay all VSI configuration */
2686	err = ice_replay_all_vsi_cfg(sc);
2687	if (err)
2688		goto err_deinit_pf_vsi;
2689
2690	/* Re-enable FW health event reporting */
2691	ice_init_health_events(sc);
2692
2693	/* Reconfigure the main PF VSI for RSS */
2694	err = ice_config_rss(&sc->pf_vsi);
2695	if (err) {
2696		device_printf(sc->dev,
2697			      "Unable to reconfigure RSS for the main VSI, err %s\n",
2698			      ice_err_str(err));
2699		goto err_deinit_pf_vsi;
2700	}
2701
2702	if (hw->port_info->qos_cfg.is_sw_lldp)
2703		ice_add_rx_lldp_filter(sc);
2704
2705	/* Refresh link status */
2706	ice_clear_state(&sc->state, ICE_STATE_LINK_STATUS_REPORTED);
2707	sc->hw.port_info->phy.get_link_info = true;
2708	ice_get_link_status(sc->hw.port_info, &sc->link_up);
2709	ice_update_link_status(sc, true);
2710
2711	/* RDMA interface will be restarted by the stack re-init */
2712
2713	/* Configure interrupt causes for the administrative interrupt */
2714	ice_configure_misc_interrupts(sc);
2715
2716	/* Enable ITR 0 right away, so that we can handle admin interrupts */
2717	ice_enable_intr(&sc->hw, sc->irqvs[0].me);
2718
2719	/* Now that the rebuild is finished, we're no longer prepared to reset */
2720	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2721
2722	/* Reconfigure the subinterface */
2723	if (sc->mirr_if) {
2724		err = ice_subif_rebuild(sc);
2725		if (err)
2726			goto err_deinit_pf_vsi;
2727	}
2728
2729	log(LOG_INFO, "%s: device rebuild successful\n", sc->ifp->if_xname);
2730
2731	/* In order to completely restore device functionality, the iflib core
2732	 * needs to be reset. We need to request an iflib reset. Additionally,
2733	 * because the state of IFC_DO_RESET is cached within task_fn_admin in
2734	 * the iflib core, we also want re-run the admin task so that iflib
2735	 * resets immediately instead of waiting for the next interrupt.
2736	 * If LLDP is enabled we need to reconfig DCB to properly reinit all TC
2737	 * queues, not only 0. It contains ice_request_stack_reinit as well.
2738	 */
2739	if (hw->port_info->qos_cfg.is_sw_lldp)
2740		ice_request_stack_reinit(sc);
2741	else
2742		ice_do_dcb_reconfig(sc, false);
2743
2744	return;
2745
2746err_deinit_pf_vsi:
2747	ice_deinit_vsi(&sc->pf_vsi);
2748err_release_queue_allocations:
2749	ice_resmgr_release_map(&sc->tx_qmgr, sc->pf_vsi.tx_qmap,
2750				    sc->pf_vsi.num_tx_queues);
2751	ice_resmgr_release_map(&sc->rx_qmgr, sc->pf_vsi.rx_qmap,
2752				    sc->pf_vsi.num_rx_queues);
2753err_sched_cleanup:
2754	ice_sched_cleanup_all(hw);
2755err_shutdown_ctrlq:
2756	ice_shutdown_all_ctrlq(hw, false);
2757	ice_clear_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET);
2758	ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2759	device_printf(dev, "Driver rebuild failed, please reload the device driver\n");
2760}
2761
2762/**
2763 * ice_handle_reset_event - Handle reset events triggered by OICR
2764 * @sc: The device private softc
2765 *
2766 * Handle reset events triggered by an OICR notification. This includes CORER,
2767 * GLOBR, and EMPR resets triggered by software on this or any other PF or by
2768 * firmware.
2769 *
2770 * @pre assumes the iflib context lock is held, and will unlock it while
2771 * waiting for the hardware to finish reset.
2772 */
2773static void
2774ice_handle_reset_event(struct ice_softc *sc)
2775{
2776	struct ice_hw *hw = &sc->hw;
2777	enum ice_status status;
2778	device_t dev = sc->dev;
2779
2780	/* When a CORER, GLOBR, or EMPR is about to happen, the hardware will
2781	 * trigger an OICR interrupt. Our OICR handler will determine when
2782	 * this occurs and set the ICE_STATE_RESET_OICR_RECV bit as
2783	 * appropriate.
2784	 */
2785	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_OICR_RECV))
2786		return;
2787
2788	ice_prepare_for_reset(sc);
2789
2790	/*
2791	 * Release the iflib context lock and wait for the device to finish
2792	 * resetting.
2793	 */
2794	IFLIB_CTX_UNLOCK(sc);
2795	status = ice_check_reset(hw);
2796	IFLIB_CTX_LOCK(sc);
2797	if (status) {
2798		device_printf(dev, "Device never came out of reset, err %s\n",
2799			      ice_status_str(status));
2800		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2801		return;
2802	}
2803
2804	/* We're done with the reset, so we can rebuild driver state */
2805	sc->hw.reset_ongoing = false;
2806	ice_rebuild(sc);
2807
2808	/* In the unlikely event that a PF reset request occurs at the same
2809	 * time as a global reset, clear the request now. This avoids
2810	 * resetting a second time right after we reset due to a global event.
2811	 */
2812	if (ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2813		device_printf(dev, "Ignoring PFR request that occurred while a reset was ongoing\n");
2814}
2815
2816/**
2817 * ice_handle_pf_reset_request - Initiate PF reset requested by software
2818 * @sc: The device private softc
2819 *
2820 * Initiate a PF reset requested by software. We handle this in the admin task
2821 * so that only one thread actually handles driver preparation and cleanup,
2822 * rather than having multiple threads possibly attempt to run this code
2823 * simultaneously.
2824 *
2825 * @pre assumes the iflib context lock is held and will unlock it while
2826 * waiting for the PF reset to complete.
2827 */
2828static void
2829ice_handle_pf_reset_request(struct ice_softc *sc)
2830{
2831	struct ice_hw *hw = &sc->hw;
2832	enum ice_status status;
2833
2834	/* Check for PF reset requests */
2835	if (!ice_testandclear_state(&sc->state, ICE_STATE_RESET_PFR_REQ))
2836		return;
2837
2838	/* Make sure we're prepared for reset */
2839	ice_prepare_for_reset(sc);
2840
2841	/*
2842	 * Release the iflib context lock and wait for the device to finish
2843	 * resetting.
2844	 */
2845	IFLIB_CTX_UNLOCK(sc);
2846	status = ice_reset(hw, ICE_RESET_PFR);
2847	IFLIB_CTX_LOCK(sc);
2848	if (status) {
2849		device_printf(sc->dev, "device PF reset failed, err %s\n",
2850			      ice_status_str(status));
2851		ice_set_state(&sc->state, ICE_STATE_RESET_FAILED);
2852		return;
2853	}
2854
2855	sc->soft_stats.pfr_count++;
2856	ice_rebuild(sc);
2857}
2858
2859/**
2860 * ice_init_device_features - Init device driver features
2861 * @sc: driver softc structure
2862 *
2863 * @pre assumes that the function capabilities bits have been set up by
2864 * ice_init_hw().
2865 */
2866static void
2867ice_init_device_features(struct ice_softc *sc)
2868{
2869	struct ice_hw *hw = &sc->hw;
2870
2871	/* Set capabilities that all devices support */
2872	ice_set_bit(ICE_FEATURE_SRIOV, sc->feat_cap);
2873	ice_set_bit(ICE_FEATURE_RSS, sc->feat_cap);
2874	ice_set_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2875	ice_set_bit(ICE_FEATURE_LENIENT_LINK_MODE, sc->feat_cap);
2876	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_1, sc->feat_cap);
2877	ice_set_bit(ICE_FEATURE_LINK_MGMT_VER_2, sc->feat_cap);
2878	ice_set_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2879	ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2880	ice_set_bit(ICE_FEATURE_HAS_PBA, sc->feat_cap);
2881	ice_set_bit(ICE_FEATURE_DCB, sc->feat_cap);
2882	ice_set_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2883
2884	/* Disable features due to hardware limitations... */
2885	if (!hw->func_caps.common_cap.rss_table_size)
2886		ice_clear_bit(ICE_FEATURE_RSS, sc->feat_cap);
2887	if (!hw->func_caps.common_cap.iwarp || !ice_enable_irdma)
2888		ice_clear_bit(ICE_FEATURE_RDMA, sc->feat_cap);
2889	if (!hw->func_caps.common_cap.dcb)
2890		ice_clear_bit(ICE_FEATURE_DCB, sc->feat_cap);
2891	/* Disable features due to firmware limitations... */
2892	if (!ice_is_fw_health_report_supported(hw))
2893		ice_clear_bit(ICE_FEATURE_HEALTH_STATUS, sc->feat_cap);
2894	if (!ice_fwlog_supported(hw))
2895		ice_clear_bit(ICE_FEATURE_FW_LOGGING, sc->feat_cap);
2896	if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) {
2897		if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_FW_LOGGING))
2898			ice_set_bit(ICE_FEATURE_FW_LOGGING, sc->feat_en);
2899		else
2900			ice_fwlog_unregister(hw);
2901	}
2902
2903	/* Disable capabilities not supported by the OS */
2904	ice_disable_unsupported_features(sc->feat_cap);
2905
2906	/* RSS is always enabled for iflib */
2907	if (ice_is_bit_set(sc->feat_cap, ICE_FEATURE_RSS))
2908		ice_set_bit(ICE_FEATURE_RSS, sc->feat_en);
2909
2910	/* Disable features based on sysctl settings */
2911	if (!ice_tx_balance_en)
2912		ice_clear_bit(ICE_FEATURE_TX_BALANCE, sc->feat_cap);
2913
2914	if (hw->dev_caps.supported_sensors & ICE_SENSOR_SUPPORT_E810_INT_TEMP) {
2915		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_cap);
2916		ice_set_bit(ICE_FEATURE_TEMP_SENSOR, sc->feat_en);
2917	}
2918}
2919
2920/**
2921 * ice_if_multi_set - Callback to update Multicast filters in HW
2922 * @ctx: iflib ctx structure
2923 *
2924 * Called by iflib in response to SIOCDELMULTI and SIOCADDMULTI. Must search
2925 * the if_multiaddrs list and determine which filters have been added or
2926 * removed from the list, and update HW programming to reflect the new list.
2927 *
2928 * @pre assumes the caller holds the iflib CTX lock
2929 */
2930static void
2931ice_if_multi_set(if_ctx_t ctx)
2932{
2933	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2934	int err;
2935
2936	ASSERT_CTX_LOCKED(sc);
2937
2938	/* Do not handle multicast configuration in recovery mode */
2939	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2940		return;
2941
2942	err = ice_sync_multicast_filters(sc);
2943	if (err) {
2944		device_printf(sc->dev,
2945			      "Failed to synchronize multicast filter list: %s\n",
2946			      ice_err_str(err));
2947		return;
2948	}
2949}
2950
2951/**
2952 * ice_if_vlan_register - Register a VLAN with the hardware
2953 * @ctx: iflib ctx pointer
2954 * @vtag: VLAN to add
2955 *
2956 * Programs the main PF VSI with a hardware filter for the given VLAN.
2957 *
2958 * @pre assumes the caller holds the iflib CTX lock
2959 */
2960static void
2961ice_if_vlan_register(if_ctx_t ctx, u16 vtag)
2962{
2963	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2964	enum ice_status status;
2965
2966	ASSERT_CTX_LOCKED(sc);
2967
2968	/* Do not handle VLAN configuration in recovery mode */
2969	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
2970		return;
2971
2972	status = ice_add_vlan_hw_filter(&sc->pf_vsi, vtag);
2973	if (status) {
2974		device_printf(sc->dev,
2975			      "Failure adding VLAN %d to main VSI, err %s aq_err %s\n",
2976			      vtag, ice_status_str(status),
2977			      ice_aq_str(sc->hw.adminq.sq_last_status));
2978	}
2979}
2980
2981/**
2982 * ice_if_vlan_unregister - Remove a VLAN filter from the hardware
2983 * @ctx: iflib ctx pointer
2984 * @vtag: VLAN to add
2985 *
2986 * Removes the previously programmed VLAN filter from the main PF VSI.
2987 *
2988 * @pre assumes the caller holds the iflib CTX lock
2989 */
2990static void
2991ice_if_vlan_unregister(if_ctx_t ctx, u16 vtag)
2992{
2993	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
2994	enum ice_status status;
2995
2996	ASSERT_CTX_LOCKED(sc);
2997
2998	/* Do not handle VLAN configuration in recovery mode */
2999	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
3000		return;
3001
3002	status = ice_remove_vlan_hw_filter(&sc->pf_vsi, vtag);
3003	if (status) {
3004		device_printf(sc->dev,
3005			      "Failure removing VLAN %d from main VSI, err %s aq_err %s\n",
3006			      vtag, ice_status_str(status),
3007			      ice_aq_str(sc->hw.adminq.sq_last_status));
3008	}
3009}
3010
3011/**
3012 * ice_if_stop - Stop the device
3013 * @ctx: iflib context structure
3014 *
3015 * Called by iflib to stop the device and bring it down. (i.e. ifconfig ice0
3016 * down)
3017 *
3018 * @pre assumes the caller holds the iflib CTX lock
3019 */
3020static void
3021ice_if_stop(if_ctx_t ctx)
3022{
3023	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3024	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3025
3026	ASSERT_CTX_LOCKED(sc);
3027
3028	/*
3029	 * The iflib core may call IFDI_STOP prior to the first call to
3030	 * IFDI_INIT. This will cause us to attempt to remove MAC filters we
3031	 * don't have, and disable Tx queues which aren't yet configured.
3032	 * Although it is likely these extra operations are harmless, they do
3033	 * cause spurious warning messages to be displayed, which may confuse
3034	 * users.
3035	 *
3036	 * To avoid these messages, we use a state bit indicating if we've
3037	 * been initialized. It will be set when ice_if_init is called, and
3038	 * cleared here in ice_if_stop.
3039	 */
3040	if (!ice_testandclear_state(&sc->state, ICE_STATE_DRIVER_INITIALIZED))
3041		return;
3042
3043	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
3044		device_printf(sc->dev, "request to stop interface cannot be completed as the device failed to reset\n");
3045		return;
3046	}
3047
3048	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
3049		device_printf(sc->dev, "request to stop interface while device is prepared for impending reset\n");
3050		return;
3051	}
3052
3053	ice_rdma_pf_stop(sc);
3054
3055	/* Remove the MAC filters, stop Tx, and stop Rx. We don't check the
3056	 * return of these functions because there's nothing we can really do
3057	 * if they fail, and the functions already print error messages.
3058	 * Just try to shut down as much as we can.
3059	 */
3060	ice_rm_pf_default_mac_filters(sc);
3061
3062	/* Dissociate the Tx and Rx queues from the interrupts */
3063	ice_flush_txq_interrupts(&sc->pf_vsi);
3064	ice_flush_rxq_interrupts(&sc->pf_vsi);
3065
3066	/* Disable the Tx and Rx queues */
3067	ice_vsi_disable_tx(&sc->pf_vsi);
3068	ice_control_all_rx_queues(&sc->pf_vsi, false);
3069
3070	if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3071		 !(if_getflags(sc->ifp) & IFF_UP) && sc->link_up)
3072		ice_set_link(sc, false);
3073
3074	if (sc->mirr_if && ice_test_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT)) {
3075		ice_subif_if_stop(sc->mirr_if->subctx);
3076		device_printf(sc->dev, "The subinterface also comes down and up after reset\n");
3077	}
3078}
3079
3080/**
3081 * ice_if_get_counter - Get current value of an ifnet statistic
3082 * @ctx: iflib context pointer
3083 * @counter: ifnet counter to read
3084 *
3085 * Reads the current value of an ifnet counter for the device.
3086 *
3087 * This function is not protected by the iflib CTX lock.
3088 */
3089static uint64_t
3090ice_if_get_counter(if_ctx_t ctx, ift_counter counter)
3091{
3092	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3093
3094	/* Return the counter for the main PF VSI */
3095	return ice_get_ifnet_counter(&sc->pf_vsi, counter);
3096}
3097
3098/**
3099 * ice_request_stack_reinit - Request that iflib re-initialize
3100 * @sc: the device private softc
3101 *
3102 * Request that the device be brought down and up, to re-initialize. For
3103 * example, this may be called when a device reset occurs, or when Tx and Rx
3104 * queues need to be re-initialized.
3105 *
3106 * This is required because the iflib state is outside the driver, and must be
3107 * re-initialized if we need to resart Tx and Rx queues.
3108 */
3109void
3110ice_request_stack_reinit(struct ice_softc *sc)
3111{
3112	if (CTX_ACTIVE(sc->ctx)) {
3113		iflib_request_reset(sc->ctx);
3114		iflib_admin_intr_deferred(sc->ctx);
3115	}
3116}
3117
3118/**
3119 * ice_driver_is_detaching - Check if the driver is detaching/unloading
3120 * @sc: device private softc
3121 *
3122 * Returns true if the driver is detaching, false otherwise.
3123 *
3124 * @remark on newer kernels, take advantage of iflib_in_detach in order to
3125 * report detachment correctly as early as possible.
3126 *
3127 * @remark this function is used by various code paths that want to avoid
3128 * running if the driver is about to be removed. This includes sysctls and
3129 * other driver access points. Note that it does not fully resolve
3130 * detach-based race conditions as it is possible for a thread to race with
3131 * iflib_in_detach.
3132 */
3133bool
3134ice_driver_is_detaching(struct ice_softc *sc)
3135{
3136	return (ice_test_state(&sc->state, ICE_STATE_DETACHING) ||
3137		iflib_in_detach(sc->ctx));
3138}
3139
3140/**
3141 * ice_if_priv_ioctl - Device private ioctl handler
3142 * @ctx: iflib context pointer
3143 * @command: The ioctl command issued
3144 * @data: ioctl specific data
3145 *
3146 * iflib callback for handling custom driver specific ioctls.
3147 *
3148 * @pre Assumes that the iflib context lock is held.
3149 */
3150static int
3151ice_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data)
3152{
3153	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3154	struct ifdrv *ifd;
3155	device_t dev = sc->dev;
3156
3157	if (data == NULL)
3158		return (EINVAL);
3159
3160	ASSERT_CTX_LOCKED(sc);
3161
3162	/* Make sure the command type is valid */
3163	switch (command) {
3164	case SIOCSDRVSPEC:
3165	case SIOCGDRVSPEC:
3166		/* Accepted commands */
3167		break;
3168	case SIOCGPRIVATE_0:
3169		/*
3170		 * Although we do not support this ioctl command, it's
3171		 * expected that iflib will forward it to the IFDI_PRIV_IOCTL
3172		 * handler. Do not print a message in this case
3173		 */
3174		return (ENOTSUP);
3175	default:
3176		/*
3177		 * If we get a different command for this function, it's
3178		 * definitely unexpected, so log a message indicating what
3179		 * command we got for debugging purposes.
3180		 */
3181		device_printf(dev, "%s: unexpected ioctl command %08lx\n",
3182			      __func__, command);
3183		return (EINVAL);
3184	}
3185
3186	ifd = (struct ifdrv *)data;
3187
3188	switch (ifd->ifd_cmd) {
3189	case ICE_NVM_ACCESS:
3190		return ice_handle_nvm_access_ioctl(sc, ifd);
3191	case ICE_DEBUG_DUMP:
3192		return ice_handle_debug_dump_ioctl(sc, ifd);
3193	default:
3194		return EINVAL;
3195	}
3196}
3197
3198/**
3199 * ice_if_i2c_req - I2C request handler for iflib
3200 * @ctx: iflib context pointer
3201 * @req: The I2C parameters to use
3202 *
3203 * Read from the port's I2C eeprom using the parameters from the ioctl.
3204 *
3205 * @remark The iflib-only part is pretty simple.
3206 */
3207static int
3208ice_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req)
3209{
3210	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3211
3212	return ice_handle_i2c_req(sc, req);
3213}
3214
3215/**
3216 * ice_if_suspend - PCI device suspend handler for iflib
3217 * @ctx: iflib context pointer
3218 *
3219 * Deinitializes the driver and clears HW resources in preparation for
3220 * suspend or an FLR.
3221 *
3222 * @returns 0; this return value is ignored
3223 */
3224static int
3225ice_if_suspend(if_ctx_t ctx)
3226{
3227	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3228
3229	/* At least a PFR is always going to happen after this;
3230	 * either via FLR or during the D3->D0 transition.
3231	 */
3232	ice_clear_state(&sc->state, ICE_STATE_RESET_PFR_REQ);
3233
3234	ice_prepare_for_reset(sc);
3235
3236	return (0);
3237}
3238
3239/**
3240 * ice_if_resume - PCI device resume handler for iflib
3241 * @ctx: iflib context pointer
3242 *
3243 * Reinitializes the driver and the HW after PCI resume or after
3244 * an FLR. An init is performed by iflib after this function is finished.
3245 *
3246 * @returns 0; this return value is ignored
3247 */
3248static int
3249ice_if_resume(if_ctx_t ctx)
3250{
3251	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3252
3253	ice_rebuild(sc);
3254
3255	return (0);
3256}
3257
3258/**
3259 * ice_if_needs_restart - Tell iflib when the driver needs to be reinitialized
3260 * @ctx: iflib context pointer
3261 * @event: event code to check
3262 *
3263 * Defaults to returning true for unknown events.
3264 *
3265 * @returns true if iflib needs to reinit the interface
3266 */
3267static bool
3268ice_if_needs_restart(if_ctx_t ctx, enum iflib_restart_event event)
3269{
3270	struct ice_softc *sc = (struct ice_softc *)iflib_get_softc(ctx);
3271
3272	switch (event) {
3273	case IFLIB_RESTART_VLAN_CONFIG:
3274		if (!ice_test_state(&sc->state, ICE_STATE_LINK_ACTIVE_ON_DOWN) &&
3275			 !(if_getflags(sc->ifp) & IFF_UP))
3276			return false;
3277	default:
3278		return true;
3279	}
3280}
3281
3282extern struct if_txrx ice_subif_txrx;
3283
3284/**
3285 * @var ice_subif_methods
3286 * @brief ice driver method entry points
3287 */
3288static device_method_t ice_subif_methods[] = {
3289	/* Device interface */
3290	DEVMETHOD(device_register, ice_subif_register),
3291	DEVMETHOD_END
3292};
3293
3294/**
3295 * @var ice_subif_driver
3296 * @brief driver structure for the device API
3297 */
3298static driver_t ice_subif_driver = {
3299	.name = "ice_subif",
3300	.methods = ice_subif_methods,
3301	.size = sizeof(struct ice_mirr_if),
3302};
3303
3304static device_method_t ice_iflib_subif_methods[] = {
3305	DEVMETHOD(ifdi_attach_pre, ice_subif_if_attach_pre),
3306	DEVMETHOD(ifdi_attach_post, ice_subif_if_attach_post),
3307	DEVMETHOD(ifdi_tx_queues_alloc, ice_subif_if_tx_queues_alloc),
3308	DEVMETHOD(ifdi_rx_queues_alloc, ice_subif_if_rx_queues_alloc),
3309	DEVMETHOD(ifdi_msix_intr_assign, ice_subif_if_msix_intr_assign),
3310	DEVMETHOD(ifdi_intr_enable, ice_subif_if_intr_enable),
3311	DEVMETHOD(ifdi_rx_queue_intr_enable, ice_subif_if_rx_queue_intr_enable),
3312	DEVMETHOD(ifdi_tx_queue_intr_enable, ice_subif_if_tx_queue_intr_enable),
3313	DEVMETHOD(ifdi_init, ice_subif_if_init),
3314	DEVMETHOD(ifdi_stop, ice_subif_if_stop),
3315	DEVMETHOD(ifdi_queues_free, ice_subif_if_queues_free),
3316	DEVMETHOD(ifdi_media_status, ice_subif_if_media_status),
3317	DEVMETHOD(ifdi_promisc_set, ice_subif_if_promisc_set),
3318};
3319
3320/**
3321 * @var ice_iflib_subif_driver
3322 * @brief driver structure for the iflib stack
3323 *
3324 * driver_t definition used to setup the iflib device methods.
3325 */
3326static driver_t ice_iflib_subif_driver = {
3327	.name = "ice_subif",
3328	.methods = ice_iflib_subif_methods,
3329	.size = sizeof(struct ice_mirr_if),
3330};
3331
3332/**
3333 * @var ice_subif_sctx
3334 * @brief ice driver shared context
3335 *
3336 * Similar to the existing ice_sctx, this structure has these differences:
3337 * - isc_admin_intrcnt is set to 0
3338 * - Uses subif iflib driver methods
3339 * - Flagged as a VF for iflib
3340 */
3341static struct if_shared_ctx ice_subif_sctx = {
3342	.isc_magic = IFLIB_MAGIC,
3343	.isc_q_align = PAGE_SIZE,
3344
3345	.isc_tx_maxsize = ICE_MAX_FRAME_SIZE,
3346	.isc_tx_maxsegsize = ICE_MAX_FRAME_SIZE,
3347	.isc_tso_maxsize = ICE_TSO_SIZE + sizeof(struct ether_vlan_header),
3348	.isc_tso_maxsegsize = ICE_MAX_DMA_SEG_SIZE,
3349
3350	.isc_rx_maxsize = ICE_MAX_FRAME_SIZE,
3351	.isc_rx_nsegments = ICE_MAX_RX_SEGS,
3352	.isc_rx_maxsegsize = ICE_MAX_FRAME_SIZE,
3353
3354	.isc_nfl = 1,
3355	.isc_ntxqs = 1,
3356	.isc_nrxqs = 1,
3357
3358	.isc_admin_intrcnt = 0,
3359	.isc_vendor_info = ice_vendor_info_array,
3360	.isc_driver_version = __DECONST(char *, ice_driver_version),
3361	.isc_driver = &ice_iflib_subif_driver,
3362
3363	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP |
3364		IFLIB_ADMIN_ALWAYS_RUN | IFLIB_SKIP_MSIX |
3365		IFLIB_IS_VF,
3366
3367	.isc_nrxd_min = {ICE_MIN_DESC_COUNT},
3368	.isc_ntxd_min = {ICE_MIN_DESC_COUNT},
3369	.isc_nrxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3370	.isc_ntxd_max = {ICE_IFLIB_MAX_DESC_COUNT},
3371	.isc_nrxd_default = {ICE_DEFAULT_DESC_COUNT},
3372	.isc_ntxd_default = {ICE_DEFAULT_DESC_COUNT},
3373};
3374
3375static void *
3376ice_subif_register(device_t dev __unused)
3377{
3378	return (&ice_subif_sctx);
3379}
3380
3381static void
3382ice_subif_setup_scctx(struct ice_mirr_if *mif)
3383{
3384	if_softc_ctx_t scctx = mif->subscctx;
3385
3386	scctx->isc_txrx = &ice_subif_txrx;
3387
3388	scctx->isc_capenable = ICE_FULL_CAPS;
3389	scctx->isc_tx_csum_flags = ICE_CSUM_OFFLOAD;
3390
3391	scctx->isc_ntxqsets = 4;
3392	scctx->isc_nrxqsets = 4;
3393	scctx->isc_vectors = scctx->isc_nrxqsets;
3394
3395	scctx->isc_ntxqsets_max = 256;
3396	scctx->isc_nrxqsets_max = 256;
3397
3398	scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]
3399	    * sizeof(struct ice_tx_desc), DBA_ALIGN);
3400	scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0]
3401	    * sizeof(union ice_32b_rx_flex_desc), DBA_ALIGN);
3402
3403	scctx->isc_tx_nsegments = ICE_MAX_TX_SEGS;
3404	scctx->isc_tx_tso_segments_max = ICE_MAX_TSO_SEGS;
3405	scctx->isc_tx_tso_size_max = ICE_TSO_SIZE;
3406	scctx->isc_tx_tso_segsize_max = ICE_MAX_DMA_SEG_SIZE;
3407}
3408
3409static int
3410ice_subif_if_attach_pre(if_ctx_t ctx)
3411{
3412	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3413	device_t dev = iflib_get_dev(ctx);
3414
3415	mif->subctx = ctx;
3416	mif->subdev = dev;
3417	mif->subscctx = iflib_get_softc_ctx(ctx);
3418
3419	/* Setup the iflib softc context structure */
3420	ice_subif_setup_scctx(mif);
3421
3422	return (0);
3423}
3424
3425static int
3426ice_subif_if_attach_post(if_ctx_t ctx __unused)
3427{
3428	return (0);
3429}
3430
3431/**
3432 * ice_destroy_mirror_interface - destroy mirror interface
3433 * @sc: driver private data
3434 *
3435 * Destroys all resources associated with the mirroring interface.
3436 * Will not exit early on failure.
3437 *
3438 * @pre: Mirror interface already exists and is initialized.
3439 */
3440void
3441ice_destroy_mirror_interface(struct ice_softc *sc)
3442{
3443	struct ice_mirr_if *mif = sc->mirr_if;
3444	struct ice_vsi *vsi = mif->vsi;
3445	bool is_locked = false;
3446	int ret;
3447
3448	is_locked = sx_xlocked(sc->iflib_ctx_lock);
3449	if (is_locked)
3450		IFLIB_CTX_UNLOCK(sc);
3451
3452	if (mif->ifp) {
3453		ret = iflib_device_deregister(mif->subctx);
3454		if (ret) {
3455			device_printf(sc->dev,
3456			    "iflib_device_deregister for mirror interface failed: %d\n",
3457			    ret);
3458		}
3459	}
3460
3461	bus_topo_lock();
3462	ret = device_delete_child(sc->dev, mif->subdev);
3463	bus_topo_unlock();
3464	if (ret) {
3465		device_printf(sc->dev,
3466		    "device_delete_child for mirror interface failed: %d\n",
3467		    ret);
3468	}
3469
3470	if (is_locked)
3471		IFLIB_CTX_LOCK(sc);
3472
3473	if (mif->if_imap) {
3474		free(mif->if_imap, M_ICE);
3475		mif->if_imap = NULL;
3476	}
3477	if (mif->os_imap) {
3478		free(mif->os_imap, M_ICE);
3479		mif->os_imap = NULL;
3480	}
3481
3482	/* These are freed via ice_subif_queues_free_subif
3483	 * vsi:
3484	 * - rx_irqvs
3485	 * - tx_queues
3486	 * - rx_queues
3487	 */
3488	ice_release_vsi(vsi);
3489
3490	free(mif, M_ICE);
3491	sc->mirr_if = NULL;
3492
3493}
3494
3495/**
3496 * ice_setup_mirror_vsi - Initialize mirror VSI
3497 * @mif: driver private data for mirror interface
3498 *
3499 * Allocates a VSI for a mirror interface, and sets that VSI up for use as a
3500 * mirror for the main PF VSI.
3501 *
3502 * Returns 0 on success, or a standard error code on failure.
3503 */
3504static int
3505ice_setup_mirror_vsi(struct ice_mirr_if *mif)
3506{
3507	struct ice_softc *sc = mif->back;
3508	device_t dev = sc->dev;
3509	struct ice_vsi *vsi;
3510	int ret = 0;
3511
3512	/* vsi is for the new mirror vsi, not the PF's main VSI */
3513	vsi = ice_alloc_vsi(sc, ICE_VSI_VMDQ2);
3514	if (!vsi) {
3515		/* Already prints an error message */
3516		return (ENOMEM);
3517	}
3518	mif->vsi = vsi;
3519
3520	/* Reserve VSI queue allocation from PF queues */
3521	ret = ice_alloc_vsi_qmap(vsi, ICE_DEFAULT_VF_QUEUES, ICE_DEFAULT_VF_QUEUES);
3522	if (ret) {
3523		device_printf(dev, "%s: Unable to allocate mirror VSI queue maps (%d queues): %s\n",
3524		    __func__, ICE_DEFAULT_VF_QUEUES, ice_err_str(ret));
3525		goto release_vsi;
3526	}
3527	vsi->num_tx_queues = vsi->num_rx_queues = ICE_DEFAULT_VF_QUEUES;
3528
3529	/* Assign Tx queues from PF space */
3530	ret = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap,
3531	    vsi->num_tx_queues);
3532	if (ret) {
3533		device_printf(dev, "Unable to assign mirror VSI Tx queues: %s\n",
3534		    ice_err_str(ret));
3535		goto release_vsi;
3536	}
3537	/* Assign Rx queues from PF space */
3538	ret = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap,
3539	    vsi->num_rx_queues);
3540	if (ret) {
3541		device_printf(dev, "Unable to assign mirror VSI Rx queues: %s\n",
3542		    ice_err_str(ret));
3543		goto release_vsi;
3544	}
3545	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3546	vsi->max_frame_size = ICE_MAX_FRAME_SIZE;
3547
3548	ret = ice_initialize_vsi(vsi);
3549	if (ret) {
3550		device_printf(dev, "%s: Error in ice_initialize_vsi for mirror VSI: %s\n",
3551		    __func__, ice_err_str(ret));
3552		goto release_vsi;
3553	}
3554
3555	/* Setup this VSI for receiving traffic */
3556	ret = ice_config_rss(vsi);
3557	if (ret) {
3558		device_printf(dev,
3559		    "Unable to configure RSS for mirror VSI: %s\n",
3560		    ice_err_str(ret));
3561		goto release_vsi;
3562	}
3563
3564	/* Set HW rules for mirroring traffic */
3565	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3566
3567	ice_debug(&sc->hw, ICE_DBG_INIT,
3568	    "Configuring mirroring from VSI %d to %d\n",
3569	    vsi->mirror_src_vsi, vsi->idx);
3570	ice_debug(&sc->hw, ICE_DBG_INIT, "(HW num: VSI %d to %d)\n",
3571	    ice_get_hw_vsi_num(&sc->hw, vsi->mirror_src_vsi),
3572	    ice_get_hw_vsi_num(&sc->hw, vsi->idx));
3573
3574	ret = ice_setup_vsi_mirroring(vsi);
3575	if (ret) {
3576		device_printf(dev,
3577		    "Unable to configure mirroring for VSI: %s\n",
3578		    ice_err_str(ret));
3579		goto release_vsi;
3580	}
3581
3582	return (0);
3583
3584release_vsi:
3585	ice_release_vsi(vsi);
3586	mif->vsi = NULL;
3587	return (ret);
3588}
3589
3590/**
3591 * ice_create_mirror_interface - Initialize mirror interface
3592 * @sc: driver private data
3593 *
3594 * Creates and sets up a mirror interface that will mirror traffic from
3595 * the main PF interface. Includes a call to iflib_device_register() in order
3596 * to setup necessary iflib structures for this new interface as well.
3597 *
3598 * If it returns successfully, a new interface will be created and will show
3599 * up in the ifconfig interface list.
3600 *
3601 * Returns 0 on success, or a standard error code on failure.
3602 */
3603int
3604ice_create_mirror_interface(struct ice_softc *sc)
3605{
3606	device_t dev = sc->dev;
3607	struct ice_mirr_if *mif;
3608	struct ifmedia *media;
3609	struct sbuf *sb;
3610	int ret = 0;
3611
3612	mif = (struct ice_mirr_if *)malloc(sizeof(*mif), M_ICE, M_ZERO | M_NOWAIT);
3613	if (!mif) {
3614		device_printf(dev, "malloc() error allocating mirror interface\n");
3615		return (ENOMEM);
3616	}
3617
3618	/* Set pointers */
3619	sc->mirr_if = mif;
3620	mif->back = sc;
3621
3622	/* Do early setup because these will be called during iflib_device_register():
3623	 * - ice_subif_if_tx_queues_alloc
3624	 * - ice_subif_if_rx_queues_alloc
3625	 */
3626	ret = ice_setup_mirror_vsi(mif);
3627	if (ret)
3628		goto out;
3629
3630	/* Determine name for new interface:
3631	 * (base interface name)(modifier name)(modifier unit number)
3632	 * e.g. for ice0 with a new mirror interface (modifier m)
3633	 * of index 0, this equals "ice0m0"
3634	 */
3635	sb = sbuf_new_auto();
3636	MPASS(sb != NULL);
3637	sbuf_printf(sb, "%sm", device_get_nameunit(dev));
3638	sbuf_finish(sb);
3639
3640	bus_topo_lock();
3641	mif->subdev = device_add_child(dev, sbuf_data(sb), 0);
3642	bus_topo_unlock();
3643
3644	if (!mif->subdev) {
3645		device_printf(dev, "device_add_child failed for %s0\n", sbuf_data(sb));
3646		sbuf_delete(sb);
3647		free(mif, M_ICE);
3648		sc->mirr_if = NULL;
3649		return (ENOMEM);
3650	}
3651	sbuf_delete(sb);
3652
3653	device_set_driver(mif->subdev, &ice_subif_driver);
3654
3655	/* Use iflib_device_register() directly because the driver already
3656	 * has an initialized softc to pass to iflib
3657	 */
3658	ret = iflib_device_register(mif->subdev, mif, &ice_subif_sctx, &mif->subctx);
3659	if (ret)
3660		goto out;
3661
3662	/* Indicate that created interface will be just for monitoring */
3663	mif->ifp = iflib_get_ifp(mif->subctx);
3664	if_setflagbits(mif->ifp, IFF_MONITOR, 0);
3665
3666	/* Use autoselect media by default */
3667	media = iflib_get_media(mif->subctx);
3668	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3669	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3670
3671	device_printf(dev, "Created dev %s and ifnet %s for mirroring\n",
3672	    device_get_nameunit(mif->subdev), if_name(mif->ifp));
3673
3674	ice_add_vsi_sysctls(mif->vsi);
3675
3676	ret = ice_wire_mirror_intrs(mif);
3677	if (ret)
3678		goto out;
3679
3680	mif->if_attached = true;
3681	return (0);
3682
3683out:
3684	ice_destroy_mirror_interface(sc);
3685	return (ret);
3686}
3687
3688/**
3689 * ice_wire_mirror_intrs
3690 * @mif: driver private subinterface structure
3691 *
3692 * Helper function that sets up driver interrupt data and calls
3693 * into iflib in order to setup interrupts in its data structures as well.
3694 *
3695 * Like ice_if_msix_intr_assign, currently requires that we get at least the same
3696 * number of vectors as we have queues, and that we always have the same number
3697 * of Tx and Rx queues. Unlike that function, this calls a special
3698 * iflib_irq_alloc_generic_subif() function for RX interrupts because the
3699 * driver needs to get MSI-X resources from the parent device.
3700 *
3701 * Tx queues use a softirq instead of using their own hardware interrupt so that
3702 * remains unchanged.
3703 *
3704 * Returns 0 on success or an error code from iflib_irq_alloc_generic_subctx()
3705 * on failure.
3706 */
3707static int
3708ice_wire_mirror_intrs(struct ice_mirr_if *mif)
3709{
3710	struct ice_softc *sc = mif->back;
3711	struct ice_hw *hw = &sc->hw;
3712	struct ice_vsi *vsi = mif->vsi;
3713	device_t dev = mif->subdev;
3714	int err, i, rid;
3715
3716	if_ctx_t ctx = mif->subctx;
3717
3718	ice_debug(hw, ICE_DBG_INIT, "%s: Last rid: %d\n", __func__, sc->last_rid);
3719
3720	rid = sc->last_rid + 1;
3721	for (i = 0; i < vsi->num_rx_queues; i++, rid++) {
3722		struct ice_rx_queue *rxq = &vsi->rx_queues[i];
3723		struct ice_tx_queue *txq = &vsi->tx_queues[i];
3724		char irq_name[16];
3725
3726		// TODO: Change to use dynamic interface number
3727		snprintf(irq_name, sizeof(irq_name), "m0rxq%d", i);
3728		/* First arg is parent device (physical port's) iflib ctx */
3729		err = iflib_irq_alloc_generic_subctx(sc->ctx, ctx,
3730		    &mif->rx_irqvs[i].irq, rid, IFLIB_INTR_RXTX, ice_msix_que,
3731		    rxq, rxq->me, irq_name);
3732		if (err) {
3733			device_printf(dev,
3734			    "Failed to allocate q int %d err: %s\n",
3735			    i, ice_err_str(err));
3736			i--;
3737			goto fail;
3738		}
3739		MPASS(rid - 1 > 0);
3740		/* Set vector number used in interrupt enable/disable functions */
3741		mif->rx_irqvs[i].me = rid - 1;
3742		rxq->irqv = &mif->rx_irqvs[i];
3743
3744		bzero(irq_name, sizeof(irq_name));
3745		snprintf(irq_name, sizeof(irq_name), "m0txq%d", i);
3746		iflib_softirq_alloc_generic(ctx, &mif->rx_irqvs[i].irq,
3747		    IFLIB_INTR_TX, txq, txq->me, irq_name);
3748		txq->irqv = &mif->rx_irqvs[i];
3749	}
3750
3751	sc->last_rid = rid - 1;
3752
3753	ice_debug(hw, ICE_DBG_INIT, "%s: New last rid: %d\n", __func__,
3754	    sc->last_rid);
3755
3756	return (0);
3757
3758fail:
3759	for (; i >= 0; i--)
3760		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
3761	return (err);
3762}
3763
3764/**
3765 * ice_subif_rebuild - Rebuild subinterface post reset
3766 * @sc: The device private softc
3767 *
3768 * Restore subinterface state after a reset occurred.
3769 * Restart the VSI and enable the mirroring.
3770 */
3771static int
3772ice_subif_rebuild(struct ice_softc *sc)
3773{
3774	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(sc->ctx);
3775	struct ice_vsi *vsi = sc->mirr_if->vsi;
3776	int err;
3777
3778	err = ice_subif_rebuild_vsi_qmap(sc);
3779	if (err) {
3780		device_printf(sc->dev, "Unable to re-assign mirror VSI queues, err %s\n",
3781		      ice_err_str(err));
3782		return (err);
3783	}
3784
3785	err = ice_initialize_vsi(vsi);
3786	if (err) {
3787		device_printf(sc->dev, "Unable to re-initialize mirror VSI, err %s\n",
3788		      ice_err_str(err));
3789		goto err_release_queue_allocations_subif;
3790	}
3791
3792	err = ice_config_rss(vsi);
3793	if (err) {
3794		device_printf(sc->dev,
3795		      "Unable to reconfigure RSS for the mirror VSI, err %s\n",
3796		      ice_err_str(err));
3797		goto err_deinit_subif_vsi;
3798	}
3799
3800	vsi->mirror_src_vsi = sc->pf_vsi.idx;
3801
3802	err = ice_setup_vsi_mirroring(vsi);
3803	if (err) {
3804		device_printf(sc->dev,
3805		      "Unable to configure mirroring for VSI: %s\n",
3806		      ice_err_str(err));
3807		goto err_deinit_subif_vsi;
3808	}
3809
3810	ice_set_state(&mif->state, ICE_STATE_SUBIF_NEEDS_REINIT);
3811
3812	return (0);
3813
3814err_deinit_subif_vsi:
3815	ice_deinit_vsi(vsi);
3816err_release_queue_allocations_subif:
3817	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap,
3818	    sc->mirr_if->num_irq_vectors);
3819	ice_resmgr_release_map(&sc->rx_qmgr, vsi->rx_qmap,
3820	    sc->mirr_if->num_irq_vectors);
3821
3822	return (err);
3823}
3824
3825/**
3826 * ice_subif_rebuild_vsi_qmap - Rebuild the mirror VSI queue mapping
3827 * @sc: the device softc pointer
3828 *
3829 * Loops over the Tx and Rx queues for the mirror VSI and reassigns the queue
3830 * mapping after a reset occurred.
3831 */
3832static int
3833ice_subif_rebuild_vsi_qmap(struct ice_softc *sc)
3834{
3835	struct ice_vsi *vsi = sc->mirr_if->vsi;
3836	struct ice_tx_queue *txq;
3837	struct ice_rx_queue *rxq;
3838	int err, i;
3839
3840	err = ice_resmgr_assign_scattered(&sc->tx_qmgr, vsi->tx_qmap, sc->mirr_if->num_irq_vectors);
3841	if (err) {
3842		device_printf(sc->dev, "Unable to assign mirror VSI Tx queues: %s\n",
3843		      ice_err_str(err));
3844		return (err);
3845	}
3846
3847	err = ice_resmgr_assign_scattered(&sc->rx_qmgr, vsi->rx_qmap, sc->mirr_if->num_irq_vectors);
3848	if (err) {
3849		device_printf(sc->dev, "Unable to assign mirror VSI Rx queues: %s\n",
3850		      ice_err_str(err));
3851		goto err_release_tx_queues;
3852	}
3853
3854	vsi->qmap_type = ICE_RESMGR_ALLOC_SCATTERED;
3855
3856	/* Re-assign Tx queue tail pointers */
3857	for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++)
3858		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3859
3860	/* Re-assign Rx queue tail pointers */
3861	for (i = 0, rxq = vsi->rx_queues; i < vsi->num_rx_queues; i++, rxq++)
3862		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
3863
3864	return (0);
3865
3866err_release_tx_queues:
3867	ice_resmgr_release_map(&sc->tx_qmgr, vsi->tx_qmap, vsi->num_tx_queues);
3868
3869	return (err);
3870}
3871
3872/**
3873 * ice_subif_if_tx_queues_alloc - Allocate Tx queue memory for subinterfaces
3874 * @ctx: iflib context structure
3875 * @vaddrs: virtual addresses for the queue memory
3876 * @paddrs: physical addresses for the queue memory
3877 * @ntxqs: the number of Tx queues per set (should always be 1)
3878 * @ntxqsets: the number of Tx queue sets to allocate
3879 *
3880 * See ice_if_tx_queues_alloc() description. Similar to that function, but
3881 * for subinterfaces instead.
3882 */
3883static int
3884ice_subif_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3885			     int __invariant_only ntxqs, int ntxqsets)
3886{
3887	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3888	struct ice_tx_queue *txq;
3889	device_t dev = mif->subdev;
3890	struct ice_vsi *vsi;
3891	int err, i, j;
3892
3893	MPASS(mif != NULL);
3894	MPASS(ntxqs == 1);
3895	MPASS(mif->subscctx->isc_ntxd[0] <= ICE_MAX_DESC_COUNT);
3896
3897	vsi = mif->vsi;
3898
3899	MPASS(vsi->num_tx_queues == ntxqsets);
3900
3901	/* Allocate queue structure memory */
3902	if (!(vsi->tx_queues =
3903	      (struct ice_tx_queue *)malloc(sizeof(struct ice_tx_queue) * ntxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3904		device_printf(dev, "%s: Unable to allocate Tx queue memory for subfunction\n",
3905		    __func__);
3906		return (ENOMEM);
3907	}
3908
3909	/* Allocate report status arrays */
3910	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3911		if (!(txq->tx_rsq =
3912		      (uint16_t *)malloc(sizeof(uint16_t) * mif->subscctx->isc_ntxd[0], M_ICE, M_NOWAIT))) {
3913			device_printf(dev,
3914			    "%s: Unable to allocate tx_rsq memory for subfunction\n", __func__);
3915			err = ENOMEM;
3916			goto free_tx_queues;
3917		}
3918		/* Initialize report status array */
3919		for (j = 0; j < mif->subscctx->isc_ntxd[0]; j++)
3920			txq->tx_rsq[j] = QIDX_INVALID;
3921	}
3922
3923	/* Add Tx queue sysctls context */
3924	ice_vsi_add_txqs_ctx(vsi);
3925
3926	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3927		/* q_handle == me when only one TC */
3928		txq->me = txq->q_handle = i;
3929		txq->vsi = vsi;
3930
3931		/* store the queue size for easier access */
3932		txq->desc_count = mif->subscctx->isc_ntxd[0];
3933
3934		/* get the virtual and physical address of the hardware queues */
3935		txq->tail = QTX_COMM_DBELL(vsi->tx_qmap[i]);
3936		txq->tx_base = (struct ice_tx_desc *)vaddrs[i];
3937		txq->tx_paddr = paddrs[i];
3938
3939		ice_add_txq_sysctls(txq);
3940	}
3941
3942	return (0);
3943
3944free_tx_queues:
3945	for (i = 0, txq = vsi->tx_queues; i < ntxqsets; i++, txq++) {
3946		if (txq->tx_rsq != NULL) {
3947			free(txq->tx_rsq, M_ICE);
3948			txq->tx_rsq = NULL;
3949		}
3950	}
3951	free(vsi->tx_queues, M_ICE);
3952	vsi->tx_queues = NULL;
3953	return (err);
3954}
3955
3956/**
3957 * ice_subif_if_rx_queues_alloc - Allocate Rx queue memory for subinterfaces
3958 * @ctx: iflib context structure
3959 * @vaddrs: virtual addresses for the queue memory
3960 * @paddrs: physical addresses for the queue memory
3961 * @nrxqs: number of Rx queues per set (should always be 1)
3962 * @nrxqsets: number of Rx queue sets to allocate
3963 *
3964 * See ice_if_rx_queues_alloc() for general summary; this is similar to that
3965 * but implemented for subinterfaces.
3966 */
3967static int
3968ice_subif_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
3969    int __invariant_only nrxqs, int nrxqsets)
3970{
3971	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
3972	struct ice_rx_queue *rxq;
3973	device_t dev = mif->subdev;
3974	struct ice_vsi *vsi;
3975	int i;
3976
3977	MPASS(mif != NULL);
3978	MPASS(nrxqs == 1);
3979	MPASS(mif->subscctx->isc_nrxd[0] <= ICE_MAX_DESC_COUNT);
3980
3981	vsi = mif->vsi;
3982
3983	MPASS(vsi->num_rx_queues == nrxqsets);
3984
3985	/* Allocate queue structure memory */
3986	if (!(vsi->rx_queues =
3987	      (struct ice_rx_queue *) malloc(sizeof(struct ice_rx_queue) * nrxqsets, M_ICE, M_NOWAIT | M_ZERO))) {
3988		device_printf(dev, "%s: Unable to allocate Rx queue memory for subfunction\n",
3989		    __func__);
3990		return (ENOMEM);
3991	}
3992
3993	/* Add Rx queue sysctls context */
3994	ice_vsi_add_rxqs_ctx(vsi);
3995
3996	for (i = 0, rxq = vsi->rx_queues; i < nrxqsets; i++, rxq++) {
3997		rxq->me = i;
3998		rxq->vsi = vsi;
3999
4000		/* store the queue size for easier access */
4001		rxq->desc_count = mif->subscctx->isc_nrxd[0];
4002
4003		/* get the virtual and physical address of the hardware queues */
4004		rxq->tail = QRX_TAIL(vsi->rx_qmap[i]);
4005		rxq->rx_base = (union ice_32b_rx_flex_desc *)vaddrs[i];
4006		rxq->rx_paddr = paddrs[i];
4007
4008		ice_add_rxq_sysctls(rxq);
4009	}
4010
4011	return (0);
4012}
4013
4014/**
4015 * ice_subif_if_msix_intr_assign - Assign MSI-X interrupts to new sub interface
4016 * @ctx: the iflib context structure
4017 * @msix: the number of vectors we were assigned
4018 *
4019 * Allocates and assigns driver private resources for MSI-X interrupt tracking.
4020 *
4021 * @pre OS MSI-X resources have been pre-allocated by parent interface.
4022 */
4023static int
4024ice_subif_if_msix_intr_assign(if_ctx_t ctx, int msix)
4025{
4026	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4027	struct ice_softc *sc = mif->back;
4028	struct ice_vsi *vsi = mif->vsi;
4029
4030	device_t dev = mif->subdev;
4031	int ret;
4032
4033	if (vsi->num_rx_queues != vsi->num_tx_queues) {
4034		device_printf(dev,
4035			      "iflib requested %d Tx queues, and %d Rx queues, but the driver isn't able to support a differing number of Tx and Rx queues\n",
4036			      vsi->num_tx_queues, vsi->num_rx_queues);
4037		return (EOPNOTSUPP);
4038	}
4039
4040	if (msix > sc->extra_vectors) {
4041		device_printf(dev,
4042		     "%s: Not enough spare (%d) msix vectors for new sub-interface requested (%d)\n",
4043		     __func__, sc->extra_vectors, msix);
4044		return (ENOSPC);
4045	}
4046	device_printf(dev, "%s: Using %d vectors for sub-interface\n", __func__,
4047	    msix);
4048
4049	/* Allocate space to store the IRQ vector data */
4050	mif->num_irq_vectors = vsi->num_rx_queues;
4051	mif->rx_irqvs = (struct ice_irq_vector *)
4052	    malloc(sizeof(struct ice_irq_vector) * (mif->num_irq_vectors),
4053		   M_ICE, M_NOWAIT);
4054	if (!mif->rx_irqvs) {
4055		device_printf(dev,
4056			      "Unable to allocate RX irqv memory for mirror's %d vectors\n",
4057			      mif->num_irq_vectors);
4058		return (ENOMEM);
4059	}
4060
4061	/* Assign mirror interface interrupts from PF device space */
4062	if (!(mif->if_imap =
4063	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4064	      M_ICE, M_NOWAIT))) {
4065		device_printf(dev, "Unable to allocate mirror intfc if_imap memory\n");
4066		ret = ENOMEM;
4067		goto free_irqvs;
4068	}
4069	ret = ice_resmgr_assign_contiguous(&sc->dev_imgr, mif->if_imap, mif->num_irq_vectors);
4070	if (ret) {
4071		device_printf(dev, "Unable to assign mirror intfc PF device interrupt mapping: %s\n",
4072			      ice_err_str(ret));
4073		goto free_if_imap;
4074	}
4075	/* Assign mirror interface interrupts from OS interrupt allocation space */
4076	if (!(mif->os_imap =
4077	      (u16 *)malloc(sizeof(u16) * mif->num_irq_vectors,
4078	      M_ICE, M_NOWAIT))) {
4079		device_printf(dev, "Unable to allocate mirror intfc os_imap memory\n");
4080		ret = ENOMEM;
4081		goto free_if_imap;
4082	}
4083	ret = ice_resmgr_assign_contiguous(&sc->os_imgr, mif->os_imap, mif->num_irq_vectors);
4084	if (ret) {
4085		device_printf(dev, "Unable to assign mirror intfc OS interrupt mapping: %s\n",
4086			      ice_err_str(ret));
4087		goto free_if_imap;
4088	}
4089
4090	return (0);
4091
4092free_if_imap:
4093	free(mif->if_imap, M_ICE);
4094	mif->if_imap = NULL;
4095free_irqvs:
4096	free(mif->rx_irqvs, M_ICE);
4097	mif->rx_irqvs = NULL;
4098	return (ret);
4099}
4100
4101/**
4102 * ice_subif_if_intr_enable - Enable device interrupts for a subinterface
4103 * @ctx: iflib context structure
4104 *
4105 * Called by iflib to request enabling all interrupts that belong to a
4106 * subinterface.
4107 */
4108static void
4109ice_subif_if_intr_enable(if_ctx_t ctx)
4110{
4111	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4112	struct ice_softc *sc = mif->back;
4113	struct ice_vsi *vsi = mif->vsi;
4114	struct ice_hw *hw = &sc->hw;
4115
4116	/* Do not enable queue interrupts in recovery mode */
4117	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4118		return;
4119
4120	/* Enable all queue interrupts */
4121	for (int i = 0; i < vsi->num_rx_queues; i++)
4122		ice_enable_intr(hw, vsi->rx_queues[i].irqv->me);
4123}
4124
4125/**
4126 * ice_subif_if_rx_queue_intr_enable - Enable a specific Rx queue interrupt
4127 * @ctx: iflib context structure
4128 * @rxqid: the Rx queue to enable
4129 *
4130 * Enable a specific Rx queue interrupt.
4131 *
4132 * This function is not protected by the iflib CTX lock.
4133 */
4134static int
4135ice_subif_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid)
4136{
4137	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4138	struct ice_softc *sc = mif->back;
4139	struct ice_vsi *vsi = mif->vsi;
4140	struct ice_hw *hw = &sc->hw;
4141
4142	/* Do not enable queue interrupts in recovery mode */
4143	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4144		return (ENOSYS);
4145
4146	ice_enable_intr(hw, vsi->rx_queues[rxqid].irqv->me);
4147	return (0);
4148}
4149
4150/**
4151 * ice_subif_if_tx_queue_intr_enable - Enable a specific Tx queue interrupt
4152 * @ctx: iflib context structure
4153 * @txqid: the Tx queue to enable
4154 *
4155 * Enable a specific Tx queue interrupt.
4156 *
4157 * This function is not protected by the iflib CTX lock.
4158 */
4159static int
4160ice_subif_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid)
4161{
4162	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4163	struct ice_softc *sc = mif->back;
4164	struct ice_vsi *vsi = mif->vsi;
4165	struct ice_hw *hw = &sc->hw;
4166
4167	/* Do not enable queue interrupts in recovery mode */
4168	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4169		return (ENOSYS);
4170
4171	ice_enable_intr(hw, vsi->tx_queues[txqid].irqv->me);
4172	return (0);
4173}
4174
4175/**
4176 * ice_subif_if_init - Initialize the subinterface
4177 * @ctx: iflib ctx structure
4178 *
4179 * Called by iflib to bring the device up, i.e. ifconfig ice0m0 up.
4180 * Prepares the Tx and Rx engines and enables interrupts.
4181 *
4182 * @pre assumes the caller holds the iflib CTX lock
4183 */
4184static void
4185ice_subif_if_init(if_ctx_t ctx)
4186{
4187	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4188	struct ice_softc *sc = mif->back;
4189	struct ice_vsi *vsi = mif->vsi;
4190	device_t dev = mif->subdev;
4191	int err;
4192
4193	if (ice_driver_is_detaching(sc))
4194		return;
4195
4196	if (ice_test_state(&sc->state, ICE_STATE_RECOVERY_MODE))
4197		return;
4198
4199	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4200		device_printf(dev,
4201		    "request to start interface cannot be completed as the parent device %s failed to reset\n",
4202		    device_get_nameunit(sc->dev));
4203		return;
4204	}
4205
4206	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4207		device_printf(dev,
4208		    "request to start interface cannot be completed while parent device %s is prepared for impending reset\n",
4209		    device_get_nameunit(sc->dev));
4210		return;
4211	}
4212
4213	/* XXX: Equiv to ice_update_rx_mbuf_sz */
4214	vsi->mbuf_sz = iflib_get_rx_mbuf_sz(ctx);
4215
4216	/* Initialize software Tx tracking values */
4217	ice_init_tx_tracking(vsi);
4218
4219	err = ice_cfg_vsi_for_tx(vsi);
4220	if (err) {
4221		device_printf(dev,
4222			      "Unable to configure subif VSI for Tx: %s\n",
4223			      ice_err_str(err));
4224		return;
4225	}
4226
4227	err = ice_cfg_vsi_for_rx(vsi);
4228	if (err) {
4229		device_printf(dev,
4230			      "Unable to configure subif VSI for Rx: %s\n",
4231			      ice_err_str(err));
4232		goto err_cleanup_tx;
4233	}
4234
4235	err = ice_control_all_rx_queues(vsi, true);
4236	if (err) {
4237		device_printf(dev,
4238			      "Unable to enable subif Rx rings for receive: %s\n",
4239			      ice_err_str(err));
4240		goto err_cleanup_tx;
4241	}
4242
4243	ice_configure_all_rxq_interrupts(vsi);
4244	ice_configure_rx_itr(vsi);
4245
4246	ice_set_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED);
4247	return;
4248
4249err_cleanup_tx:
4250	ice_vsi_disable_tx(vsi);
4251}
4252
4253/**
4254 * ice_if_stop_subif - Stop the subinterface
4255 * @ctx: iflib context structure
4256 * @ifs: subinterface context structure
4257 *
4258 * Called by iflib to stop the subinterface and bring it down.
4259 * (e.g. ifconfig ice0m0 down)
4260 *
4261 * @pre assumes the caller holds the iflib CTX lock
4262 */
4263static void
4264ice_subif_if_stop(if_ctx_t ctx)
4265{
4266	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4267	struct ice_softc *sc = mif->back;
4268	struct ice_vsi *vsi = mif->vsi;
4269	device_t dev = mif->subdev;
4270
4271	if (!ice_testandclear_state(&mif->state, ICE_STATE_DRIVER_INITIALIZED))
4272		return;
4273
4274	if (ice_test_state(&sc->state, ICE_STATE_RESET_FAILED)) {
4275		device_printf(dev,
4276		    "request to stop interface cannot be completed as the parent device %s failed to reset\n",
4277		    device_get_nameunit(sc->dev));
4278		return;
4279	}
4280
4281	if (ice_test_state(&sc->state, ICE_STATE_PREPARED_FOR_RESET)) {
4282		device_printf(dev,
4283		    "request to stop interface cannot be completed while parent device %s is prepared for impending reset\n",
4284		    device_get_nameunit(sc->dev));
4285		return;
4286	}
4287
4288	/* Dissociate the Tx and Rx queues from the interrupts */
4289	ice_flush_txq_interrupts(vsi);
4290	ice_flush_rxq_interrupts(vsi);
4291
4292	/* Disable the Tx and Rx queues */
4293	ice_vsi_disable_tx(vsi);
4294	ice_control_all_rx_queues(vsi, false);
4295}
4296
4297/**
4298 * ice_free_irqvs_subif - Free IRQ vector memory for subinterfaces
4299 * @mif: Mirror interface private structure
4300 *
4301 * Free IRQ vector memory allocated during ice_subif_if_msix_intr_assign.
4302 */
4303static void
4304ice_free_irqvs_subif(struct ice_mirr_if *mif)
4305{
4306	struct ice_softc *sc = mif->back;
4307	struct ice_vsi *vsi = mif->vsi;
4308	if_ctx_t ctx = sc->ctx;
4309	int i;
4310
4311	/* If the irqvs array is NULL, then there are no vectors to free */
4312	if (mif->rx_irqvs == NULL)
4313		return;
4314
4315	/* Free the IRQ vectors -- currently subinterfaces have number
4316	 * of vectors equal to number of RX queues
4317	 *
4318	 * XXX: ctx is parent device's ctx, not the subinterface ctx
4319	 */
4320	for (i = 0; i < vsi->num_rx_queues; i++)
4321		iflib_irq_free(ctx, &mif->rx_irqvs[i].irq);
4322
4323	ice_resmgr_release_map(&sc->os_imgr, mif->os_imap,
4324	    mif->num_irq_vectors);
4325	ice_resmgr_release_map(&sc->dev_imgr, mif->if_imap,
4326	    mif->num_irq_vectors);
4327
4328	sc->last_rid -= vsi->num_rx_queues;
4329
4330	/* Clear the irqv pointers */
4331	for (i = 0; i < vsi->num_rx_queues; i++)
4332		vsi->rx_queues[i].irqv = NULL;
4333
4334	for (i = 0; i < vsi->num_tx_queues; i++)
4335		vsi->tx_queues[i].irqv = NULL;
4336
4337	/* Release the vector array memory */
4338	free(mif->rx_irqvs, M_ICE);
4339	mif->rx_irqvs = NULL;
4340}
4341
4342/**
4343 * ice_subif_if_queues_free - Free queue memory for subinterfaces
4344 * @ctx: the iflib context structure
4345 *
4346 * Free queue memory allocated by ice_subif_tx_queues_alloc() and
4347 * ice_subif_if_rx_queues_alloc().
4348 */
4349static void
4350ice_subif_if_queues_free(if_ctx_t ctx)
4351{
4352	struct ice_mirr_if *mif = (struct ice_mirr_if *)iflib_get_softc(ctx);
4353	struct ice_vsi *vsi = mif->vsi;
4354	struct ice_tx_queue *txq;
4355	int i;
4356
4357	/* Free the Tx and Rx sysctl contexts, and assign NULL to the node
4358	 * pointers.
4359	 */
4360	ice_vsi_del_txqs_ctx(vsi);
4361	ice_vsi_del_rxqs_ctx(vsi);
4362
4363	/* Release MSI-X IRQ vectors */
4364	ice_free_irqvs_subif(mif);
4365
4366	if (vsi->tx_queues != NULL) {
4367		/* free the tx_rsq arrays */
4368		for (i = 0, txq = vsi->tx_queues; i < vsi->num_tx_queues; i++, txq++) {
4369			if (txq->tx_rsq != NULL) {
4370				free(txq->tx_rsq, M_ICE);
4371				txq->tx_rsq = NULL;
4372			}
4373		}
4374		free(vsi->tx_queues, M_ICE);
4375		vsi->tx_queues = NULL;
4376	}
4377	if (vsi->rx_queues != NULL) {
4378		free(vsi->rx_queues, M_ICE);
4379		vsi->rx_queues = NULL;
4380	}
4381}
4382
4383/**
4384 * ice_subif_if_media_status - Report subinterface media
4385 * @ctx: iflib context structure
4386 * @ifmr: ifmedia request structure to update
4387 *
4388 * Updates the provided ifmr with something, in order to prevent a
4389 * "no media types?" message from ifconfig.
4390 *
4391 * Mirror interfaces are always up.
4392 */
4393static void
4394ice_subif_if_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr)
4395{
4396	ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
4397	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
4398}
4399
4400/**
4401 * ice_subif_if_promisc_set - Set subinterface promiscuous mode
4402 * @ctx: iflib context structure
4403 * @flags: promiscuous flags to configure
4404 *
4405 * Called by iflib to configure device promiscuous mode.
4406 *
4407 * @remark This does not need to be implemented for now.
4408 */
4409static int
4410ice_subif_if_promisc_set(if_ctx_t ctx __unused, int flags __unused)
4411{
4412	return (0);
4413}
4414
4415