1/*	$NetBSD: if_igc.c,v 1.13 2024/02/21 12:39:39 msaitoh Exp $	*/
2/*	$OpenBSD: if_igc.c,v 1.13 2023/04/28 10:18:57 bluhm Exp $	*/
3/*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
7 * All rights reserved.
8 * Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: if_igc.c,v 1.13 2024/02/21 12:39:39 msaitoh Exp $");
34
35#ifdef _KERNEL_OPT
36#include "opt_if_igc.h"
37#if 0 /* notyet */
38#include "vlan.h"
39#endif
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/bus.h>
45#include <sys/cpu.h>
46#include <sys/device.h>
47#include <sys/endian.h>
48#include <sys/intr.h>
49#include <sys/interrupt.h>
50#include <sys/kernel.h>
51#include <sys/kmem.h>
52#include <sys/mbuf.h>
53#include <sys/mutex.h>
54#include <sys/socket.h>
55#include <sys/workqueue.h>
56#include <sys/xcall.h>
57
58#include <net/bpf.h>
59#include <net/if.h>
60#include <net/if_ether.h>
61#include <net/if_media.h>
62#include <net/if_vlanvar.h>
63#include <net/rss_config.h>
64
65#include <netinet/in.h>
66#include <netinet/ip.h>
67#include <netinet/ip6.h>
68#include <netinet/tcp.h>
69
70#include <dev/pci/pcivar.h>
71#include <dev/pci/pcireg.h>
72#include <dev/pci/pcidevs.h>
73
74#include <dev/pci/igc/if_igc.h>
75#include <dev/pci/igc/igc_evcnt.h>
76#include <dev/pci/igc/igc_hw.h>
77#include <dev/mii/miivar.h>
78
79#define IGC_WORKQUEUE_PRI	PRI_SOFTNET
80
81#ifndef IGC_RX_INTR_PROCESS_LIMIT_DEFAULT
82#define IGC_RX_INTR_PROCESS_LIMIT_DEFAULT	0
83#endif
84#ifndef IGC_TX_INTR_PROCESS_LIMIT_DEFAULT
85#define IGC_TX_INTR_PROCESS_LIMIT_DEFAULT	0
86#endif
87
88#ifndef IGC_RX_PROCESS_LIMIT_DEFAULT
89#define IGC_RX_PROCESS_LIMIT_DEFAULT		256
90#endif
91#ifndef IGC_TX_PROCESS_LIMIT_DEFAULT
92#define IGC_TX_PROCESS_LIMIT_DEFAULT		256
93#endif
94
95#define	htolem32(p, x)	(*((uint32_t *)(p)) = htole32(x))
96#define	htolem64(p, x)	(*((uint64_t *)(p)) = htole64(x))
97
98static const struct igc_product {
99	pci_vendor_id_t		igcp_vendor;
100	pci_product_id_t	igcp_product;
101	const char		*igcp_name;
102} igc_products[] = {
103	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_IT,
104	    "Intel(R) Ethernet Controller I225-IT(2)" },
105	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_LM,
106	    "Intel(R) Ethernet Controller I226-LM" },
107	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_V,
108	    "Intel(R) Ethernet Controller I226-V" },
109	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_IT,
110	    "Intel(R) Ethernet Controller I226-IT" },
111	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I221_V,
112	    "Intel(R) Ethernet Controller I221-V" },
113	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_BLANK_NVM,
114	    "Intel(R) Ethernet Controller I226(blankNVM)" },
115	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_LM,
116	    "Intel(R) Ethernet Controller I225-LM" },
117	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_V,
118	    "Intel(R) Ethernet Controller I225-V" },
119	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I220_V,
120	    "Intel(R) Ethernet Controller I220-V" },
121	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_I,
122	    "Intel(R) Ethernet Controller I225-I" },
123	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_BLANK_NVM,
124	    "Intel(R) Ethernet Controller I225(blankNVM)" },
125	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_K,
126	    "Intel(R) Ethernet Controller I225-K" },
127	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_K2,
128	    "Intel(R) Ethernet Controller I225-K(2)" },
129	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_K,
130	    "Intel(R) Ethernet Controller I226-K" },
131	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I225_LMVP,
132	    "Intel(R) Ethernet Controller I225-LMvP(2)" },
133	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_I226_LMVP,
134	    "Intel(R) Ethernet Controller I226-LMvP" },
135	{ 0, 0, NULL },
136};
137
138#define	IGC_DF_CFG	0x1
139#define	IGC_DF_TX	0x2
140#define	IGC_DF_RX	0x4
141#define	IGC_DF_MISC	0x8
142
143#ifdef IGC_DEBUG_FLAGS
144int igc_debug_flags = IGC_DEBUG_FLAGS;
145#else
146int igc_debug_flags = 0;
147#endif
148
149#define	DPRINTF(flag, fmt, args...)		do {			\
150	if (igc_debug_flags & (IGC_DF_ ## flag))			\
151		printf("%s: %d: " fmt, __func__, __LINE__, ##args);	\
152    } while (0)
153
154/*********************************************************************
155 *  Function Prototypes
156 *********************************************************************/
157static int	igc_match(device_t, cfdata_t, void *);
158static void	igc_attach(device_t, device_t, void *);
159static int	igc_detach(device_t, int);
160
161static void	igc_identify_hardware(struct igc_softc *);
162static int	igc_adjust_nqueues(struct igc_softc *);
163static int	igc_allocate_pci_resources(struct igc_softc *);
164static int	igc_allocate_interrupts(struct igc_softc *);
165static int	igc_allocate_queues(struct igc_softc *);
166static void	igc_free_pci_resources(struct igc_softc *);
167static void	igc_free_interrupts(struct igc_softc *);
168static void	igc_free_queues(struct igc_softc *);
169static void	igc_reset(struct igc_softc *);
170static void	igc_init_dmac(struct igc_softc *, uint32_t);
171static int	igc_setup_interrupts(struct igc_softc *);
172static void	igc_attach_counters(struct igc_softc *sc);
173static void	igc_detach_counters(struct igc_softc *sc);
174static void	igc_update_counters(struct igc_softc *sc);
175static void	igc_clear_counters(struct igc_softc *sc);
176static int	igc_setup_msix(struct igc_softc *);
177static int	igc_setup_msi(struct igc_softc *);
178static int	igc_setup_intx(struct igc_softc *);
179static int	igc_dma_malloc(struct igc_softc *, bus_size_t,
180		    struct igc_dma_alloc *);
181static void	igc_dma_free(struct igc_softc *, struct igc_dma_alloc *);
182static void	igc_setup_interface(struct igc_softc *);
183
184static int	igc_init(struct ifnet *);
185static int	igc_init_locked(struct igc_softc *);
186static void	igc_start(struct ifnet *);
187static int	igc_transmit(struct ifnet *, struct mbuf *);
188static void	igc_tx_common_locked(struct ifnet *, struct tx_ring *, int);
189static bool	igc_txeof(struct tx_ring *, u_int);
190static void	igc_intr_barrier(struct igc_softc *);
191static void	igc_stop(struct ifnet *, int);
192static void	igc_stop_locked(struct igc_softc *);
193static int	igc_ioctl(struct ifnet *, u_long, void *);
194#ifdef IF_RXR
195static int	igc_rxrinfo(struct igc_softc *, struct if_rxrinfo *);
196#endif
197static void	igc_rxfill(struct rx_ring *);
198static void	igc_rxrefill(struct rx_ring *, int);
199static bool	igc_rxeof(struct rx_ring *, u_int);
200static int	igc_rx_checksum(struct igc_queue *, uint64_t, uint32_t,
201		    uint32_t);
202static void	igc_watchdog(struct ifnet *);
203static void	igc_tick(void *);
204static void	igc_media_status(struct ifnet *, struct ifmediareq *);
205static int	igc_media_change(struct ifnet *);
206static int	igc_ifflags_cb(struct ethercom *);
207static void	igc_set_filter(struct igc_softc *);
208static void	igc_update_link_status(struct igc_softc *);
209static int	igc_get_buf(struct rx_ring *, int, bool);
210static int	igc_tx_ctx_setup(struct tx_ring *, struct mbuf *, int,
211		    uint32_t *, uint32_t *);
212static int	igc_tso_setup(struct tx_ring *, struct mbuf *, int,
213		    uint32_t *, uint32_t *);
214
215static void	igc_configure_queues(struct igc_softc *);
216static void	igc_set_queues(struct igc_softc *, uint32_t, uint32_t, int);
217static void	igc_enable_queue(struct igc_softc *, uint32_t);
218static void	igc_enable_intr(struct igc_softc *);
219static void	igc_disable_intr(struct igc_softc *);
220static int	igc_intr_link(void *);
221static int	igc_intr_queue(void *);
222static int	igc_intr(void *);
223static void	igc_handle_queue(void *);
224static void	igc_handle_queue_work(struct work *, void *);
225static void	igc_sched_handle_queue(struct igc_softc *, struct igc_queue *);
226static void	igc_barrier_handle_queue(struct igc_softc *);
227
228static int	igc_allocate_transmit_buffers(struct tx_ring *);
229static int	igc_setup_transmit_structures(struct igc_softc *);
230static int	igc_setup_transmit_ring(struct tx_ring *);
231static void	igc_initialize_transmit_unit(struct igc_softc *);
232static void	igc_free_transmit_structures(struct igc_softc *);
233static void	igc_free_transmit_buffers(struct tx_ring *);
234static void	igc_withdraw_transmit_packets(struct tx_ring *, bool);
235static int	igc_allocate_receive_buffers(struct rx_ring *);
236static int	igc_setup_receive_structures(struct igc_softc *);
237static int	igc_setup_receive_ring(struct rx_ring *);
238static void	igc_initialize_receive_unit(struct igc_softc *);
239static void	igc_free_receive_structures(struct igc_softc *);
240static void	igc_free_receive_buffers(struct rx_ring *);
241static void	igc_clear_receive_status(struct rx_ring *);
242static void	igc_initialize_rss_mapping(struct igc_softc *);
243
244static void	igc_get_hw_control(struct igc_softc *);
245static void	igc_release_hw_control(struct igc_softc *);
246static int	igc_is_valid_ether_addr(uint8_t *);
247static void	igc_print_devinfo(struct igc_softc *);
248
249CFATTACH_DECL3_NEW(igc, sizeof(struct igc_softc),
250    igc_match, igc_attach, igc_detach, NULL, NULL, NULL, 0);
251
252static inline int
253igc_txdesc_incr(struct igc_softc *sc, int id)
254{
255
256	if (++id == sc->num_tx_desc)
257		id = 0;
258	return id;
259}
260
261static inline int __unused
262igc_txdesc_decr(struct igc_softc *sc, int id)
263{
264
265	if (--id < 0)
266		id = sc->num_tx_desc - 1;
267	return id;
268}
269
270static inline void
271igc_txdesc_sync(struct tx_ring *txr, int id, int ops)
272{
273
274	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
275	    id * sizeof(union igc_adv_tx_desc), sizeof(union igc_adv_tx_desc),
276	    ops);
277}
278
279static inline int
280igc_rxdesc_incr(struct igc_softc *sc, int id)
281{
282
283	if (++id == sc->num_rx_desc)
284		id = 0;
285	return id;
286}
287
288static inline int
289igc_rxdesc_decr(struct igc_softc *sc, int id)
290{
291
292	if (--id < 0)
293		id = sc->num_rx_desc - 1;
294	return id;
295}
296
297static inline void
298igc_rxdesc_sync(struct rx_ring *rxr, int id, int ops)
299{
300
301	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
302	    id * sizeof(union igc_adv_rx_desc), sizeof(union igc_adv_rx_desc),
303	    ops);
304}
305
306static const struct igc_product *
307igc_lookup(const struct pci_attach_args *pa)
308{
309	const struct igc_product *igcp;
310
311	for (igcp = igc_products; igcp->igcp_name != NULL; igcp++) {
312		if (PCI_VENDOR(pa->pa_id) == igcp->igcp_vendor &&
313		    PCI_PRODUCT(pa->pa_id) == igcp->igcp_product)
314			return igcp;
315	}
316	return NULL;
317}
318
319/*********************************************************************
320 *  Device identification routine
321 *
322 *  igc_match determines if the driver should be loaded on
323 *  adapter based on PCI vendor/device id of the adapter.
324 *
325 *  return 0 on success, positive on failure
326 *********************************************************************/
327static int
328igc_match(device_t parent, cfdata_t match, void *aux)
329{
330	struct pci_attach_args *pa = aux;
331
332	if (igc_lookup(pa) != NULL)
333		return 1;
334
335	return 0;
336}
337
338/*********************************************************************
339 *  Device initialization routine
340 *
341 *  The attach entry point is called when the driver is being loaded.
342 *  This routine identifies the type of hardware, allocates all resources
343 *  and initializes the hardware.
344 *
345 *  return 0 on success, positive on failure
346 *********************************************************************/
347static void
348igc_attach(device_t parent, device_t self, void *aux)
349{
350	struct pci_attach_args *pa = aux;
351	struct igc_softc *sc = device_private(self);
352	struct igc_hw *hw = &sc->hw;
353
354	const struct igc_product *igcp = igc_lookup(pa);
355	KASSERT(igcp != NULL);
356
357	sc->sc_dev = self;
358	callout_init(&sc->sc_tick_ch, CALLOUT_MPSAFE);
359	callout_setfunc(&sc->sc_tick_ch, igc_tick, sc);
360	sc->sc_core_stopping = false;
361
362	sc->osdep.os_sc = sc;
363	sc->osdep.os_pa = *pa;
364#ifndef __aarch64__
365	/*
366	 * XXX PR port-arm/57643
367	 * 64-bit DMA does not work at least for LX2K with 32/64GB memory.
368	 * smmu(4) support may be required.
369	 */
370	if (pci_dma64_available(pa)) {
371		aprint_verbose(", 64-bit DMA");
372		sc->osdep.os_dmat = pa->pa_dmat64;
373	} else
374#endif
375	{
376		aprint_verbose(", 32-bit DMA");
377		sc->osdep.os_dmat = pa->pa_dmat;
378	}
379
380	pci_aprint_devinfo_fancy(pa, "Ethernet controller", igcp->igcp_name, 1);
381
382	/* Determine hardware and mac info */
383	igc_identify_hardware(sc);
384
385	sc->num_tx_desc = IGC_DEFAULT_TXD;
386	sc->num_rx_desc = IGC_DEFAULT_RXD;
387
388	 /* Setup PCI resources */
389	if (igc_allocate_pci_resources(sc)) {
390		aprint_error_dev(sc->sc_dev,
391		    "unable to allocate PCI resources\n");
392		goto err_pci;
393	}
394
395	if (igc_allocate_interrupts(sc)) {
396		aprint_error_dev(sc->sc_dev, "unable to allocate interrupts\n");
397		goto err_pci;
398	}
399
400	/* Allocate TX/RX queues */
401	if (igc_allocate_queues(sc)) {
402		aprint_error_dev(sc->sc_dev, "unable to allocate queues\n");
403		goto err_alloc_intr;
404	}
405
406	/* Do shared code initialization */
407	if (igc_setup_init_funcs(hw, true)) {
408		aprint_error_dev(sc->sc_dev, "unable to initialize\n");
409		goto err_alloc_intr;
410	}
411
412	hw->mac.autoneg = DO_AUTO_NEG;
413	hw->phy.autoneg_wait_to_complete = false;
414	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
415
416	/* Copper options. */
417	if (hw->phy.media_type == igc_media_type_copper)
418		hw->phy.mdix = AUTO_ALL_MODES;
419
420	/* Set the max frame size. */
421	sc->hw.mac.max_frame_size = 9234;
422
423	/* Allocate multicast array memory. */
424	sc->mta = kmem_alloc(IGC_MTA_LEN, KM_SLEEP);
425
426	/* Check SOL/IDER usage. */
427	if (igc_check_reset_block(hw)) {
428		aprint_error_dev(sc->sc_dev,
429		    "PHY reset is blocked due to SOL/IDER session\n");
430	}
431
432	/* Disable Energy Efficient Ethernet. */
433	sc->hw.dev_spec._i225.eee_disable = true;
434
435	igc_reset_hw(hw);
436
437	/* Make sure we have a good EEPROM before we read from it. */
438	if (igc_validate_nvm_checksum(hw) < 0) {
439		/*
440		 * Some PCI-E parts fail the first check due to
441		 * the link being in sleep state, call it again,
442		 * if it fails a second time its a real issue.
443		 */
444		if (igc_validate_nvm_checksum(hw) < 0) {
445			aprint_error_dev(sc->sc_dev,
446			    "EEPROM checksum invalid\n");
447			goto err_late;
448		}
449	}
450
451	/* Copy the permanent MAC address out of the EEPROM. */
452	if (igc_read_mac_addr(hw) < 0) {
453		aprint_error_dev(sc->sc_dev,
454		    "unable to read MAC address from EEPROM\n");
455		goto err_late;
456	}
457
458	if (!igc_is_valid_ether_addr(hw->mac.addr)) {
459		aprint_error_dev(sc->sc_dev, "invalid MAC address\n");
460		goto err_late;
461	}
462
463	if (igc_setup_interrupts(sc))
464		goto err_late;
465
466	/* Attach counters. */
467	igc_attach_counters(sc);
468
469	/* Setup OS specific network interface. */
470	igc_setup_interface(sc);
471
472	igc_print_devinfo(sc);
473
474	igc_reset(sc);
475	hw->mac.get_link_status = true;
476	igc_update_link_status(sc);
477
478	/* The driver can now take control from firmware. */
479	igc_get_hw_control(sc);
480
481	aprint_normal_dev(sc->sc_dev, "Ethernet address %s\n",
482	    ether_sprintf(sc->hw.mac.addr));
483
484	if (pmf_device_register(self, NULL, NULL))
485		pmf_class_network_register(self, &sc->sc_ec.ec_if);
486	else
487		aprint_error_dev(self, "couldn't establish power handler\n");
488
489	return;
490
491 err_late:
492	igc_release_hw_control(sc);
493 err_alloc_intr:
494	igc_free_interrupts(sc);
495 err_pci:
496	igc_free_pci_resources(sc);
497	kmem_free(sc->mta, IGC_MTA_LEN);
498}
499
500/*********************************************************************
501 *  Device removal routine
502 *
503 *  The detach entry point is called when the driver is being removed.
504 *  This routine stops the adapter and deallocates all the resources
505 *  that were allocated for driver operation.
506 *
507 *  return 0 on success, positive on failure
508 *********************************************************************/
509static int
510igc_detach(device_t self, int flags)
511{
512	struct igc_softc *sc = device_private(self);
513	struct ifnet *ifp = &sc->sc_ec.ec_if;
514
515	mutex_enter(&sc->sc_core_lock);
516	igc_stop_locked(sc);
517	mutex_exit(&sc->sc_core_lock);
518
519	igc_detach_counters(sc);
520
521	igc_free_queues(sc);
522
523	igc_phy_hw_reset(&sc->hw);
524	igc_release_hw_control(sc);
525
526	ether_ifdetach(ifp);
527	if_detach(ifp);
528	ifmedia_fini(&sc->media);
529
530	igc_free_interrupts(sc);
531	igc_free_pci_resources(sc);
532	kmem_free(sc->mta, IGC_MTA_LEN);
533
534	mutex_destroy(&sc->sc_core_lock);
535
536	return 0;
537}
538
539static void
540igc_identify_hardware(struct igc_softc *sc)
541{
542	struct igc_osdep *os = &sc->osdep;
543	struct pci_attach_args *pa = &os->os_pa;
544
545	/* Save off the information about this board. */
546	sc->hw.device_id = PCI_PRODUCT(pa->pa_id);
547
548	/* Do shared code init and setup. */
549	if (igc_set_mac_type(&sc->hw)) {
550		aprint_error_dev(sc->sc_dev, "unable to identify hardware\n");
551		return;
552	}
553}
554
555static int
556igc_allocate_pci_resources(struct igc_softc *sc)
557{
558	struct igc_osdep *os = &sc->osdep;
559	struct pci_attach_args *pa = &os->os_pa;
560
561	/*
562	 * Enable bus mastering and memory-mapped I/O for sure.
563	 */
564	pcireg_t csr =
565	    pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
566	csr |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_MEM_ENABLE;
567	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, csr);
568
569	const pcireg_t memtype =
570	    pci_mapreg_type(pa->pa_pc, pa->pa_tag, IGC_PCIREG);
571	if (pci_mapreg_map(pa, IGC_PCIREG, memtype, 0, &os->os_memt,
572	    &os->os_memh, &os->os_membase, &os->os_memsize)) {
573		aprint_error_dev(sc->sc_dev, "unable to map registers\n");
574		return ENXIO;
575	}
576
577	sc->hw.hw_addr = os->os_membase;
578	sc->hw.back = os;
579
580	return 0;
581}
582
583static int __unused
584igc_adjust_nqueues(struct igc_softc *sc)
585{
586	struct pci_attach_args *pa = &sc->osdep.os_pa;
587	int nqueues = MIN(IGC_MAX_NQUEUES, ncpu);
588
589	const int nmsix = pci_msix_count(pa->pa_pc, pa->pa_tag);
590	if (nmsix <= 1)
591		nqueues = 1;
592	else if (nmsix < nqueues + 1)
593		nqueues = nmsix - 1;
594
595	return nqueues;
596}
597
598static int
599igc_allocate_interrupts(struct igc_softc *sc)
600{
601	struct pci_attach_args *pa = &sc->osdep.os_pa;
602	int error;
603
604#ifndef IGC_DISABLE_MSIX
605	const int nqueues = igc_adjust_nqueues(sc);
606	if (nqueues > 1) {
607		sc->sc_nintrs = nqueues + 1;
608		error = pci_msix_alloc_exact(pa, &sc->sc_intrs, sc->sc_nintrs);
609		if (!error) {
610			sc->sc_nqueues = nqueues;
611			sc->sc_intr_type = PCI_INTR_TYPE_MSIX;
612			return 0;
613		}
614	}
615#endif
616
617	/* fallback to MSI */
618	sc->sc_nintrs = sc->sc_nqueues = 1;
619
620#ifndef IGC_DISABLE_MSI
621	error = pci_msi_alloc_exact(pa, &sc->sc_intrs, sc->sc_nintrs);
622	if (!error) {
623		sc->sc_intr_type = PCI_INTR_TYPE_MSI;
624		return 0;
625	}
626#endif
627
628	/* fallback to INTx */
629
630	error = pci_intx_alloc(pa, &sc->sc_intrs);
631	if (!error) {
632		sc->sc_intr_type = PCI_INTR_TYPE_INTX;
633		return 0;
634	}
635
636	return error;
637}
638
639static int
640igc_allocate_queues(struct igc_softc *sc)
641{
642	device_t dev = sc->sc_dev;
643	int rxconf = 0, txconf = 0;
644
645	/* Allocate the top level queue structs. */
646	sc->queues =
647	    kmem_zalloc(sc->sc_nqueues * sizeof(struct igc_queue), KM_SLEEP);
648
649	/* Allocate the TX ring. */
650	sc->tx_rings =
651	    kmem_zalloc(sc->sc_nqueues * sizeof(struct tx_ring), KM_SLEEP);
652
653	/* Allocate the RX ring. */
654	sc->rx_rings =
655	    kmem_zalloc(sc->sc_nqueues * sizeof(struct rx_ring), KM_SLEEP);
656
657	/* Set up the TX queues. */
658	for (int iq = 0; iq < sc->sc_nqueues; iq++, txconf++) {
659		struct tx_ring *txr = &sc->tx_rings[iq];
660		const int tsize = roundup2(
661		    sc->num_tx_desc * sizeof(union igc_adv_tx_desc),
662		    IGC_DBA_ALIGN);
663
664		txr->sc = sc;
665		txr->txr_igcq = &sc->queues[iq];
666		txr->me = iq;
667		if (igc_dma_malloc(sc, tsize, &txr->txdma)) {
668			aprint_error_dev(dev,
669			    "unable to allocate TX descriptor\n");
670			goto fail;
671		}
672		txr->tx_base = (union igc_adv_tx_desc *)txr->txdma.dma_vaddr;
673		memset(txr->tx_base, 0, tsize);
674	}
675
676	/* Prepare transmit descriptors and buffers. */
677	if (igc_setup_transmit_structures(sc)) {
678		aprint_error_dev(dev, "unable to setup transmit structures\n");
679		goto fail;
680	}
681
682	/* Set up the RX queues. */
683	for (int iq = 0; iq < sc->sc_nqueues; iq++, rxconf++) {
684		struct rx_ring *rxr = &sc->rx_rings[iq];
685		const int rsize = roundup2(
686		    sc->num_rx_desc * sizeof(union igc_adv_rx_desc),
687		    IGC_DBA_ALIGN);
688
689		rxr->sc = sc;
690		rxr->rxr_igcq = &sc->queues[iq];
691		rxr->me = iq;
692#ifdef OPENBSD
693		timeout_set(&rxr->rx_refill, igc_rxrefill, rxr);
694#endif
695		if (igc_dma_malloc(sc, rsize, &rxr->rxdma)) {
696			aprint_error_dev(dev,
697			    "unable to allocate RX descriptor\n");
698			goto fail;
699		}
700		rxr->rx_base = (union igc_adv_rx_desc *)rxr->rxdma.dma_vaddr;
701		memset(rxr->rx_base, 0, rsize);
702	}
703
704	sc->rx_mbuf_sz = MCLBYTES;
705	/* Prepare receive descriptors and buffers. */
706	if (igc_setup_receive_structures(sc)) {
707		aprint_error_dev(sc->sc_dev,
708		    "unable to setup receive structures\n");
709		goto fail;
710	}
711
712	/* Set up the queue holding structs. */
713	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
714		struct igc_queue *q = &sc->queues[iq];
715
716		q->sc = sc;
717		q->txr = &sc->tx_rings[iq];
718		q->rxr = &sc->rx_rings[iq];
719	}
720
721	return 0;
722
723 fail:
724	for (struct rx_ring *rxr = sc->rx_rings; rxconf > 0; rxr++, rxconf--)
725		igc_dma_free(sc, &rxr->rxdma);
726	for (struct tx_ring *txr = sc->tx_rings; txconf > 0; txr++, txconf--)
727		igc_dma_free(sc, &txr->txdma);
728
729	kmem_free(sc->rx_rings, sc->sc_nqueues * sizeof(struct rx_ring));
730	sc->rx_rings = NULL;
731	kmem_free(sc->tx_rings, sc->sc_nqueues * sizeof(struct tx_ring));
732	sc->tx_rings = NULL;
733	kmem_free(sc->queues, sc->sc_nqueues * sizeof(struct igc_queue));
734	sc->queues = NULL;
735
736	return ENOMEM;
737}
738
739static void
740igc_free_pci_resources(struct igc_softc *sc)
741{
742	struct igc_osdep *os = &sc->osdep;
743
744	if (os->os_membase != 0)
745		bus_space_unmap(os->os_memt, os->os_memh, os->os_memsize);
746	os->os_membase = 0;
747}
748
749static void
750igc_free_interrupts(struct igc_softc *sc)
751{
752	struct pci_attach_args *pa = &sc->osdep.os_pa;
753	pci_chipset_tag_t pc = pa->pa_pc;
754
755	for (int i = 0; i < sc->sc_nintrs; i++) {
756		if (sc->sc_ihs[i] != NULL) {
757			pci_intr_disestablish(pc, sc->sc_ihs[i]);
758			sc->sc_ihs[i] = NULL;
759		}
760	}
761	pci_intr_release(pc, sc->sc_intrs, sc->sc_nintrs);
762}
763
764static void
765igc_free_queues(struct igc_softc *sc)
766{
767
768	igc_free_receive_structures(sc);
769	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
770		struct rx_ring *rxr = &sc->rx_rings[iq];
771
772		igc_dma_free(sc, &rxr->rxdma);
773	}
774
775	igc_free_transmit_structures(sc);
776	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
777		struct tx_ring *txr = &sc->tx_rings[iq];
778
779		igc_dma_free(sc, &txr->txdma);
780	}
781
782	kmem_free(sc->rx_rings, sc->sc_nqueues * sizeof(struct rx_ring));
783	kmem_free(sc->tx_rings, sc->sc_nqueues * sizeof(struct tx_ring));
784	kmem_free(sc->queues, sc->sc_nqueues * sizeof(struct igc_queue));
785}
786
787/*********************************************************************
788 *
789 *  Initialize the hardware to a configuration as specified by the
790 *  adapter structure.
791 *
792 **********************************************************************/
793static void
794igc_reset(struct igc_softc *sc)
795{
796	struct igc_hw *hw = &sc->hw;
797
798	/* Let the firmware know the OS is in control */
799	igc_get_hw_control(sc);
800
801	/*
802	 * Packet Buffer Allocation (PBA)
803	 * Writing PBA sets the receive portion of the buffer
804	 * the remainder is used for the transmit buffer.
805	 */
806	const uint32_t pba = IGC_PBA_34K;
807
808	/*
809	 * These parameters control the automatic generation (Tx) and
810	 * response (Rx) to Ethernet PAUSE frames.
811	 * - High water mark should allow for at least two frames to be
812	 *   received after sending an XOFF.
813	 * - Low water mark works best when it is very near the high water mark.
814	 *   This allows the receiver to restart by sending XON when it has
815	 *   drained a bit. Here we use an arbitrary value of 1500 which will
816	 *   restart after one full frame is pulled from the buffer. There
817	 *   could be several smaller frames in the buffer and if so they will
818	 *   not trigger the XON until their total number reduces the buffer
819	 *   by 1500.
820	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
821	 */
822	const uint16_t rx_buffer_size = (pba & 0xffff) << 10;
823
824	hw->fc.high_water = rx_buffer_size -
825	    roundup2(sc->hw.mac.max_frame_size, 1024);
826	/* 16-byte granularity */
827	hw->fc.low_water = hw->fc.high_water - 16;
828
829	if (sc->fc) /* locally set flow control value? */
830		hw->fc.requested_mode = sc->fc;
831	else
832		hw->fc.requested_mode = igc_fc_full;
833
834	hw->fc.pause_time = IGC_FC_PAUSE_TIME;
835
836	hw->fc.send_xon = true;
837
838	/* Issue a global reset */
839	igc_reset_hw(hw);
840	IGC_WRITE_REG(hw, IGC_WUC, 0);
841
842	/* and a re-init */
843	if (igc_init_hw(hw) < 0) {
844		aprint_error_dev(sc->sc_dev, "unable to reset hardware\n");
845		return;
846	}
847
848	/* Setup DMA Coalescing */
849	igc_init_dmac(sc, pba);
850
851	IGC_WRITE_REG(hw, IGC_VET, ETHERTYPE_VLAN);
852	igc_get_phy_info(hw);
853	igc_check_for_link(hw);
854}
855
856/*********************************************************************
857 *
858 *  Initialize the DMA Coalescing feature
859 *
860 **********************************************************************/
861static void
862igc_init_dmac(struct igc_softc *sc, uint32_t pba)
863{
864	struct igc_hw *hw = &sc->hw;
865	const uint16_t max_frame_size = sc->hw.mac.max_frame_size;
866	uint32_t reg, status;
867
868	if (sc->dmac == 0) { /* Disabling it */
869		reg = ~IGC_DMACR_DMAC_EN;	/* XXXRO */
870		IGC_WRITE_REG(hw, IGC_DMACR, reg);
871		DPRINTF(MISC, "DMA coalescing disabled\n");
872		return;
873	} else {
874		device_printf(sc->sc_dev, "DMA coalescing enabled\n");
875	}
876
877	/* Set starting threshold */
878	IGC_WRITE_REG(hw, IGC_DMCTXTH, 0);
879
880	uint16_t hwm = 64 * pba - max_frame_size / 16;
881	if (hwm < 64 * (pba - 6))
882		hwm = 64 * (pba - 6);
883	reg = IGC_READ_REG(hw, IGC_FCRTC);
884	reg &= ~IGC_FCRTC_RTH_COAL_MASK;
885	reg |= (hwm << IGC_FCRTC_RTH_COAL_SHIFT) & IGC_FCRTC_RTH_COAL_MASK;
886	IGC_WRITE_REG(hw, IGC_FCRTC, reg);
887
888	uint32_t dmac = pba - max_frame_size / 512;
889	if (dmac < pba - 10)
890		dmac = pba - 10;
891	reg = IGC_READ_REG(hw, IGC_DMACR);
892	reg &= ~IGC_DMACR_DMACTHR_MASK;
893	reg |= (dmac << IGC_DMACR_DMACTHR_SHIFT) & IGC_DMACR_DMACTHR_MASK;
894
895	/* transition to L0x or L1 if available..*/
896	reg |= IGC_DMACR_DMAC_EN | IGC_DMACR_DMAC_LX_MASK;
897
898	/* Check if status is 2.5Gb backplane connection
899	 * before configuration of watchdog timer, which is
900	 * in msec values in 12.8usec intervals
901	 * watchdog timer= msec values in 32usec intervals
902	 * for non 2.5Gb connection
903	 */
904	status = IGC_READ_REG(hw, IGC_STATUS);
905	if ((status & IGC_STATUS_2P5_SKU) &&
906	    !(status & IGC_STATUS_2P5_SKU_OVER))
907		reg |= (sc->dmac * 5) >> 6;
908	else
909		reg |= sc->dmac >> 5;
910
911	IGC_WRITE_REG(hw, IGC_DMACR, reg);
912
913	IGC_WRITE_REG(hw, IGC_DMCRTRH, 0);
914
915	/* Set the interval before transition */
916	reg = IGC_READ_REG(hw, IGC_DMCTLX);
917	reg |= IGC_DMCTLX_DCFLUSH_DIS;
918
919	/*
920	 * in 2.5Gb connection, TTLX unit is 0.4 usec
921	 * which is 0x4*2 = 0xA. But delay is still 4 usec
922	 */
923	status = IGC_READ_REG(hw, IGC_STATUS);
924	if ((status & IGC_STATUS_2P5_SKU) &&
925	    !(status & IGC_STATUS_2P5_SKU_OVER))
926		reg |= 0xA;
927	else
928		reg |= 0x4;
929
930	IGC_WRITE_REG(hw, IGC_DMCTLX, reg);
931
932	/* free space in tx packet buffer to wake from DMA coal */
933	IGC_WRITE_REG(hw, IGC_DMCTXTH,
934	    (IGC_TXPBSIZE - (2 * max_frame_size)) >> 6);
935
936	/* make low power state decision controlled by DMA coal */
937	reg = IGC_READ_REG(hw, IGC_PCIEMISC);
938	reg &= ~IGC_PCIEMISC_LX_DECISION;
939	IGC_WRITE_REG(hw, IGC_PCIEMISC, reg);
940}
941
942static int
943igc_setup_interrupts(struct igc_softc *sc)
944{
945	int error;
946
947	switch (sc->sc_intr_type) {
948	case PCI_INTR_TYPE_MSIX:
949		error = igc_setup_msix(sc);
950		break;
951	case PCI_INTR_TYPE_MSI:
952		error = igc_setup_msi(sc);
953		break;
954	case PCI_INTR_TYPE_INTX:
955		error = igc_setup_intx(sc);
956		break;
957	default:
958		panic("%s: invalid interrupt type: %d",
959		    device_xname(sc->sc_dev), sc->sc_intr_type);
960	}
961
962	return error;
963}
964
965static void
966igc_attach_counters(struct igc_softc *sc)
967{
968#ifdef IGC_EVENT_COUNTERS
969
970	/* Global counters */
971	sc->sc_global_evcnts = kmem_zalloc(
972	    IGC_GLOBAL_COUNTERS * sizeof(sc->sc_global_evcnts[0]), KM_SLEEP);
973
974	for (int cnt = 0; cnt < IGC_GLOBAL_COUNTERS; cnt++) {
975		evcnt_attach_dynamic(&sc->sc_global_evcnts[cnt],
976		    igc_global_counters[cnt].type, NULL,
977		    device_xname(sc->sc_dev), igc_global_counters[cnt].name);
978	}
979
980	/* Driver counters */
981	sc->sc_driver_evcnts = kmem_zalloc(
982	    IGC_DRIVER_COUNTERS * sizeof(sc->sc_driver_evcnts[0]), KM_SLEEP);
983
984	for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++) {
985		evcnt_attach_dynamic(&sc->sc_driver_evcnts[cnt],
986		    igc_driver_counters[cnt].type, NULL,
987		    device_xname(sc->sc_dev), igc_driver_counters[cnt].name);
988	}
989
990	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
991		struct igc_queue *q = &sc->queues[iq];
992
993		q->igcq_driver_counters = kmem_zalloc(
994		    IGC_DRIVER_COUNTERS * sizeof(q->igcq_driver_counters[0]),
995		    KM_SLEEP);
996	}
997
998	/* Queue counters */
999	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1000		struct igc_queue *q = &sc->queues[iq];
1001
1002		snprintf(q->igcq_queue_evname, sizeof(q->igcq_queue_evname),
1003		    "%s q%d", device_xname(sc->sc_dev), iq);
1004
1005		q->igcq_queue_evcnts = kmem_zalloc(
1006		    IGC_QUEUE_COUNTERS * sizeof(q->igcq_queue_evcnts[0]),
1007		    KM_SLEEP);
1008
1009		for (int cnt = 0; cnt < IGC_QUEUE_COUNTERS; cnt++) {
1010			evcnt_attach_dynamic(&q->igcq_queue_evcnts[cnt],
1011			    igc_queue_counters[cnt].type, NULL,
1012			    q->igcq_queue_evname, igc_queue_counters[cnt].name);
1013		}
1014	}
1015
1016	/* MAC counters */
1017	snprintf(sc->sc_mac_evname, sizeof(sc->sc_mac_evname),
1018	    "%s Mac Statistics", device_xname(sc->sc_dev));
1019
1020	sc->sc_mac_evcnts = kmem_zalloc(
1021	    IGC_MAC_COUNTERS * sizeof(sc->sc_mac_evcnts[0]), KM_SLEEP);
1022
1023	for (int cnt = 0; cnt < IGC_MAC_COUNTERS; cnt++) {
1024		evcnt_attach_dynamic(&sc->sc_mac_evcnts[cnt], EVCNT_TYPE_MISC,
1025		    NULL, sc->sc_mac_evname, igc_mac_counters[cnt].name);
1026	}
1027#endif
1028}
1029
1030static void
1031igc_detach_counters(struct igc_softc *sc)
1032{
1033#ifdef IGC_EVENT_COUNTERS
1034
1035	/* Global counters */
1036	for (int cnt = 0; cnt < IGC_GLOBAL_COUNTERS; cnt++)
1037		evcnt_detach(&sc->sc_global_evcnts[cnt]);
1038
1039	kmem_free(sc->sc_global_evcnts,
1040	    IGC_GLOBAL_COUNTERS * sizeof(sc->sc_global_evcnts));
1041
1042	/* Driver counters */
1043	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1044		struct igc_queue *q = &sc->queues[iq];
1045
1046		kmem_free(q->igcq_driver_counters,
1047		    IGC_DRIVER_COUNTERS * sizeof(q->igcq_driver_counters[0]));
1048	}
1049
1050	for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++)
1051		evcnt_detach(&sc->sc_driver_evcnts[cnt]);
1052
1053	kmem_free(sc->sc_driver_evcnts,
1054	    IGC_DRIVER_COUNTERS * sizeof(sc->sc_driver_evcnts));
1055
1056	/* Queue counters */
1057	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1058		struct igc_queue *q = &sc->queues[iq];
1059
1060		for (int cnt = 0; cnt < IGC_QUEUE_COUNTERS; cnt++)
1061			evcnt_detach(&q->igcq_queue_evcnts[cnt]);
1062
1063		kmem_free(q->igcq_queue_evcnts,
1064		    IGC_QUEUE_COUNTERS * sizeof(q->igcq_queue_evcnts[0]));
1065	}
1066
1067	/* MAC statistics */
1068	for (int cnt = 0; cnt < IGC_MAC_COUNTERS; cnt++)
1069		evcnt_detach(&sc->sc_mac_evcnts[cnt]);
1070
1071	kmem_free(sc->sc_mac_evcnts,
1072	    IGC_MAC_COUNTERS * sizeof(sc->sc_mac_evcnts[0]));
1073#endif
1074}
1075
1076/*
1077 * XXX
1078 * FreeBSD uses 4-byte-wise read for 64-bit counters, while Linux just
1079 * drops hi words.
1080 */
1081static inline uint64_t __unused
1082igc_read_mac_counter(struct igc_hw *hw, bus_size_t reg, bool is64)
1083{
1084	uint64_t val;
1085
1086	val = IGC_READ_REG(hw, reg);
1087	if (is64)
1088		val += ((uint64_t)IGC_READ_REG(hw, reg + 4)) << 32;
1089	return val;
1090}
1091
1092static void
1093igc_update_counters(struct igc_softc *sc)
1094{
1095#ifdef IGC_EVENT_COUNTERS
1096
1097	/* Global counters: nop */
1098
1099	/* Driver counters */
1100	uint64_t sum[IGC_DRIVER_COUNTERS];
1101
1102	memset(sum, 0, sizeof(sum));
1103	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1104		struct igc_queue *q = &sc->queues[iq];
1105
1106		for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++) {
1107			sum[cnt] += IGC_QUEUE_DRIVER_COUNTER_VAL(q, cnt);
1108			IGC_QUEUE_DRIVER_COUNTER_STORE(q, cnt, 0);
1109		}
1110	}
1111
1112	for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++)
1113		IGC_DRIVER_COUNTER_ADD(sc, cnt, sum[cnt]);
1114
1115	/* Queue counters: nop */
1116
1117	/* Mac statistics */
1118	struct igc_hw *hw = &sc->hw;
1119	struct ifnet *ifp = &sc->sc_ec.ec_if;
1120	uint64_t iqdrops = 0;
1121
1122	for (int cnt = 0; cnt < IGC_MAC_COUNTERS; cnt++) {
1123		uint64_t val;
1124		bus_size_t regaddr = igc_mac_counters[cnt].reg;
1125
1126		val = igc_read_mac_counter(hw, regaddr,
1127		    igc_mac_counters[cnt].is64);
1128		IGC_MAC_COUNTER_ADD(sc, cnt, val);
1129		/* XXX Count MPC to iqdrops. */
1130		if (regaddr == IGC_MPC)
1131			iqdrops += val;
1132	}
1133
1134	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1135		uint32_t val;
1136
1137		/* XXX RQDPC should be visible via evcnt(9). */
1138		val = IGC_READ_REG(hw, IGC_RQDPC(iq));
1139
1140		/* RQDPC is not cleard on read. */
1141		if (val != 0)
1142			IGC_WRITE_REG(hw, IGC_RQDPC(iq), 0);
1143		iqdrops += val;
1144	}
1145
1146	if (iqdrops != 0)
1147		if_statadd(ifp, if_iqdrops, iqdrops);
1148#endif
1149}
1150
1151static void
1152igc_clear_counters(struct igc_softc *sc)
1153{
1154#ifdef IGC_EVENT_COUNTERS
1155
1156	/* Global counters */
1157	for (int cnt = 0; cnt < IGC_GLOBAL_COUNTERS; cnt++)
1158		IGC_GLOBAL_COUNTER_STORE(sc, cnt, 0);
1159
1160	/* Driver counters */
1161	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1162		struct igc_queue *q = &sc->queues[iq];
1163
1164		for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++)
1165			IGC_QUEUE_DRIVER_COUNTER_STORE(q, cnt, 0);
1166	}
1167
1168	for (int cnt = 0; cnt < IGC_DRIVER_COUNTERS; cnt++)
1169		IGC_DRIVER_COUNTER_STORE(sc, cnt, 0);
1170
1171	/* Queue counters */
1172	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1173		struct igc_queue *q = &sc->queues[iq];
1174
1175		for (int cnt = 0; cnt < IGC_QUEUE_COUNTERS; cnt++)
1176			IGC_QUEUE_COUNTER_STORE(q, cnt, 0);
1177	}
1178
1179	/* Mac statistics */
1180	struct igc_hw *hw = &sc->hw;
1181
1182	for (int cnt = 0; cnt < IGC_MAC_COUNTERS; cnt++) {
1183		(void)igc_read_mac_counter(hw, igc_mac_counters[cnt].reg,
1184		    igc_mac_counters[cnt].is64);
1185		IGC_MAC_COUNTER_STORE(sc, cnt, 0);
1186	}
1187#endif
1188}
1189
1190static int
1191igc_setup_msix(struct igc_softc *sc)
1192{
1193	pci_chipset_tag_t pc = sc->osdep.os_pa.pa_pc;
1194	device_t dev = sc->sc_dev;
1195	pci_intr_handle_t *intrs;
1196	void **ihs;
1197	const char *intrstr;
1198	char intrbuf[PCI_INTRSTR_LEN];
1199	char xnamebuf[MAX(32, MAXCOMLEN)];
1200	int iq, error;
1201
1202	for (iq = 0, intrs = sc->sc_intrs, ihs = sc->sc_ihs;
1203	    iq < sc->sc_nqueues; iq++, intrs++, ihs++) {
1204		struct igc_queue *q = &sc->queues[iq];
1205
1206		snprintf(xnamebuf, sizeof(xnamebuf), "%s: txrx %d",
1207		    device_xname(dev), iq);
1208
1209		intrstr = pci_intr_string(pc, *intrs, intrbuf, sizeof(intrbuf));
1210
1211		pci_intr_setattr(pc, intrs, PCI_INTR_MPSAFE, true);
1212		*ihs = pci_intr_establish_xname(pc, *intrs, IPL_NET,
1213		    igc_intr_queue, q, xnamebuf);
1214		if (*ihs == NULL) {
1215			aprint_error_dev(dev,
1216			    "unable to establish txrx interrupt at %s\n",
1217			    intrstr);
1218			return ENOBUFS;
1219		}
1220		aprint_normal_dev(dev, "txrx interrupting at %s\n", intrstr);
1221
1222		kcpuset_t *affinity;
1223		kcpuset_create(&affinity, true);
1224		kcpuset_set(affinity, iq % ncpu);
1225		error = interrupt_distribute(*ihs, affinity, NULL);
1226		if (error) {
1227			aprint_normal_dev(dev,
1228			    "%s: unable to change affinity, use default CPU\n",
1229			    intrstr);
1230		}
1231		kcpuset_destroy(affinity);
1232
1233		q->igcq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1234		    igc_handle_queue, q);
1235		if (q->igcq_si == NULL) {
1236			aprint_error_dev(dev,
1237			    "%s: unable to establish softint\n", intrstr);
1238			return ENOBUFS;
1239		}
1240
1241		q->msix = iq;
1242		q->eims = 1 << iq;
1243	}
1244
1245	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(dev));
1246	error = workqueue_create(&sc->sc_queue_wq, xnamebuf,
1247	    igc_handle_queue_work, sc, IGC_WORKQUEUE_PRI, IPL_NET,
1248	    WQ_PERCPU | WQ_MPSAFE);
1249	if (error) {
1250		aprint_error_dev(dev, "workqueue_create failed\n");
1251		return ENOBUFS;
1252	}
1253	sc->sc_txrx_workqueue = false;
1254
1255	intrstr = pci_intr_string(pc, *intrs, intrbuf, sizeof(intrbuf));
1256	snprintf(xnamebuf, sizeof(xnamebuf), "%s: link", device_xname(dev));
1257	pci_intr_setattr(pc, intrs, PCI_INTR_MPSAFE, true);
1258	*ihs = pci_intr_establish_xname(pc, *intrs, IPL_NET,
1259	    igc_intr_link, sc, xnamebuf);
1260	if (*ihs == NULL) {
1261		aprint_error_dev(dev,
1262		    "unable to establish link interrupt at %s\n", intrstr);
1263		return ENOBUFS;
1264	}
1265	aprint_normal_dev(dev, "link interrupting at %s\n", intrstr);
1266	/* use later in igc_configure_queues() */
1267	sc->linkvec = iq;
1268
1269	return 0;
1270}
1271
1272static int
1273igc_setup_msi(struct igc_softc *sc)
1274{
1275	pci_chipset_tag_t pc = sc->osdep.os_pa.pa_pc;
1276	device_t dev = sc->sc_dev;
1277	pci_intr_handle_t *intr = sc->sc_intrs;
1278	void **ihs = sc->sc_ihs;
1279	const char *intrstr;
1280	char intrbuf[PCI_INTRSTR_LEN];
1281	char xnamebuf[MAX(32, MAXCOMLEN)];
1282	int error;
1283
1284	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1285
1286	snprintf(xnamebuf, sizeof(xnamebuf), "%s: msi", device_xname(dev));
1287	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1288	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1289	    igc_intr, sc, xnamebuf);
1290	if (*ihs == NULL) {
1291		aprint_error_dev(dev,
1292		    "unable to establish interrupt at %s\n", intrstr);
1293		return ENOBUFS;
1294	}
1295	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
1296
1297	struct igc_queue *iq = sc->queues;
1298	iq->igcq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1299	    igc_handle_queue, iq);
1300	if (iq->igcq_si == NULL) {
1301		aprint_error_dev(dev,
1302		    "%s: unable to establish softint\n", intrstr);
1303		return ENOBUFS;
1304	}
1305
1306	snprintf(xnamebuf, MAXCOMLEN, "%s_tx_rx", device_xname(dev));
1307	error = workqueue_create(&sc->sc_queue_wq, xnamebuf,
1308	    igc_handle_queue_work, sc, IGC_WORKQUEUE_PRI, IPL_NET,
1309	    WQ_PERCPU | WQ_MPSAFE);
1310	if (error) {
1311		aprint_error_dev(dev, "workqueue_create failed\n");
1312		return ENOBUFS;
1313	}
1314	sc->sc_txrx_workqueue = false;
1315
1316	sc->queues[0].msix = 0;
1317	sc->linkvec = 0;
1318
1319	return 0;
1320}
1321
1322static int
1323igc_setup_intx(struct igc_softc *sc)
1324{
1325	pci_chipset_tag_t pc = sc->osdep.os_pa.pa_pc;
1326	device_t dev = sc->sc_dev;
1327	pci_intr_handle_t *intr = sc->sc_intrs;
1328	void **ihs = sc->sc_ihs;
1329	const char *intrstr;
1330	char intrbuf[PCI_INTRSTR_LEN];
1331	char xnamebuf[32];
1332
1333	intrstr = pci_intr_string(pc, *intr, intrbuf, sizeof(intrbuf));
1334
1335	snprintf(xnamebuf, sizeof(xnamebuf), "%s:intx", device_xname(dev));
1336	pci_intr_setattr(pc, intr, PCI_INTR_MPSAFE, true);
1337	*ihs = pci_intr_establish_xname(pc, *intr, IPL_NET,
1338	    igc_intr, sc, xnamebuf);
1339	if (*ihs == NULL) {
1340		aprint_error_dev(dev,
1341		    "unable to establish interrupt at %s\n", intrstr);
1342		return ENOBUFS;
1343	}
1344	aprint_normal_dev(dev, "interrupting at %s\n", intrstr);
1345
1346	struct igc_queue *iq = sc->queues;
1347	iq->igcq_si = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1348	    igc_handle_queue, iq);
1349	if (iq->igcq_si == NULL) {
1350		aprint_error_dev(dev,
1351		    "%s: unable to establish softint\n", intrstr);
1352		return ENOBUFS;
1353	}
1354
1355	/* create workqueue? */
1356	sc->sc_txrx_workqueue = false;
1357
1358	sc->queues[0].msix = 0;
1359	sc->linkvec = 0;
1360
1361	return 0;
1362}
1363
1364static int
1365igc_dma_malloc(struct igc_softc *sc, bus_size_t size, struct igc_dma_alloc *dma)
1366{
1367	struct igc_osdep *os = &sc->osdep;
1368
1369	dma->dma_tag = os->os_dmat;
1370
1371	if (bus_dmamap_create(dma->dma_tag, size, 1, size, 0,
1372	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map))
1373		return 1;
1374	if (bus_dmamem_alloc(dma->dma_tag, size, PAGE_SIZE, 0, &dma->dma_seg,
1375	    1, &dma->dma_nseg, BUS_DMA_WAITOK))
1376		goto destroy;
1377	/*
1378	 * XXXRO
1379	 *
1380	 * Coherent mapping for descriptors is required for now.
1381	 *
1382	 * Both TX and RX descriptors are 16-byte length, which is shorter
1383	 * than dcache lines on modern CPUs. Therefore, sync for a descriptor
1384	 * may overwrite DMA read for descriptors in the same cache line.
1385	 *
1386	 * Can't we avoid this by use cache-line-aligned descriptors at once?
1387	 */
1388	if (bus_dmamem_map(dma->dma_tag, &dma->dma_seg, dma->dma_nseg, size,
1389	    &dma->dma_vaddr, BUS_DMA_WAITOK | BUS_DMA_COHERENT /* XXXRO */))
1390		goto free;
1391	if (bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
1392	    NULL, BUS_DMA_WAITOK))
1393		goto unmap;
1394
1395	dma->dma_size = size;
1396
1397	return 0;
1398 unmap:
1399	bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, size);
1400 free:
1401	bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg);
1402 destroy:
1403	bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1404	dma->dma_map = NULL;
1405	dma->dma_tag = NULL;
1406	return 1;
1407}
1408
1409static void
1410igc_dma_free(struct igc_softc *sc, struct igc_dma_alloc *dma)
1411{
1412
1413	if (dma->dma_tag == NULL)
1414		return;
1415
1416	if (dma->dma_map != NULL) {
1417		bus_dmamap_sync(dma->dma_tag, dma->dma_map, 0,
1418		    dma->dma_map->dm_mapsize,
1419		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1420		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
1421		bus_dmamem_unmap(dma->dma_tag, dma->dma_vaddr, dma->dma_size);
1422		bus_dmamem_free(dma->dma_tag, &dma->dma_seg, dma->dma_nseg);
1423		bus_dmamap_destroy(dma->dma_tag, dma->dma_map);
1424		dma->dma_map = NULL;
1425	}
1426}
1427
1428/*********************************************************************
1429 *
1430 *  Setup networking device structure and register an interface.
1431 *
1432 **********************************************************************/
1433static void
1434igc_setup_interface(struct igc_softc *sc)
1435{
1436	struct ifnet *ifp = &sc->sc_ec.ec_if;
1437
1438	strlcpy(ifp->if_xname, device_xname(sc->sc_dev), sizeof(ifp->if_xname));
1439	ifp->if_softc = sc;
1440	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1441	ifp->if_extflags = IFEF_MPSAFE;
1442	ifp->if_ioctl = igc_ioctl;
1443	ifp->if_start = igc_start;
1444	if (sc->sc_nqueues > 1)
1445		ifp->if_transmit = igc_transmit;
1446	ifp->if_watchdog = igc_watchdog;
1447	ifp->if_init = igc_init;
1448	ifp->if_stop = igc_stop;
1449
1450#if 0 /* notyet */
1451	ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6;
1452#endif
1453
1454	ifp->if_capabilities |=
1455	    IFCAP_CSUM_IPv4_Tx  | IFCAP_CSUM_IPv4_Rx  |
1456	    IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv4_Rx |
1457	    IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv4_Rx |
1458	    IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_TCPv6_Rx |
1459	    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_UDPv6_Rx;
1460
1461	ifp->if_capenable = 0;
1462
1463	sc->sc_ec.ec_capabilities |=
1464	    ETHERCAP_JUMBO_MTU | ETHERCAP_VLAN_MTU;
1465
1466	IFQ_SET_MAXLEN(&ifp->if_snd, sc->num_tx_desc - 1);
1467	IFQ_SET_READY(&ifp->if_snd);
1468
1469#if NVLAN > 0
1470	sc->sc_ec.ec_capabilities |=  ETHERCAP_VLAN_HWTAGGING;
1471#endif
1472
1473	mutex_init(&sc->sc_core_lock, MUTEX_DEFAULT, IPL_NET);
1474
1475	/* Initialize ifmedia structures. */
1476	sc->sc_ec.ec_ifmedia = &sc->media;
1477	ifmedia_init_with_lock(&sc->media, IFM_IMASK, igc_media_change,
1478	    igc_media_status, &sc->sc_core_lock);
1479	ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL);
1480	ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
1481	ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL);
1482	ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
1483	ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1484	ifmedia_add(&sc->media, IFM_ETHER | IFM_2500_T | IFM_FDX, 0, NULL);
1485	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1486	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1487
1488	sc->sc_rx_intr_process_limit = IGC_RX_INTR_PROCESS_LIMIT_DEFAULT;
1489	sc->sc_tx_intr_process_limit = IGC_TX_INTR_PROCESS_LIMIT_DEFAULT;
1490	sc->sc_rx_process_limit = IGC_RX_PROCESS_LIMIT_DEFAULT;
1491	sc->sc_tx_process_limit = IGC_TX_PROCESS_LIMIT_DEFAULT;
1492
1493	if_initialize(ifp);
1494	sc->sc_ipq = if_percpuq_create(ifp);
1495	if_deferred_start_init(ifp, NULL);
1496	ether_ifattach(ifp, sc->hw.mac.addr);
1497	ether_set_ifflags_cb(&sc->sc_ec, igc_ifflags_cb);
1498	if_register(ifp);
1499}
1500
1501static int
1502igc_init(struct ifnet *ifp)
1503{
1504	struct igc_softc *sc = ifp->if_softc;
1505	int error;
1506
1507	mutex_enter(&sc->sc_core_lock);
1508	error = igc_init_locked(sc);
1509	mutex_exit(&sc->sc_core_lock);
1510
1511	return error;
1512}
1513
1514static int
1515igc_init_locked(struct igc_softc *sc)
1516{
1517	struct ethercom *ec = &sc->sc_ec;
1518	struct ifnet *ifp = &ec->ec_if;
1519
1520	DPRINTF(CFG, "called\n");
1521
1522	KASSERT(mutex_owned(&sc->sc_core_lock));
1523
1524	if (ISSET(ifp->if_flags, IFF_RUNNING))
1525		igc_stop_locked(sc);
1526
1527	/* Put the address into the receive address array. */
1528	igc_rar_set(&sc->hw, sc->hw.mac.addr, 0);
1529
1530	/* Initialize the hardware. */
1531	igc_reset(sc);
1532	igc_update_link_status(sc);
1533
1534	/* Setup VLAN support, basic and offload if available. */
1535	IGC_WRITE_REG(&sc->hw, IGC_VET, ETHERTYPE_VLAN);
1536
1537	igc_initialize_transmit_unit(sc);
1538	igc_initialize_receive_unit(sc);
1539
1540	if (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) {
1541		uint32_t ctrl = IGC_READ_REG(&sc->hw, IGC_CTRL);
1542		ctrl |= IGC_CTRL_VME;
1543		IGC_WRITE_REG(&sc->hw, IGC_CTRL, ctrl);
1544	}
1545
1546	/* Setup multicast table. */
1547	igc_set_filter(sc);
1548
1549	igc_clear_hw_cntrs_base_generic(&sc->hw);
1550
1551	if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX)
1552		igc_configure_queues(sc);
1553
1554	/* This clears any pending interrupts */
1555	IGC_READ_REG(&sc->hw, IGC_ICR);
1556	IGC_WRITE_REG(&sc->hw, IGC_ICS, IGC_ICS_LSC);
1557
1558	/* The driver can now take control from firmware. */
1559	igc_get_hw_control(sc);
1560
1561	/* Set Energy Efficient Ethernet. */
1562	igc_set_eee_i225(&sc->hw, true, true, true);
1563
1564	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1565		struct rx_ring *rxr = &sc->rx_rings[iq];
1566
1567		mutex_enter(&rxr->rxr_lock);
1568		igc_rxfill(rxr);
1569		mutex_exit(&rxr->rxr_lock);
1570	}
1571
1572	sc->sc_core_stopping = false;
1573
1574	ifp->if_flags |= IFF_RUNNING;
1575
1576	/* Save last flags for the callback */
1577	sc->sc_if_flags = ifp->if_flags;
1578
1579	callout_schedule(&sc->sc_tick_ch, hz);
1580
1581	igc_enable_intr(sc);
1582
1583	return 0;
1584}
1585
1586static inline int
1587igc_load_mbuf(struct igc_queue *q, bus_dma_tag_t dmat, bus_dmamap_t map,
1588    struct mbuf *m)
1589{
1590	int error;
1591
1592	error = bus_dmamap_load_mbuf(dmat, map, m,
1593	    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1594
1595	if (__predict_false(error == EFBIG)) {
1596		IGC_DRIVER_EVENT(q, txdma_efbig, 1);
1597		m = m_defrag(m, M_NOWAIT);
1598		if (__predict_false(m == NULL)) {
1599			IGC_DRIVER_EVENT(q, txdma_defrag, 1);
1600			return ENOBUFS;
1601		}
1602		error = bus_dmamap_load_mbuf(dmat, map, m,
1603		    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
1604	}
1605
1606	switch (error) {
1607	case 0:
1608		break;
1609	case ENOMEM:
1610		IGC_DRIVER_EVENT(q, txdma_enomem, 1);
1611		break;
1612	case EINVAL:
1613		IGC_DRIVER_EVENT(q, txdma_einval, 1);
1614		break;
1615	case EAGAIN:
1616		IGC_DRIVER_EVENT(q, txdma_eagain, 1);
1617		break;
1618	default:
1619		IGC_DRIVER_EVENT(q, txdma_other, 1);
1620		break;
1621	}
1622
1623	return error;
1624}
1625
1626#define IGC_TX_START	1
1627#define IGC_TX_TRANSMIT	2
1628
1629static void
1630igc_start(struct ifnet *ifp)
1631{
1632	struct igc_softc *sc = ifp->if_softc;
1633
1634	if (__predict_false(!sc->link_active)) {
1635		IFQ_PURGE(&ifp->if_snd);
1636		return;
1637	}
1638
1639	struct tx_ring *txr = &sc->tx_rings[0]; /* queue 0 */
1640	mutex_enter(&txr->txr_lock);
1641	igc_tx_common_locked(ifp, txr, IGC_TX_START);
1642	mutex_exit(&txr->txr_lock);
1643}
1644
1645static inline u_int
1646igc_select_txqueue(struct igc_softc *sc, struct mbuf *m __unused)
1647{
1648	const u_int cpuid = cpu_index(curcpu());
1649
1650	return cpuid % sc->sc_nqueues;
1651}
1652
1653static int
1654igc_transmit(struct ifnet *ifp, struct mbuf *m)
1655{
1656	struct igc_softc *sc = ifp->if_softc;
1657	const u_int qid = igc_select_txqueue(sc, m);
1658	struct tx_ring *txr = &sc->tx_rings[qid];
1659	struct igc_queue *q = txr->txr_igcq;
1660
1661	if (__predict_false(!pcq_put(txr->txr_interq, m))) {
1662		IGC_QUEUE_EVENT(q, tx_pcq_drop, 1);
1663		m_freem(m);
1664		return ENOBUFS;
1665	}
1666
1667	mutex_enter(&txr->txr_lock);
1668	igc_tx_common_locked(ifp, txr, IGC_TX_TRANSMIT);
1669	mutex_exit(&txr->txr_lock);
1670
1671	return 0;
1672}
1673
1674static void
1675igc_tx_common_locked(struct ifnet *ifp, struct tx_ring *txr, int caller)
1676{
1677	struct igc_softc *sc = ifp->if_softc;
1678	struct igc_queue *q = txr->txr_igcq;
1679	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
1680	int prod, free, last = -1;
1681	bool post = false;
1682
1683	prod = txr->next_avail_desc;
1684	free = txr->next_to_clean;
1685	if (free <= prod)
1686		free += sc->num_tx_desc;
1687	free -= prod;
1688
1689	DPRINTF(TX, "%s: begin: msix %d prod %d n2c %d free %d\n",
1690	    caller == IGC_TX_TRANSMIT ? "transmit" : "start",
1691	    txr->me, prod, txr->next_to_clean, free);
1692
1693	for (;;) {
1694		struct mbuf *m;
1695
1696		if (__predict_false(free <= IGC_MAX_SCATTER)) {
1697			IGC_QUEUE_EVENT(q, tx_no_desc, 1);
1698			break;
1699		}
1700
1701		if (caller == IGC_TX_TRANSMIT)
1702			m = pcq_get(txr->txr_interq);
1703		else
1704			IFQ_DEQUEUE(&ifp->if_snd, m);
1705		if (__predict_false(m == NULL))
1706			break;
1707
1708		struct igc_tx_buf *txbuf = &txr->tx_buffers[prod];
1709		bus_dmamap_t map = txbuf->map;
1710
1711		if (__predict_false(
1712		    igc_load_mbuf(q, txr->txdma.dma_tag, map, m))) {
1713			if (caller == IGC_TX_TRANSMIT)
1714				IGC_QUEUE_EVENT(q, tx_pcq_drop, 1);
1715			m_freem(m);
1716			if_statinc_ref(nsr, if_oerrors);
1717			continue;
1718		}
1719
1720		bus_dmamap_sync(txr->txdma.dma_tag, map, 0,
1721		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
1722
1723		uint32_t ctx_cmd_type_len = 0, olinfo_status = 0;
1724		if (igc_tx_ctx_setup(txr, m, prod, &ctx_cmd_type_len,
1725		    &olinfo_status)) {
1726			IGC_QUEUE_EVENT(q, tx_ctx, 1);
1727			/* Consume the first descriptor */
1728			prod = igc_txdesc_incr(sc, prod);
1729			free--;
1730		}
1731		for (int i = 0; i < map->dm_nsegs; i++) {
1732			union igc_adv_tx_desc *txdesc = &txr->tx_base[prod];
1733
1734			uint32_t cmd_type_len = ctx_cmd_type_len |
1735			    IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DTYP_DATA |
1736			    IGC_ADVTXD_DCMD_DEXT | map->dm_segs[i].ds_len;
1737			if (i == map->dm_nsegs - 1) {
1738				cmd_type_len |=
1739				    IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS;
1740			}
1741
1742			igc_txdesc_sync(txr, prod,
1743			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1744			htolem64(&txdesc->read.buffer_addr,
1745			    map->dm_segs[i].ds_addr);
1746			htolem32(&txdesc->read.cmd_type_len, cmd_type_len);
1747			htolem32(&txdesc->read.olinfo_status, olinfo_status);
1748			igc_txdesc_sync(txr, prod,
1749			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1750
1751			last = prod;
1752			prod = igc_txdesc_incr(sc, prod);
1753		}
1754
1755		txbuf->m_head = m;
1756		txbuf->eop_index = last;
1757
1758		bpf_mtap(ifp, m, BPF_D_OUT);
1759
1760		if_statadd_ref(nsr, if_obytes, m->m_pkthdr.len);
1761		if (m->m_flags & M_MCAST)
1762			if_statinc_ref(nsr, if_omcasts);
1763		IGC_QUEUE_EVENT(q, tx_packets, 1);
1764		IGC_QUEUE_EVENT(q, tx_bytes, m->m_pkthdr.len);
1765
1766		free -= map->dm_nsegs;
1767		post = true;
1768	}
1769
1770	if (post) {
1771		txr->next_avail_desc = prod;
1772		IGC_WRITE_REG(&sc->hw, IGC_TDT(txr->me), prod);
1773	}
1774
1775	DPRINTF(TX, "%s: done : msix %d prod %d n2c %d free %d\n",
1776	    caller == IGC_TX_TRANSMIT ? "transmit" : "start",
1777	    txr->me, prod, txr->next_to_clean, free);
1778
1779	IF_STAT_PUTREF(ifp);
1780}
1781
1782static bool
1783igc_txeof(struct tx_ring *txr, u_int limit)
1784{
1785	struct igc_softc *sc = txr->sc;
1786	struct ifnet *ifp = &sc->sc_ec.ec_if;
1787	int cons, prod;
1788	bool more = false;
1789
1790	prod = txr->next_avail_desc;
1791	cons = txr->next_to_clean;
1792
1793	if (cons == prod) {
1794		DPRINTF(TX, "false: msix %d cons %d prod %d\n",
1795		    txr->me, cons, prod);
1796		return false;
1797	}
1798
1799	do {
1800		struct igc_tx_buf *txbuf = &txr->tx_buffers[cons];
1801		const int last = txbuf->eop_index;
1802
1803		membar_consumer();	/* XXXRO necessary? */
1804
1805		KASSERT(last != -1);
1806		union igc_adv_tx_desc *txdesc = &txr->tx_base[last];
1807		igc_txdesc_sync(txr, last, BUS_DMASYNC_POSTREAD);
1808		const uint32_t status = le32toh(txdesc->wb.status);
1809		igc_txdesc_sync(txr, last, BUS_DMASYNC_PREREAD);
1810
1811		if (!(status & IGC_TXD_STAT_DD))
1812			break;
1813
1814		if (limit-- == 0) {
1815			more = true;
1816			DPRINTF(TX, "pending TX "
1817			    "msix %d cons %d last %d prod %d "
1818			    "status 0x%08x\n",
1819			    txr->me, cons, last, prod, status);
1820			break;
1821		}
1822
1823		DPRINTF(TX, "handled TX "
1824		    "msix %d cons %d last %d prod %d "
1825		    "status 0x%08x\n",
1826		    txr->me, cons, last, prod, status);
1827
1828		if_statinc(ifp, if_opackets);
1829
1830		bus_dmamap_t map = txbuf->map;
1831		bus_dmamap_sync(txr->txdma.dma_tag, map, 0, map->dm_mapsize,
1832		    BUS_DMASYNC_POSTWRITE);
1833		bus_dmamap_unload(txr->txdma.dma_tag, map);
1834		m_freem(txbuf->m_head);
1835
1836		txbuf->m_head = NULL;
1837		txbuf->eop_index = -1;
1838
1839		cons = igc_txdesc_incr(sc, last);
1840	} while (cons != prod);
1841
1842	txr->next_to_clean = cons;
1843
1844	return more;
1845}
1846
1847static void
1848igc_intr_barrier(struct igc_softc *sc __unused)
1849{
1850
1851	xc_barrier(0);
1852}
1853
1854static void
1855igc_stop(struct ifnet *ifp, int disable)
1856{
1857	struct igc_softc *sc = ifp->if_softc;
1858
1859	mutex_enter(&sc->sc_core_lock);
1860	igc_stop_locked(sc);
1861	mutex_exit(&sc->sc_core_lock);
1862}
1863
1864/*********************************************************************
1865 *
1866 *  This routine disables all traffic on the adapter by issuing a
1867 *  global reset on the MAC.
1868 *
1869 **********************************************************************/
1870static void
1871igc_stop_locked(struct igc_softc *sc)
1872{
1873	struct ifnet *ifp = &sc->sc_ec.ec_if;
1874
1875	DPRINTF(CFG, "called\n");
1876
1877	KASSERT(mutex_owned(&sc->sc_core_lock));
1878
1879	/*
1880	 * If stopping processing has already started, do nothing.
1881	 */
1882	if ((ifp->if_flags & IFF_RUNNING) == 0)
1883		return;
1884
1885	/* Tell the stack that the interface is no longer active. */
1886	ifp->if_flags &= ~IFF_RUNNING;
1887
1888	/*
1889	 * igc_handle_queue() can enable interrupts, so wait for completion of
1890	 * last igc_handle_queue() after unset IFF_RUNNING.
1891	 */
1892	mutex_exit(&sc->sc_core_lock);
1893	igc_barrier_handle_queue(sc);
1894	mutex_enter(&sc->sc_core_lock);
1895
1896	sc->sc_core_stopping = true;
1897
1898	igc_disable_intr(sc);
1899
1900	callout_halt(&sc->sc_tick_ch, &sc->sc_core_lock);
1901
1902	igc_reset_hw(&sc->hw);
1903	IGC_WRITE_REG(&sc->hw, IGC_WUC, 0);
1904
1905	/*
1906	 * Wait for completion of interrupt handlers.
1907	 */
1908	mutex_exit(&sc->sc_core_lock);
1909	igc_intr_barrier(sc);
1910	mutex_enter(&sc->sc_core_lock);
1911
1912	igc_update_link_status(sc);
1913
1914	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1915		struct tx_ring *txr = &sc->tx_rings[iq];
1916
1917		igc_withdraw_transmit_packets(txr, false);
1918	}
1919
1920	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
1921		struct rx_ring *rxr = &sc->rx_rings[iq];
1922
1923		igc_clear_receive_status(rxr);
1924	}
1925
1926	/* Save last flags for the callback */
1927	sc->sc_if_flags = ifp->if_flags;
1928}
1929
1930/*********************************************************************
1931 *  Ioctl entry point
1932 *
1933 *  igc_ioctl is called when the user wants to configure the
1934 *  interface.
1935 *
1936 *  return 0 on success, positive on failure
1937 **********************************************************************/
1938static int
1939igc_ioctl(struct ifnet * ifp, u_long cmd, void *data)
1940{
1941	struct igc_softc *sc __unused = ifp->if_softc;
1942	int s;
1943	int error;
1944
1945	DPRINTF(CFG, "cmd 0x%016lx\n", cmd);
1946
1947	switch (cmd) {
1948	case SIOCADDMULTI:
1949	case SIOCDELMULTI:
1950		break;
1951	default:
1952		KASSERT(IFNET_LOCKED(ifp));
1953	}
1954
1955	if (cmd == SIOCZIFDATA) {
1956		mutex_enter(&sc->sc_core_lock);
1957		igc_clear_counters(sc);
1958		mutex_exit(&sc->sc_core_lock);
1959	}
1960
1961	switch (cmd) {
1962#ifdef IF_RXR
1963	case SIOCGIFRXR:
1964		s = splnet();
1965		error = igc_rxrinfo(sc, (struct if_rxrinfo *)ifr->ifr_data);
1966		splx(s);
1967		break;
1968#endif
1969	default:
1970		s = splnet();
1971		error = ether_ioctl(ifp, cmd, data);
1972		splx(s);
1973		break;
1974	}
1975
1976	if (error != ENETRESET)
1977		return error;
1978
1979	error = 0;
1980
1981	if (cmd == SIOCSIFCAP)
1982		error = if_init(ifp);
1983	else if ((cmd == SIOCADDMULTI) || (cmd == SIOCDELMULTI)) {
1984		mutex_enter(&sc->sc_core_lock);
1985		if (sc->sc_if_flags & IFF_RUNNING) {
1986			/*
1987			 * Multicast list has changed; set the hardware filter
1988			 * accordingly.
1989			 */
1990			igc_disable_intr(sc);
1991			igc_set_filter(sc);
1992			igc_enable_intr(sc);
1993		}
1994		mutex_exit(&sc->sc_core_lock);
1995	}
1996
1997	return error;
1998}
1999
2000#ifdef IF_RXR
2001static int
2002igc_rxrinfo(struct igc_softc *sc, struct if_rxrinfo *ifri)
2003{
2004	struct if_rxring_info *ifr, ifr1;
2005	int error;
2006
2007	if (sc->sc_nqueues > 1) {
2008		ifr = kmem_zalloc(sc->sc_nqueues * sizeof(*ifr), KM_SLEEP);
2009	} else {
2010		ifr = &ifr1;
2011		memset(ifr, 0, sizeof(*ifr));
2012	}
2013
2014	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2015		struct rx_ring *rxr = &sc->rx_rings[iq];
2016
2017		ifr[iq].ifr_size = MCLBYTES;
2018		snprintf(ifr[iq].ifr_name, sizeof(ifr[iq].ifr_name), "%d", iq);
2019		ifr[iq].ifr_info = rxr->rx_ring;
2020	}
2021
2022	error = if_rxr_info_ioctl(ifri, sc->sc_nqueues, ifr);
2023	if (sc->sc_nqueues > 1)
2024		kmem_free(ifr, sc->sc_nqueues * sizeof(*ifr));
2025
2026	return error;
2027}
2028#endif
2029
2030static void
2031igc_rxfill(struct rx_ring *rxr)
2032{
2033	struct igc_softc *sc = rxr->sc;
2034	int id;
2035
2036	for (id = 0; id < sc->num_rx_desc; id++) {
2037		if (igc_get_buf(rxr, id, false)) {
2038			panic("%s: msix=%d i=%d\n", __func__, rxr->me, id);
2039		}
2040	}
2041
2042	id = sc->num_rx_desc - 1;
2043	rxr->last_desc_filled = id;
2044	IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), id);
2045	rxr->next_to_check = 0;
2046}
2047
2048static void
2049igc_rxrefill(struct rx_ring *rxr, int end)
2050{
2051	struct igc_softc *sc = rxr->sc;
2052	int id;
2053
2054	for (id = rxr->next_to_check; id != end; id = igc_rxdesc_incr(sc, id)) {
2055		if (igc_get_buf(rxr, id, true)) {
2056			/* XXXRO */
2057			panic("%s: msix=%d id=%d\n", __func__, rxr->me, id);
2058		}
2059	}
2060
2061	id = igc_rxdesc_decr(sc, id);
2062	DPRINTF(RX, "%s RDT %d id %d\n",
2063	    rxr->last_desc_filled == id ? "same" : "diff",
2064	    rxr->last_desc_filled, id);
2065	rxr->last_desc_filled = id;
2066	IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me), id);
2067}
2068
2069/*********************************************************************
2070 *
2071 *  This routine executes in interrupt context. It replenishes
2072 *  the mbufs in the descriptor and sends data which has been
2073 *  dma'ed into host memory to upper layer.
2074 *
2075 *********************************************************************/
2076static bool
2077igc_rxeof(struct rx_ring *rxr, u_int limit)
2078{
2079	struct igc_softc *sc = rxr->sc;
2080	struct igc_queue *q = rxr->rxr_igcq;
2081	struct ifnet *ifp = &sc->sc_ec.ec_if;
2082	int id;
2083	bool more = false;
2084
2085	id = rxr->next_to_check;
2086	for (;;) {
2087		union igc_adv_rx_desc *rxdesc = &rxr->rx_base[id];
2088		struct igc_rx_buf *rxbuf, *nxbuf;
2089		struct mbuf *mp, *m;
2090
2091		igc_rxdesc_sync(rxr, id,
2092		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2093
2094		const uint32_t staterr = le32toh(rxdesc->wb.upper.status_error);
2095
2096		if (!ISSET(staterr, IGC_RXD_STAT_DD)) {
2097			igc_rxdesc_sync(rxr, id,
2098			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2099			break;
2100		}
2101
2102		if (limit-- == 0) {
2103			igc_rxdesc_sync(rxr, id,
2104			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2105			DPRINTF(RX, "more=true\n");
2106			more = true;
2107			break;
2108		}
2109
2110		/* Zero out the receive descriptors status. */
2111		rxdesc->wb.upper.status_error = 0;
2112
2113		/* Pull the mbuf off the ring. */
2114		rxbuf = &rxr->rx_buffers[id];
2115		bus_dmamap_t map = rxbuf->map;
2116		bus_dmamap_sync(rxr->rxdma.dma_tag, map,
2117		    0, map->dm_mapsize, BUS_DMASYNC_POSTREAD);
2118		bus_dmamap_unload(rxr->rxdma.dma_tag, map);
2119
2120		mp = rxbuf->buf;
2121		rxbuf->buf = NULL;
2122
2123		const bool eop = staterr & IGC_RXD_STAT_EOP;
2124		const uint16_t len = le16toh(rxdesc->wb.upper.length);
2125
2126#if NVLAN > 0
2127		const uint16_t vtag = le16toh(rxdesc->wb.upper.vlan);
2128#endif
2129
2130		const uint32_t ptype = le32toh(rxdesc->wb.lower.lo_dword.data) &
2131		    IGC_PKTTYPE_MASK;
2132
2133		const uint32_t hash __unused =
2134		    le32toh(rxdesc->wb.lower.hi_dword.rss);
2135		const uint16_t hashtype __unused =
2136		    le16toh(rxdesc->wb.lower.lo_dword.hs_rss.pkt_info) &
2137		    IGC_RXDADV_RSSTYPE_MASK;
2138
2139		igc_rxdesc_sync(rxr, id,
2140		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2141
2142		if (__predict_false(staterr & IGC_RXDEXT_STATERR_RXE)) {
2143			if (rxbuf->fmp) {
2144				m_freem(rxbuf->fmp);
2145				rxbuf->fmp = NULL;
2146			}
2147
2148			m_freem(mp);
2149			m = NULL;
2150
2151			if_statinc(ifp, if_ierrors);
2152			IGC_QUEUE_EVENT(q, rx_discard, 1);
2153
2154			DPRINTF(RX, "ierrors++\n");
2155
2156			goto next_desc;
2157		}
2158
2159		if (__predict_false(mp == NULL)) {
2160			panic("%s: igc_rxeof: NULL mbuf in slot %d "
2161			    "(filled %d)", device_xname(sc->sc_dev),
2162			    id, rxr->last_desc_filled);
2163		}
2164
2165		if (!eop) {
2166			/*
2167			 * Figure out the next descriptor of this frame.
2168			 */
2169			int nextp = igc_rxdesc_incr(sc, id);
2170
2171			nxbuf = &rxr->rx_buffers[nextp];
2172			/*
2173			 * TODO prefetch(nxbuf);
2174			 */
2175		}
2176
2177		mp->m_len = len;
2178
2179		m = rxbuf->fmp;
2180		rxbuf->fmp = NULL;
2181
2182		if (m != NULL) {
2183			m->m_pkthdr.len += mp->m_len;
2184		} else {
2185			m = mp;
2186			m->m_pkthdr.len = mp->m_len;
2187#if NVLAN > 0
2188			if (staterr & IGC_RXD_STAT_VP)
2189				vlan_set_tag(m, vtag);
2190#endif
2191		}
2192
2193		/* Pass the head pointer on */
2194		if (!eop) {
2195			nxbuf->fmp = m;
2196			m = NULL;
2197			mp->m_next = nxbuf->buf;
2198		} else {
2199			m_set_rcvif(m, ifp);
2200
2201			m->m_pkthdr.csum_flags = igc_rx_checksum(q,
2202			    ifp->if_capenable, staterr, ptype);
2203
2204#ifdef notyet
2205			if (hashtype != IGC_RXDADV_RSSTYPE_NONE) {
2206				m->m_pkthdr.ph_flowid = hash;
2207				SET(m->m_pkthdr.csum_flags, M_FLOWID);
2208			}
2209			ml_enqueue(&ml, m);
2210#endif
2211
2212			if_percpuq_enqueue(sc->sc_ipq, m);
2213
2214			if_statinc(ifp, if_ipackets);
2215			IGC_QUEUE_EVENT(q, rx_packets, 1);
2216			IGC_QUEUE_EVENT(q, rx_bytes, m->m_pkthdr.len);
2217		}
2218 next_desc:
2219		/* Advance our pointers to the next descriptor. */
2220		id = igc_rxdesc_incr(sc, id);
2221	}
2222
2223	DPRINTF(RX, "fill queue[%d]\n", rxr->me);
2224	igc_rxrefill(rxr, id);
2225
2226	DPRINTF(RX, "%s n2c %d id %d\n",
2227	    rxr->next_to_check == id ? "same" : "diff",
2228	    rxr->next_to_check, id);
2229	rxr->next_to_check = id;
2230
2231#ifdef OPENBSD
2232	if (!(staterr & IGC_RXD_STAT_DD))
2233		return 0;
2234#endif
2235
2236	return more;
2237}
2238
2239/*********************************************************************
2240 *
2241 *  Verify that the hardware indicated that the checksum is valid.
2242 *  Inform the stack about the status of checksum so that stack
2243 *  doesn't spend time verifying the checksum.
2244 *
2245 *********************************************************************/
2246static int
2247igc_rx_checksum(struct igc_queue *q, uint64_t capenable, uint32_t staterr,
2248    uint32_t ptype)
2249{
2250	const uint16_t status = (uint16_t)staterr;
2251	const uint8_t errors = (uint8_t)(staterr >> 24);
2252	int flags = 0;
2253
2254	if ((status & IGC_RXD_STAT_IPCS) != 0 &&
2255	    (capenable & IFCAP_CSUM_IPv4_Rx) != 0) {
2256		IGC_DRIVER_EVENT(q, rx_ipcs, 1);
2257		flags |= M_CSUM_IPv4;
2258		if (__predict_false((errors & IGC_RXD_ERR_IPE) != 0)) {
2259			IGC_DRIVER_EVENT(q, rx_ipcs_bad, 1);
2260			flags |= M_CSUM_IPv4_BAD;
2261		}
2262	}
2263
2264	if ((status & IGC_RXD_STAT_TCPCS) != 0) {
2265		IGC_DRIVER_EVENT(q, rx_tcpcs, 1);
2266		if ((capenable & IFCAP_CSUM_TCPv4_Rx) != 0)
2267			flags |= M_CSUM_TCPv4;
2268		if ((capenable & IFCAP_CSUM_TCPv6_Rx) != 0)
2269			flags |= M_CSUM_TCPv6;
2270	}
2271
2272	if ((status & IGC_RXD_STAT_UDPCS) != 0) {
2273		IGC_DRIVER_EVENT(q, rx_udpcs, 1);
2274		if ((capenable & IFCAP_CSUM_UDPv4_Rx) != 0)
2275			flags |= M_CSUM_UDPv4;
2276		if ((capenable & IFCAP_CSUM_UDPv6_Rx) != 0)
2277			flags |= M_CSUM_UDPv6;
2278	}
2279
2280	if (__predict_false((errors & IGC_RXD_ERR_TCPE) != 0)) {
2281		IGC_DRIVER_EVENT(q, rx_l4cs_bad, 1);
2282		if ((flags & ~M_CSUM_IPv4) != 0)
2283			flags |= M_CSUM_TCP_UDP_BAD;
2284	}
2285
2286	return flags;
2287}
2288
2289static void
2290igc_watchdog(struct ifnet * ifp)
2291{
2292}
2293
2294static void
2295igc_tick(void *arg)
2296{
2297	struct igc_softc *sc = arg;
2298
2299	mutex_enter(&sc->sc_core_lock);
2300
2301	if (__predict_false(sc->sc_core_stopping)) {
2302		mutex_exit(&sc->sc_core_lock);
2303		return;
2304	}
2305
2306	/* XXX watchdog */
2307	if (0) {
2308		IGC_GLOBAL_EVENT(sc, watchdog, 1);
2309	}
2310
2311	igc_update_counters(sc);
2312
2313	mutex_exit(&sc->sc_core_lock);
2314
2315	callout_schedule(&sc->sc_tick_ch, hz);
2316}
2317
2318/*********************************************************************
2319 *
2320 *  Media Ioctl callback
2321 *
2322 *  This routine is called whenever the user queries the status of
2323 *  the interface using ifconfig.
2324 *
2325 **********************************************************************/
2326static void
2327igc_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2328{
2329	struct igc_softc *sc = ifp->if_softc;
2330	struct igc_hw *hw = &sc->hw;
2331
2332	igc_update_link_status(sc);
2333
2334	ifmr->ifm_status = IFM_AVALID;
2335	ifmr->ifm_active = IFM_ETHER;
2336
2337	if (!sc->link_active) {
2338		ifmr->ifm_active |= IFM_NONE;
2339		return;
2340	}
2341
2342	ifmr->ifm_status |= IFM_ACTIVE;
2343
2344	switch (sc->link_speed) {
2345	case 10:
2346		ifmr->ifm_active |= IFM_10_T;
2347		break;
2348	case 100:
2349		ifmr->ifm_active |= IFM_100_TX;
2350		break;
2351	case 1000:
2352		ifmr->ifm_active |= IFM_1000_T;
2353		break;
2354	case 2500:
2355		ifmr->ifm_active |= IFM_2500_T;
2356		break;
2357	}
2358
2359	if (sc->link_duplex == FULL_DUPLEX)
2360		ifmr->ifm_active |= IFM_FDX;
2361	else
2362		ifmr->ifm_active |= IFM_HDX;
2363
2364	switch (hw->fc.current_mode) {
2365	case igc_fc_tx_pause:
2366		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_TXPAUSE;
2367		break;
2368	case igc_fc_rx_pause:
2369		ifmr->ifm_active |= IFM_FLOW | IFM_ETH_RXPAUSE;
2370		break;
2371	case igc_fc_full:
2372		ifmr->ifm_active |= IFM_FLOW |
2373		    IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE;
2374		break;
2375	case igc_fc_none:
2376	default:
2377		break;
2378	}
2379}
2380
2381/*********************************************************************
2382 *
2383 *  Media Ioctl callback
2384 *
2385 *  This routine is called when the user changes speed/duplex using
2386 *  media/mediopt option with ifconfig.
2387 *
2388 **********************************************************************/
2389static int
2390igc_media_change(struct ifnet *ifp)
2391{
2392	struct igc_softc *sc = ifp->if_softc;
2393	struct ifmedia *ifm = &sc->media;
2394
2395	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2396		return EINVAL;
2397
2398	sc->hw.mac.autoneg = DO_AUTO_NEG;
2399
2400	switch (IFM_SUBTYPE(ifm->ifm_media)) {
2401	case IFM_AUTO:
2402		sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
2403		break;
2404	case IFM_2500_T:
2405		sc->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
2406		break;
2407	case IFM_1000_T:
2408		sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
2409		break;
2410	case IFM_100_TX:
2411		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
2412			sc->hw.phy.autoneg_advertised = ADVERTISE_100_FULL;
2413		else
2414			sc->hw.phy.autoneg_advertised = ADVERTISE_100_HALF;
2415		break;
2416	case IFM_10_T:
2417		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
2418			sc->hw.phy.autoneg_advertised = ADVERTISE_10_FULL;
2419		else
2420			sc->hw.phy.autoneg_advertised = ADVERTISE_10_HALF;
2421		break;
2422	default:
2423		return EINVAL;
2424	}
2425
2426	igc_init_locked(sc);
2427
2428	return 0;
2429}
2430
2431static int
2432igc_ifflags_cb(struct ethercom *ec)
2433{
2434	struct ifnet *ifp = &ec->ec_if;
2435	struct igc_softc *sc = ifp->if_softc;
2436	int rc = 0;
2437	u_short iffchange;
2438	bool needreset = false;
2439
2440	DPRINTF(CFG, "called\n");
2441
2442	KASSERT(IFNET_LOCKED(ifp));
2443
2444	mutex_enter(&sc->sc_core_lock);
2445
2446	/*
2447	 * Check for if_flags.
2448	 * Main usage is to prevent linkdown when opening bpf.
2449	 */
2450	iffchange = ifp->if_flags ^ sc->sc_if_flags;
2451	sc->sc_if_flags = ifp->if_flags;
2452	if ((iffchange & ~(IFF_CANTCHANGE | IFF_DEBUG)) != 0) {
2453		needreset = true;
2454		goto ec;
2455	}
2456
2457	/* iff related updates */
2458	if ((iffchange & IFF_PROMISC) != 0)
2459		igc_set_filter(sc);
2460
2461#ifdef notyet
2462	igc_set_vlan(sc);
2463#endif
2464
2465ec:
2466#ifdef notyet
2467	/* Check for ec_capenable. */
2468	ecchange = ec->ec_capenable ^ sc->sc_ec_capenable;
2469	sc->sc_ec_capenable = ec->ec_capenable;
2470	if ((ecchange & ~ETHERCAP_SOMETHING) != 0) {
2471		needreset = true;
2472		goto out;
2473	}
2474#endif
2475	if (needreset)
2476		rc = ENETRESET;
2477
2478	mutex_exit(&sc->sc_core_lock);
2479
2480	return rc;
2481}
2482
2483static void
2484igc_set_filter(struct igc_softc *sc)
2485{
2486	struct ethercom *ec = &sc->sc_ec;
2487	uint32_t rctl;
2488
2489	rctl = IGC_READ_REG(&sc->hw, IGC_RCTL);
2490	rctl &= ~(IGC_RCTL_BAM |IGC_RCTL_UPE | IGC_RCTL_MPE);
2491
2492	if ((sc->sc_if_flags & IFF_BROADCAST) != 0)
2493		rctl |= IGC_RCTL_BAM;
2494	if ((sc->sc_if_flags & IFF_PROMISC) != 0) {
2495		DPRINTF(CFG, "promisc\n");
2496		rctl |= IGC_RCTL_UPE;
2497		ETHER_LOCK(ec);
2498 allmulti:
2499		ec->ec_flags |= ETHER_F_ALLMULTI;
2500		ETHER_UNLOCK(ec);
2501		rctl |= IGC_RCTL_MPE;
2502	} else {
2503		struct ether_multistep step;
2504		struct ether_multi *enm;
2505		int mcnt = 0;
2506
2507		memset(sc->mta, 0, IGC_MTA_LEN);
2508
2509		ETHER_LOCK(ec);
2510		ETHER_FIRST_MULTI(step, ec, enm);
2511		while (enm != NULL) {
2512			if (((memcmp(enm->enm_addrlo, enm->enm_addrhi,
2513					ETHER_ADDR_LEN)) != 0) ||
2514			    (mcnt >= MAX_NUM_MULTICAST_ADDRESSES)) {
2515				/*
2516				 * We must listen to a range of multicast
2517				 * addresses. For now, just accept all
2518				 * multicasts, rather than trying to set only
2519				 * those filter bits needed to match the range.
2520				 * (At this time, the only use of address
2521				 * ranges is for IP multicast routing, for
2522				 * which the range is big enough to require all
2523				 * bits set.)
2524				 */
2525				goto allmulti;
2526			}
2527			DPRINTF(CFG, "%d: %s\n", mcnt,
2528			    ether_sprintf(enm->enm_addrlo));
2529			memcpy(&sc->mta[mcnt * ETHER_ADDR_LEN],
2530			    enm->enm_addrlo, ETHER_ADDR_LEN);
2531
2532			mcnt++;
2533			ETHER_NEXT_MULTI(step, enm);
2534		}
2535		ec->ec_flags &= ~ETHER_F_ALLMULTI;
2536		ETHER_UNLOCK(ec);
2537
2538		DPRINTF(CFG, "hw filter\n");
2539		igc_update_mc_addr_list(&sc->hw, sc->mta, mcnt);
2540	}
2541
2542	IGC_WRITE_REG(&sc->hw, IGC_RCTL, rctl);
2543}
2544
2545static void
2546igc_update_link_status(struct igc_softc *sc)
2547{
2548	struct ifnet *ifp = &sc->sc_ec.ec_if;
2549	struct igc_hw *hw = &sc->hw;
2550
2551	if (hw->mac.get_link_status == true)
2552		igc_check_for_link(hw);
2553
2554	if (IGC_READ_REG(&sc->hw, IGC_STATUS) & IGC_STATUS_LU) {
2555		if (sc->link_active == 0) {
2556			igc_get_speed_and_duplex(hw, &sc->link_speed,
2557			    &sc->link_duplex);
2558			sc->link_active = 1;
2559			ifp->if_baudrate = IF_Mbps(sc->link_speed);
2560			if_link_state_change(ifp, LINK_STATE_UP);
2561		}
2562	} else {
2563		if (sc->link_active == 1) {
2564			ifp->if_baudrate = sc->link_speed = 0;
2565			sc->link_duplex = 0;
2566			sc->link_active = 0;
2567			if_link_state_change(ifp, LINK_STATE_DOWN);
2568		}
2569	}
2570}
2571
2572/*********************************************************************
2573 *
2574 *  Get a buffer from system mbuf buffer pool.
2575 *
2576 **********************************************************************/
2577static int
2578igc_get_buf(struct rx_ring *rxr, int id, bool strict)
2579{
2580	struct igc_softc *sc = rxr->sc;
2581	struct igc_queue *q = rxr->rxr_igcq;
2582	struct igc_rx_buf *rxbuf = &rxr->rx_buffers[id];
2583	bus_dmamap_t map = rxbuf->map;
2584	struct mbuf *m;
2585	int error;
2586
2587	if (__predict_false(rxbuf->buf)) {
2588		if (strict) {
2589			DPRINTF(RX, "slot %d already has an mbuf\n", id);
2590			return EINVAL;
2591		}
2592		return 0;
2593	}
2594
2595	MGETHDR(m, M_DONTWAIT, MT_DATA);
2596	if (__predict_false(m == NULL)) {
2597 enobuf:
2598		IGC_QUEUE_EVENT(q, rx_no_mbuf, 1);
2599		return ENOBUFS;
2600	}
2601
2602	MCLGET(m, M_DONTWAIT);
2603	if (__predict_false(!(m->m_flags & M_EXT))) {
2604		m_freem(m);
2605		goto enobuf;
2606	}
2607
2608	m->m_len = m->m_pkthdr.len = sc->rx_mbuf_sz;
2609
2610	error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, map, m,
2611	    BUS_DMA_READ | BUS_DMA_NOWAIT);
2612	if (error) {
2613		m_freem(m);
2614		return error;
2615	}
2616
2617	bus_dmamap_sync(rxr->rxdma.dma_tag, map, 0,
2618	    map->dm_mapsize, BUS_DMASYNC_PREREAD);
2619	rxbuf->buf = m;
2620
2621	union igc_adv_rx_desc *rxdesc = &rxr->rx_base[id];
2622	igc_rxdesc_sync(rxr, id, BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
2623	rxdesc->read.pkt_addr = htole64(map->dm_segs[0].ds_addr);
2624	igc_rxdesc_sync(rxr, id, BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
2625
2626	return 0;
2627}
2628
2629static void
2630igc_configure_queues(struct igc_softc *sc)
2631{
2632	struct igc_hw *hw = &sc->hw;
2633	uint32_t ivar;
2634
2635	/* First turn on RSS capability */
2636	IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE | IGC_GPIE_EIAME |
2637	    IGC_GPIE_PBA | IGC_GPIE_NSICR);
2638
2639	/* Set the starting interrupt rate */
2640	uint32_t newitr = (4000000 / MAX_INTS_PER_SEC) & 0x7FFC;
2641	newitr |= IGC_EITR_CNT_IGNR;
2642
2643	/* Turn on MSI-X */
2644	uint32_t newmask = 0;
2645	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2646		struct igc_queue *q = &sc->queues[iq];
2647
2648		/* RX entries */
2649		igc_set_queues(sc, iq, q->msix, 0);
2650		/* TX entries */
2651		igc_set_queues(sc, iq, q->msix, 1);
2652		newmask |= q->eims;
2653		IGC_WRITE_REG(hw, IGC_EITR(q->msix), newitr);
2654	}
2655	sc->msix_queuesmask = newmask;
2656
2657#if 1
2658	ivar = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, 0);
2659	DPRINTF(CFG, "ivar(0)=0x%x\n", ivar);
2660	ivar = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, 1);
2661	DPRINTF(CFG, "ivar(1)=0x%x\n", ivar);
2662#endif
2663
2664	/* And for the link interrupt */
2665	ivar = (sc->linkvec | IGC_IVAR_VALID) << 8;
2666	sc->msix_linkmask = 1 << sc->linkvec;
2667	IGC_WRITE_REG(hw, IGC_IVAR_MISC, ivar);
2668}
2669
2670static void
2671igc_set_queues(struct igc_softc *sc, uint32_t entry, uint32_t vector, int type)
2672{
2673	struct igc_hw *hw = &sc->hw;
2674	const uint32_t index = entry >> 1;
2675	uint32_t ivar = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, index);
2676
2677	if (type) {
2678		if (entry & 1) {
2679			ivar &= 0x00FFFFFF;
2680			ivar |= (vector | IGC_IVAR_VALID) << 24;
2681		} else {
2682			ivar &= 0xFFFF00FF;
2683			ivar |= (vector | IGC_IVAR_VALID) << 8;
2684		}
2685	} else {
2686		if (entry & 1) {
2687			ivar &= 0xFF00FFFF;
2688			ivar |= (vector | IGC_IVAR_VALID) << 16;
2689		} else {
2690			ivar &= 0xFFFFFF00;
2691			ivar |= vector | IGC_IVAR_VALID;
2692		}
2693	}
2694	IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, index, ivar);
2695}
2696
2697static void
2698igc_enable_queue(struct igc_softc *sc, uint32_t eims)
2699{
2700	IGC_WRITE_REG(&sc->hw, IGC_EIMS, eims);
2701}
2702
2703static void
2704igc_enable_intr(struct igc_softc *sc)
2705{
2706	struct igc_hw *hw = &sc->hw;
2707
2708	if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX) {
2709		const uint32_t mask = sc->msix_queuesmask | sc->msix_linkmask;
2710
2711		IGC_WRITE_REG(hw, IGC_EIAC, mask);
2712		IGC_WRITE_REG(hw, IGC_EIAM, mask);
2713		IGC_WRITE_REG(hw, IGC_EIMS, mask);
2714		IGC_WRITE_REG(hw, IGC_IMS, IGC_IMS_LSC);
2715	} else {
2716		IGC_WRITE_REG(hw, IGC_IMS, IMS_ENABLE_MASK);
2717	}
2718	IGC_WRITE_FLUSH(hw);
2719}
2720
2721static void
2722igc_disable_intr(struct igc_softc *sc)
2723{
2724	struct igc_hw *hw = &sc->hw;
2725
2726	if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX) {
2727		IGC_WRITE_REG(hw, IGC_EIMC, 0xffffffff);
2728		IGC_WRITE_REG(hw, IGC_EIAC, 0);
2729	}
2730	IGC_WRITE_REG(hw, IGC_IMC, 0xffffffff);
2731	IGC_WRITE_FLUSH(hw);
2732}
2733
2734static int
2735igc_intr_link(void *arg)
2736{
2737	struct igc_softc *sc = (struct igc_softc *)arg;
2738	const uint32_t reg_icr = IGC_READ_REG(&sc->hw, IGC_ICR);
2739
2740	IGC_GLOBAL_EVENT(sc, link, 1);
2741
2742	if (reg_icr & IGC_ICR_LSC) {
2743		mutex_enter(&sc->sc_core_lock);
2744		sc->hw.mac.get_link_status = true;
2745		igc_update_link_status(sc);
2746		mutex_exit(&sc->sc_core_lock);
2747	}
2748
2749	IGC_WRITE_REG(&sc->hw, IGC_IMS, IGC_IMS_LSC);
2750	IGC_WRITE_REG(&sc->hw, IGC_EIMS, sc->msix_linkmask);
2751
2752	return 1;
2753}
2754
2755static int
2756igc_intr_queue(void *arg)
2757{
2758	struct igc_queue *iq = arg;
2759	struct igc_softc *sc = iq->sc;
2760	struct ifnet *ifp = &sc->sc_ec.ec_if;
2761	struct rx_ring *rxr = iq->rxr;
2762	struct tx_ring *txr = iq->txr;
2763	const u_int txlimit = sc->sc_tx_intr_process_limit,
2764		    rxlimit = sc->sc_rx_intr_process_limit;
2765	bool txmore, rxmore;
2766
2767	IGC_QUEUE_EVENT(iq, irqs, 1);
2768
2769	if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING)))
2770		return 0;
2771
2772	mutex_enter(&txr->txr_lock);
2773	txmore = igc_txeof(txr, txlimit);
2774	mutex_exit(&txr->txr_lock);
2775	mutex_enter(&rxr->rxr_lock);
2776	rxmore = igc_rxeof(rxr, rxlimit);
2777	mutex_exit(&rxr->rxr_lock);
2778
2779	if (txmore || rxmore) {
2780		IGC_QUEUE_EVENT(iq, req, 1);
2781		igc_sched_handle_queue(sc, iq);
2782	} else {
2783		igc_enable_queue(sc, iq->eims);
2784	}
2785
2786	return 1;
2787}
2788
2789static int
2790igc_intr(void *arg)
2791{
2792	struct igc_softc *sc = arg;
2793	struct ifnet *ifp = &sc->sc_ec.ec_if;
2794	struct igc_queue *iq = &sc->queues[0];
2795	struct rx_ring *rxr = iq->rxr;
2796	struct tx_ring *txr = iq->txr;
2797	const u_int txlimit = sc->sc_tx_intr_process_limit,
2798		    rxlimit = sc->sc_rx_intr_process_limit;
2799	bool txmore, rxmore;
2800
2801	if (__predict_false(!ISSET(ifp->if_flags, IFF_RUNNING)))
2802		return 0;
2803
2804	const uint32_t reg_icr = IGC_READ_REG(&sc->hw, IGC_ICR);
2805	DPRINTF(MISC, "reg_icr=0x%x\n", reg_icr);
2806
2807	/* Definitely not our interrupt. */
2808	if (reg_icr == 0x0) {
2809		DPRINTF(MISC, "not for me");
2810		return 0;
2811	}
2812
2813	IGC_QUEUE_EVENT(iq, irqs, 1);
2814
2815	/* Hot eject? */
2816	if (__predict_false(reg_icr == 0xffffffff)) {
2817		DPRINTF(MISC, "hot eject\n");
2818		return 0;
2819	}
2820
2821	if (__predict_false(!(reg_icr & IGC_ICR_INT_ASSERTED))) {
2822		DPRINTF(MISC, "not set IGC_ICR_INT_ASSERTED");
2823		return 0;
2824	}
2825
2826	/*
2827	 * Only MSI-X interrupts have one-shot behavior by taking advantage
2828	 * of the EIAC register.  Thus, explicitly disable interrupts.  This
2829	 * also works around the MSI message reordering errata on certain
2830	 * systems.
2831	 */
2832	igc_disable_intr(sc);
2833
2834	mutex_enter(&txr->txr_lock);
2835	txmore = igc_txeof(txr, txlimit);
2836	mutex_exit(&txr->txr_lock);
2837	mutex_enter(&rxr->rxr_lock);
2838	rxmore = igc_rxeof(rxr, rxlimit);
2839	mutex_exit(&rxr->rxr_lock);
2840
2841	/* Link status change */
2842	// XXXX FreeBSD checks IGC_ICR_RXSEQ
2843	if (__predict_false(reg_icr & IGC_ICR_LSC)) {
2844		IGC_GLOBAL_EVENT(sc, link, 1);
2845		mutex_enter(&sc->sc_core_lock);
2846		sc->hw.mac.get_link_status = true;
2847		igc_update_link_status(sc);
2848		mutex_exit(&sc->sc_core_lock);
2849	}
2850
2851	if (txmore || rxmore) {
2852		IGC_QUEUE_EVENT(iq, req, 1);
2853		igc_sched_handle_queue(sc, iq);
2854	} else {
2855		igc_enable_intr(sc);
2856	}
2857
2858	return 1;
2859}
2860
2861static void
2862igc_handle_queue(void *arg)
2863{
2864	struct igc_queue *iq = arg;
2865	struct igc_softc *sc = iq->sc;
2866	struct tx_ring *txr = iq->txr;
2867	struct rx_ring *rxr = iq->rxr;
2868	const u_int txlimit = sc->sc_tx_process_limit,
2869		    rxlimit = sc->sc_rx_process_limit;
2870	bool txmore, rxmore;
2871
2872	IGC_QUEUE_EVENT(iq, handleq, 1);
2873
2874	mutex_enter(&txr->txr_lock);
2875	txmore = igc_txeof(txr, txlimit);
2876	/* for ALTQ, dequeue from if_snd */
2877	if (txr->me == 0) {
2878		struct ifnet *ifp = &sc->sc_ec.ec_if;
2879
2880		igc_tx_common_locked(ifp, txr, IGC_TX_START);
2881	}
2882	mutex_exit(&txr->txr_lock);
2883
2884	mutex_enter(&rxr->rxr_lock);
2885	rxmore = igc_rxeof(rxr, rxlimit);
2886	mutex_exit(&rxr->rxr_lock);
2887
2888	if (txmore || rxmore) {
2889		igc_sched_handle_queue(sc, iq);
2890	} else {
2891		if (sc->sc_intr_type == PCI_INTR_TYPE_MSIX)
2892			igc_enable_queue(sc, iq->eims);
2893		else
2894			igc_enable_intr(sc);
2895	}
2896}
2897
2898static void
2899igc_handle_queue_work(struct work *wk, void *context)
2900{
2901	struct igc_queue *iq =
2902	    container_of(wk, struct igc_queue, igcq_wq_cookie);
2903
2904	igc_handle_queue(iq);
2905}
2906
2907static void
2908igc_sched_handle_queue(struct igc_softc *sc, struct igc_queue *iq)
2909{
2910
2911	if (iq->igcq_workqueue) {
2912		/* XXXRO notyet */
2913		workqueue_enqueue(sc->sc_queue_wq, &iq->igcq_wq_cookie,
2914		    curcpu());
2915	} else {
2916		softint_schedule(iq->igcq_si);
2917	}
2918}
2919
2920static void
2921igc_barrier_handle_queue(struct igc_softc *sc)
2922{
2923
2924	if (sc->sc_txrx_workqueue) {
2925		for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2926			struct igc_queue *q = &sc->queues[iq];
2927
2928			workqueue_wait(sc->sc_queue_wq, &q->igcq_wq_cookie);
2929		}
2930	} else {
2931		xc_barrier(0);
2932	}
2933}
2934
2935/*********************************************************************
2936 *
2937 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2938 *  the information needed to transmit a packet on the wire.
2939 *
2940 **********************************************************************/
2941static int
2942igc_allocate_transmit_buffers(struct tx_ring *txr)
2943{
2944	struct igc_softc *sc = txr->sc;
2945	int error;
2946
2947	txr->tx_buffers =
2948	    kmem_zalloc(sc->num_tx_desc * sizeof(struct igc_tx_buf), KM_SLEEP);
2949	txr->txtag = txr->txdma.dma_tag;
2950
2951	/* Create the descriptor buffer dma maps. */
2952	for (int id = 0; id < sc->num_tx_desc; id++) {
2953		struct igc_tx_buf *txbuf = &txr->tx_buffers[id];
2954
2955		error = bus_dmamap_create(txr->txdma.dma_tag,
2956		    round_page(IGC_TSO_SIZE + sizeof(struct ether_vlan_header)),
2957		    IGC_MAX_SCATTER, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map);
2958		if (error != 0) {
2959			aprint_error_dev(sc->sc_dev,
2960			    "unable to create TX DMA map\n");
2961			goto fail;
2962		}
2963
2964		txbuf->eop_index = -1;
2965	}
2966
2967	return 0;
2968 fail:
2969	return error;
2970}
2971
2972
2973/*********************************************************************
2974 *
2975 *  Allocate and initialize transmit structures.
2976 *
2977 **********************************************************************/
2978static int
2979igc_setup_transmit_structures(struct igc_softc *sc)
2980{
2981
2982	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
2983		struct tx_ring *txr = &sc->tx_rings[iq];
2984
2985		if (igc_setup_transmit_ring(txr))
2986			goto fail;
2987	}
2988
2989	return 0;
2990 fail:
2991	igc_free_transmit_structures(sc);
2992	return ENOBUFS;
2993}
2994
2995/*********************************************************************
2996 *
2997 *  Initialize a transmit ring.
2998 *
2999 **********************************************************************/
3000static int
3001igc_setup_transmit_ring(struct tx_ring *txr)
3002{
3003	struct igc_softc *sc = txr->sc;
3004
3005	/* Now allocate transmit buffers for the ring. */
3006	if (igc_allocate_transmit_buffers(txr))
3007		return ENOMEM;
3008
3009	/* Clear the old ring contents */
3010	memset(txr->tx_base, 0,
3011	    sizeof(union igc_adv_tx_desc) * sc->num_tx_desc);
3012
3013	/* Reset indices. */
3014	txr->next_avail_desc = 0;
3015	txr->next_to_clean = 0;
3016
3017	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 0,
3018	    txr->txdma.dma_map->dm_mapsize,
3019	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3020
3021	txr->txr_interq = pcq_create(sc->num_tx_desc, KM_SLEEP);
3022
3023	mutex_init(&txr->txr_lock, MUTEX_DEFAULT, IPL_NET);
3024
3025	return 0;
3026}
3027
3028/*********************************************************************
3029 *
3030 *  Enable transmit unit.
3031 *
3032 **********************************************************************/
3033static void
3034igc_initialize_transmit_unit(struct igc_softc *sc)
3035{
3036	struct ifnet *ifp = &sc->sc_ec.ec_if;
3037	struct igc_hw *hw = &sc->hw;
3038
3039	/* Setup the Base and Length of the TX descriptor ring. */
3040	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3041		struct tx_ring *txr = &sc->tx_rings[iq];
3042		const uint64_t bus_addr =
3043		    txr->txdma.dma_map->dm_segs[0].ds_addr;
3044
3045		/* Base and len of TX ring */
3046		IGC_WRITE_REG(hw, IGC_TDLEN(iq),
3047		    sc->num_tx_desc * sizeof(union igc_adv_tx_desc));
3048		IGC_WRITE_REG(hw, IGC_TDBAH(iq), (uint32_t)(bus_addr >> 32));
3049		IGC_WRITE_REG(hw, IGC_TDBAL(iq), (uint32_t)bus_addr);
3050
3051		/* Init the HEAD/TAIL indices */
3052		IGC_WRITE_REG(hw, IGC_TDT(iq), 0 /* XXX txr->next_avail_desc */);
3053		IGC_WRITE_REG(hw, IGC_TDH(iq), 0);
3054
3055		txr->watchdog_timer = 0;
3056
3057		uint32_t txdctl = 0;	/* Clear txdctl */
3058		txdctl |= 0x1f;		/* PTHRESH */
3059		txdctl |= 1 << 8;	/* HTHRESH */
3060		txdctl |= 1 << 16;	/* WTHRESH */
3061		txdctl |= 1 << 22;	/* Reserved bit 22 must always be 1 */
3062		txdctl |= IGC_TXDCTL_GRAN;
3063		txdctl |= 1 << 25;	/* LWTHRESH */
3064
3065		IGC_WRITE_REG(hw, IGC_TXDCTL(iq), txdctl);
3066	}
3067	ifp->if_timer = 0;
3068
3069	/* Program the Transmit Control Register */
3070	uint32_t tctl = IGC_READ_REG(&sc->hw, IGC_TCTL);
3071	tctl &= ~IGC_TCTL_CT;
3072	tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
3073	    (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
3074
3075	/* This write will effectively turn on the transmit unit. */
3076	IGC_WRITE_REG(&sc->hw, IGC_TCTL, tctl);
3077}
3078
3079/*********************************************************************
3080 *
3081 *  Free all transmit rings.
3082 *
3083 **********************************************************************/
3084static void
3085igc_free_transmit_structures(struct igc_softc *sc)
3086{
3087
3088	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3089		struct tx_ring *txr = &sc->tx_rings[iq];
3090
3091		igc_free_transmit_buffers(txr);
3092	}
3093}
3094
3095/*********************************************************************
3096 *
3097 *  Free transmit ring related data structures.
3098 *
3099 **********************************************************************/
3100static void
3101igc_free_transmit_buffers(struct tx_ring *txr)
3102{
3103	struct igc_softc *sc = txr->sc;
3104
3105	if (txr->tx_buffers == NULL)
3106		return;
3107
3108	igc_withdraw_transmit_packets(txr, true);
3109
3110	kmem_free(txr->tx_buffers,
3111	    sc->num_tx_desc * sizeof(struct igc_tx_buf));
3112	txr->tx_buffers = NULL;
3113	txr->txtag = NULL;
3114
3115	pcq_destroy(txr->txr_interq);
3116	mutex_destroy(&txr->txr_lock);
3117}
3118
3119/*********************************************************************
3120 *
3121 *  Withdraw transmit packets.
3122 *
3123 **********************************************************************/
3124static void
3125igc_withdraw_transmit_packets(struct tx_ring *txr, bool destroy)
3126{
3127	struct igc_softc *sc = txr->sc;
3128	struct igc_queue *q = txr->txr_igcq;
3129
3130	mutex_enter(&txr->txr_lock);
3131
3132	for (int id = 0; id < sc->num_tx_desc; id++) {
3133		union igc_adv_tx_desc *txdesc = &txr->tx_base[id];
3134
3135		igc_txdesc_sync(txr, id,
3136		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3137		txdesc->read.buffer_addr = 0;
3138		txdesc->read.cmd_type_len = 0;
3139		txdesc->read.olinfo_status = 0;
3140		igc_txdesc_sync(txr, id,
3141		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3142
3143		struct igc_tx_buf *txbuf = &txr->tx_buffers[id];
3144		bus_dmamap_t map = txbuf->map;
3145
3146		if (map != NULL && map->dm_nsegs > 0) {
3147			bus_dmamap_sync(txr->txdma.dma_tag, map,
3148			    0, map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
3149			bus_dmamap_unload(txr->txdma.dma_tag, map);
3150		}
3151		if (txbuf->m_head != NULL) {
3152			m_freem(txbuf->m_head);
3153			txbuf->m_head = NULL;
3154		}
3155		if (map != NULL && destroy) {
3156			bus_dmamap_destroy(txr->txdma.dma_tag, map);
3157			txbuf->map = NULL;
3158		}
3159		txbuf->eop_index = -1;
3160
3161		txr->next_avail_desc = 0;
3162		txr->next_to_clean = 0;
3163	}
3164
3165	struct mbuf *m;
3166	while ((m = pcq_get(txr->txr_interq)) != NULL) {
3167		IGC_QUEUE_EVENT(q, tx_pcq_drop, 1);
3168		m_freem(m);
3169	}
3170
3171	mutex_exit(&txr->txr_lock);
3172}
3173
3174
3175/*********************************************************************
3176 *
3177 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3178 *
3179 **********************************************************************/
3180
3181static int
3182igc_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, int prod,
3183    uint32_t *cmd_type_len, uint32_t *olinfo_status)
3184{
3185	struct ether_vlan_header *evl;
3186	uint32_t type_tucmd_mlhl = 0;
3187	uint32_t vlan_macip_lens = 0;
3188	uint32_t ehlen, iphlen;
3189	uint16_t ehtype;
3190	int off = 0;
3191
3192	const int csum_flags = mp->m_pkthdr.csum_flags;
3193
3194	/* First check if TSO is to be used */
3195	if ((csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) != 0) {
3196		return igc_tso_setup(txr, mp, prod, cmd_type_len,
3197		    olinfo_status);
3198	}
3199
3200	const bool v4 = (csum_flags &
3201	    (M_CSUM_IPv4 | M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0;
3202	const bool v6 = (csum_flags & (M_CSUM_UDPv6 | M_CSUM_TCPv6)) != 0;
3203
3204	/* Indicate the whole packet as payload when not doing TSO */
3205	*olinfo_status |= mp->m_pkthdr.len << IGC_ADVTXD_PAYLEN_SHIFT;
3206
3207	/*
3208	 * In advanced descriptors the vlan tag must
3209	 * be placed into the context descriptor. Hence
3210	 * we need to make one even if not doing offloads.
3211	 */
3212#if NVLAN > 0
3213	if (vlan_has_tag(mp)) {
3214		vlan_macip_lens |= (uint32_t)vlan_get_tag(mp)
3215		    << IGC_ADVTXD_VLAN_SHIFT;
3216		off = 1;
3217	} else
3218#endif
3219	if (!v4 && !v6)
3220		return 0;
3221
3222	KASSERT(mp->m_len >= sizeof(struct ether_header));
3223	evl = mtod(mp, struct ether_vlan_header *);
3224	if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3225		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3226		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3227		ehtype = evl->evl_proto;
3228	} else {
3229		ehlen = ETHER_HDR_LEN;
3230		ehtype = evl->evl_encap_proto;
3231	}
3232
3233	vlan_macip_lens |= ehlen << IGC_ADVTXD_MACLEN_SHIFT;
3234
3235#ifdef IGC_DEBUG
3236	/*
3237	 * For checksum offloading, L3 headers are not mandatory.
3238	 * We use these only for consistency checks.
3239	 */
3240	struct ip *ip;
3241	struct ip6_hdr *ip6;
3242	uint8_t ipproto;
3243	char *l3d;
3244
3245	if (mp->m_len == ehlen && mp->m_next != NULL)
3246		l3d = mtod(mp->m_next, char *);
3247	else
3248		l3d = mtod(mp, char *) + ehlen;
3249#endif
3250
3251	switch (ntohs(ehtype)) {
3252	case ETHERTYPE_IP:
3253		iphlen = M_CSUM_DATA_IPv4_IPHL(mp->m_pkthdr.csum_data);
3254		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
3255
3256		if ((csum_flags & M_CSUM_IPv4) != 0) {
3257			*olinfo_status |= IGC_TXD_POPTS_IXSM << 8;
3258			off = 1;
3259		}
3260#ifdef IGC_DEBUG
3261		KASSERT(!v6);
3262		ip = (void *)l3d;
3263		ipproto = ip->ip_p;
3264		KASSERT(iphlen == ip->ip_hl << 2);
3265		KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
3266		    ip->ip_sum == 0);
3267#endif
3268		break;
3269	case ETHERTYPE_IPV6:
3270		iphlen = M_CSUM_DATA_IPv6_IPHL(mp->m_pkthdr.csum_data);
3271		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
3272#ifdef IGC_DEBUG
3273		KASSERT(!v4);
3274		ip6 = (void *)l3d;
3275		ipproto = ip6->ip6_nxt;	/* XXX */
3276		KASSERT(iphlen == sizeof(struct ip6_hdr));
3277#endif
3278		break;
3279	default:
3280		/*
3281		 * Unknown L3 protocol. Clear L3 header length and proceed for
3282		 * LAN as done by Linux driver.
3283		 */
3284		iphlen = 0;
3285#ifdef IGC_DEBUG
3286		KASSERT(!v4 && !v6);
3287		ipproto = 0;
3288#endif
3289		break;
3290	}
3291
3292	vlan_macip_lens |= iphlen;
3293
3294	const bool tcp = (csum_flags & (M_CSUM_TCPv4 | M_CSUM_TCPv6)) != 0;
3295	const bool udp = (csum_flags & (M_CSUM_UDPv4 | M_CSUM_UDPv6)) != 0;
3296
3297	if (tcp) {
3298#ifdef IGC_DEBUG
3299		KASSERTMSG(ipproto == IPPROTO_TCP, "ipproto = %d", ipproto);
3300#endif
3301		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
3302		*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
3303		off = 1;
3304	} else if (udp) {
3305#ifdef IGC_DEBUG
3306		KASSERTMSG(ipproto == IPPROTO_UDP, "ipproto = %d", ipproto);
3307#endif
3308		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
3309		*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
3310		off = 1;
3311	}
3312
3313	if (off == 0)
3314		return 0;
3315
3316	type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
3317
3318	/* Now ready a context descriptor */
3319	struct igc_adv_tx_context_desc *txdesc =
3320	    (struct igc_adv_tx_context_desc *)&txr->tx_base[prod];
3321
3322	/* Now copy bits into descriptor */
3323	igc_txdesc_sync(txr, prod,
3324	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3325	htolem32(&txdesc->vlan_macip_lens, vlan_macip_lens);
3326	htolem32(&txdesc->type_tucmd_mlhl, type_tucmd_mlhl);
3327	htolem32(&txdesc->seqnum_seed, 0);
3328	htolem32(&txdesc->mss_l4len_idx, 0);
3329	igc_txdesc_sync(txr, prod,
3330	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3331
3332	return 1;
3333}
3334
3335/*********************************************************************
3336 *
3337 *  Advanced Context Descriptor setup for TSO
3338 *
3339 *  XXX XXXRO
3340 *	Not working. Some packets are sent with correct csums, but
3341 *	others aren't. th->th_sum may be adjusted.
3342 *
3343 **********************************************************************/
3344
3345static int
3346igc_tso_setup(struct tx_ring *txr, struct mbuf *mp, int prod,
3347    uint32_t *cmd_type_len, uint32_t *olinfo_status)
3348{
3349#if 1 /* notyet */
3350	return 0;
3351#else
3352	struct ether_vlan_header *evl;
3353	struct ip *ip;
3354	struct ip6_hdr *ip6;
3355	struct tcphdr *th;
3356	uint32_t type_tucmd_mlhl = 0;
3357	uint32_t vlan_macip_lens = 0;
3358	uint32_t mss_l4len_idx = 0;
3359	uint32_t ehlen, iphlen, tcphlen, paylen;
3360	uint16_t ehtype;
3361
3362	/*
3363	 * In advanced descriptors the vlan tag must
3364	 * be placed into the context descriptor. Hence
3365	 * we need to make one even if not doing offloads.
3366	 */
3367#if NVLAN > 0
3368	if (vlan_has_tag(mp)) {
3369		vlan_macip_lens |= (uint32_t)vlan_get_tag(mp)
3370		    << IGC_ADVTXD_VLAN_SHIFT;
3371	}
3372#endif
3373
3374	KASSERT(mp->m_len >= sizeof(struct ether_header));
3375	evl = mtod(mp, struct ether_vlan_header *);
3376	if (evl->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3377		KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
3378		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3379		ehtype = evl->evl_proto;
3380	} else {
3381		ehlen = ETHER_HDR_LEN;
3382		ehtype = evl->evl_encap_proto;
3383	}
3384
3385	vlan_macip_lens |= ehlen << IGC_ADVTXD_MACLEN_SHIFT;
3386
3387	switch (ntohs(ehtype)) {
3388	case ETHERTYPE_IP:
3389		iphlen = M_CSUM_DATA_IPv4_IPHL(mp->m_pkthdr.csum_data);
3390		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
3391		*olinfo_status |= IGC_TXD_POPTS_IXSM << 8;
3392
3393		KASSERT(mp->m_len >= ehlen + sizeof(*ip));
3394		ip = (void *)(mtod(mp, char *) + ehlen);
3395		ip->ip_len = 0;
3396		KASSERT(iphlen == ip->ip_hl << 2);
3397		KASSERT(ip->ip_sum == 0);
3398		KASSERT(ip->ip_p == IPPROTO_TCP);
3399
3400		KASSERT(mp->m_len >= ehlen + iphlen + sizeof(*th));
3401		th = (void *)((char *)ip + iphlen);
3402		th->th_sum = in_cksum_phdr(ip->ip_src.s_addr, ip->ip_dst.s_addr,
3403		    htons(IPPROTO_TCP));
3404		break;
3405	case ETHERTYPE_IPV6:
3406		iphlen = M_CSUM_DATA_IPv6_IPHL(mp->m_pkthdr.csum_data);
3407		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV6;
3408
3409		KASSERT(mp->m_len >= ehlen + sizeof(*ip6));
3410		ip6 = (void *)(mtod(mp, char *) + ehlen);
3411		ip6->ip6_plen = 0;
3412		KASSERT(iphlen == sizeof(struct ip6_hdr));
3413		KASSERT(ip6->ip6_nxt == IPPROTO_TCP);
3414
3415		KASSERT(mp->m_len >= ehlen + iphlen + sizeof(*th));
3416		th = (void *)((char *)ip6 + iphlen);
3417		tcphlen = th->th_off << 2;
3418		paylen = mp->m_pkthdr.len - ehlen - iphlen - tcphlen;
3419		th->th_sum = in6_cksum_phdr(&ip6->ip6_src, &ip6->ip6_dst, 0,
3420		    htonl(IPPROTO_TCP));
3421		break;
3422	default:
3423		panic("%s", __func__);
3424	}
3425
3426	tcphlen = th->th_off << 2;
3427	paylen = mp->m_pkthdr.len - ehlen - iphlen - tcphlen;
3428
3429	vlan_macip_lens |= iphlen;
3430
3431	type_tucmd_mlhl |= IGC_ADVTXD_DCMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
3432	type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
3433
3434	mss_l4len_idx |= mp->m_pkthdr.segsz << IGC_ADVTXD_MSS_SHIFT;
3435	mss_l4len_idx |= tcphlen << IGC_ADVTXD_L4LEN_SHIFT;
3436
3437	/* Now ready a context descriptor */
3438	struct igc_adv_tx_context_desc *txdesc =
3439	    (struct igc_adv_tx_context_desc *)&txr->tx_base[prod];
3440
3441	/* Now copy bits into descriptor */
3442	igc_txdesc_sync(txr, prod,
3443	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3444	htolem32(&txdesc->vlan_macip_lens, vlan_macip_lens);
3445	htolem32(&txdesc->type_tucmd_mlhl, type_tucmd_mlhl);
3446	htolem32(&txdesc->seqnum_seed, 0);
3447	htolem32(&txdesc->mss_l4len_idx, mss_l4len_idx);
3448	igc_txdesc_sync(txr, prod,
3449	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3450
3451	*cmd_type_len |= IGC_ADVTXD_DCMD_TSE;
3452	*olinfo_status |= IGC_TXD_POPTS_TXSM << 8;
3453	*olinfo_status |= paylen << IGC_ADVTXD_PAYLEN_SHIFT;
3454
3455	return 1;
3456#endif /* notyet */
3457}
3458
3459/*********************************************************************
3460 *
3461 *  Allocate memory for rx_buffer structures. Since we use one
3462 *  rx_buffer per received packet, the maximum number of rx_buffer's
3463 *  that we'll need is equal to the number of receive descriptors
3464 *  that we've allocated.
3465 *
3466 **********************************************************************/
3467static int
3468igc_allocate_receive_buffers(struct rx_ring *rxr)
3469{
3470	struct igc_softc *sc = rxr->sc;
3471	int error;
3472
3473	rxr->rx_buffers =
3474	    kmem_zalloc(sc->num_rx_desc * sizeof(struct igc_rx_buf), KM_SLEEP);
3475
3476	for (int id = 0; id < sc->num_rx_desc; id++) {
3477		struct igc_rx_buf *rxbuf = &rxr->rx_buffers[id];
3478
3479		error = bus_dmamap_create(rxr->rxdma.dma_tag, MCLBYTES, 1,
3480		    MCLBYTES, 0, BUS_DMA_WAITOK, &rxbuf->map);
3481		if (error) {
3482			aprint_error_dev(sc->sc_dev,
3483			    "unable to create RX DMA map\n");
3484			goto fail;
3485		}
3486	}
3487	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 0,
3488	    rxr->rxdma.dma_map->dm_mapsize,
3489	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3490
3491	return 0;
3492 fail:
3493	return error;
3494}
3495
3496/*********************************************************************
3497 *
3498 *  Allocate and initialize receive structures.
3499 *
3500 **********************************************************************/
3501static int
3502igc_setup_receive_structures(struct igc_softc *sc)
3503{
3504
3505	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3506		struct rx_ring *rxr = &sc->rx_rings[iq];
3507
3508		if (igc_setup_receive_ring(rxr))
3509			goto fail;
3510	}
3511
3512	return 0;
3513 fail:
3514	igc_free_receive_structures(sc);
3515	return ENOBUFS;
3516}
3517
3518/*********************************************************************
3519 *
3520 *  Initialize a receive ring and its buffers.
3521 *
3522 **********************************************************************/
3523static int
3524igc_setup_receive_ring(struct rx_ring *rxr)
3525{
3526	struct igc_softc *sc = rxr->sc;
3527	const int rsize = roundup2(
3528	    sc->num_rx_desc * sizeof(union igc_adv_rx_desc), IGC_DBA_ALIGN);
3529
3530	/* Clear the ring contents. */
3531	memset(rxr->rx_base, 0, rsize);
3532
3533	if (igc_allocate_receive_buffers(rxr))
3534		return ENOMEM;
3535
3536	/* Setup our descriptor indices. */
3537	rxr->next_to_check = 0;
3538	rxr->last_desc_filled = 0;
3539
3540	mutex_init(&rxr->rxr_lock, MUTEX_DEFAULT, IPL_NET);
3541
3542	return 0;
3543}
3544
3545/*********************************************************************
3546 *
3547 *  Enable receive unit.
3548 *
3549 **********************************************************************/
3550static void
3551igc_initialize_receive_unit(struct igc_softc *sc)
3552{
3553	struct ifnet *ifp = &sc->sc_ec.ec_if;
3554	struct igc_hw *hw = &sc->hw;
3555	uint32_t rctl, rxcsum, srrctl;
3556
3557	DPRINTF(RX, "called\n");
3558
3559	/*
3560	 * Make sure receives are disabled while setting
3561	 * up the descriptor ring.
3562	 */
3563	rctl = IGC_READ_REG(hw, IGC_RCTL);
3564	IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
3565
3566	/* Setup the Receive Control Register */
3567	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
3568	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
3569	    IGC_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
3570
3571#if 1
3572	/* Do not store bad packets */
3573	rctl &= ~IGC_RCTL_SBP;
3574#else
3575	/* for debug */
3576	rctl |= IGC_RCTL_SBP;
3577#endif
3578
3579	/* Enable Long Packet receive */
3580	if (sc->hw.mac.max_frame_size > ETHER_MAX_LEN)
3581		rctl |= IGC_RCTL_LPE;
3582	else
3583		rctl &= ~IGC_RCTL_LPE;
3584
3585	/* Strip the CRC */
3586	rctl |= IGC_RCTL_SECRC;
3587
3588	/*
3589	 * Set the interrupt throttling rate. Value is calculated
3590	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3591	 *
3592	 * XXX Sync with Linux, especially for jumbo MTU or TSO.
3593	 * XXX Shouldn't be here?
3594	 */
3595	IGC_WRITE_REG(hw, IGC_ITR, DEFAULT_ITR);
3596
3597	rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
3598	rxcsum &= ~(IGC_RXCSUM_IPOFL | IGC_RXCSUM_TUOFL | IGC_RXCSUM_PCSD);
3599	if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx)
3600		rxcsum |= IGC_RXCSUM_IPOFL;
3601	if (ifp->if_capenable & (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx |
3602				 IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx))
3603		rxcsum |= IGC_RXCSUM_TUOFL;
3604	if (sc->sc_nqueues > 1)
3605		rxcsum |= IGC_RXCSUM_PCSD;
3606	IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
3607
3608	if (sc->sc_nqueues > 1)
3609		igc_initialize_rss_mapping(sc);
3610
3611	srrctl = 0;
3612#if 0
3613	srrctl |= 4096 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
3614	rctl |= IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX;
3615#else
3616	srrctl |= 2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
3617	rctl |= IGC_RCTL_SZ_2048;
3618#endif
3619
3620	/*
3621	 * If TX flow control is disabled and there's > 1 queue defined,
3622	 * enable DROP.
3623	 *
3624	 * This drops frames rather than hanging the RX MAC for all queues.
3625	 */
3626	if (sc->sc_nqueues > 1 &&
3627	    (sc->fc == igc_fc_none || sc->fc == igc_fc_rx_pause))
3628		srrctl |= IGC_SRRCTL_DROP_EN;
3629
3630	/* Setup the Base and Length of the RX descriptor rings. */
3631	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3632		struct rx_ring *rxr = &sc->rx_rings[iq];
3633		const uint64_t bus_addr =
3634		    rxr->rxdma.dma_map->dm_segs[0].ds_addr;
3635
3636		IGC_WRITE_REG(hw, IGC_RXDCTL(iq), 0);
3637
3638		srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
3639
3640		IGC_WRITE_REG(hw, IGC_RDLEN(iq),
3641		    sc->num_rx_desc * sizeof(union igc_adv_rx_desc));
3642		IGC_WRITE_REG(hw, IGC_RDBAH(iq), (uint32_t)(bus_addr >> 32));
3643		IGC_WRITE_REG(hw, IGC_RDBAL(iq), (uint32_t)bus_addr);
3644		IGC_WRITE_REG(hw, IGC_SRRCTL(iq), srrctl);
3645
3646		/* Setup the Head and Tail Descriptor Pointers */
3647		IGC_WRITE_REG(hw, IGC_RDH(iq), 0);
3648		IGC_WRITE_REG(hw, IGC_RDT(iq), 0 /* XXX rxr->last_desc_filled */);
3649
3650		/* Enable this Queue */
3651		uint32_t rxdctl = IGC_READ_REG(hw, IGC_RXDCTL(iq));
3652		rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
3653		rxdctl &= 0xFFF00000;
3654		rxdctl |= IGC_RX_PTHRESH;
3655		rxdctl |= IGC_RX_HTHRESH << 8;
3656		rxdctl |= IGC_RX_WTHRESH << 16;
3657		IGC_WRITE_REG(hw, IGC_RXDCTL(iq), rxdctl);
3658	}
3659
3660	/* Make sure VLAN Filters are off */
3661	rctl &= ~IGC_RCTL_VFE;
3662
3663	/* Write out the settings */
3664	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
3665}
3666
3667/*********************************************************************
3668 *
3669 *  Free all receive rings.
3670 *
3671 **********************************************************************/
3672static void
3673igc_free_receive_structures(struct igc_softc *sc)
3674{
3675
3676	for (int iq = 0; iq < sc->sc_nqueues; iq++) {
3677		struct rx_ring *rxr = &sc->rx_rings[iq];
3678
3679		igc_free_receive_buffers(rxr);
3680	}
3681}
3682
3683/*********************************************************************
3684 *
3685 *  Free receive ring data structures
3686 *
3687 **********************************************************************/
3688static void
3689igc_free_receive_buffers(struct rx_ring *rxr)
3690{
3691	struct igc_softc *sc = rxr->sc;
3692
3693	if (rxr->rx_buffers != NULL) {
3694		for (int id = 0; id < sc->num_rx_desc; id++) {
3695			struct igc_rx_buf *rxbuf = &rxr->rx_buffers[id];
3696			bus_dmamap_t map = rxbuf->map;
3697
3698			if (rxbuf->buf != NULL) {
3699				bus_dmamap_sync(rxr->rxdma.dma_tag, map,
3700				    0, map->dm_mapsize, BUS_DMASYNC_POSTREAD);
3701				bus_dmamap_unload(rxr->rxdma.dma_tag, map);
3702				m_freem(rxbuf->buf);
3703				rxbuf->buf = NULL;
3704			}
3705			bus_dmamap_destroy(rxr->rxdma.dma_tag, map);
3706			rxbuf->map = NULL;
3707		}
3708		kmem_free(rxr->rx_buffers,
3709		    sc->num_rx_desc * sizeof(struct igc_rx_buf));
3710		rxr->rx_buffers = NULL;
3711	}
3712
3713	mutex_destroy(&rxr->rxr_lock);
3714}
3715
3716/*********************************************************************
3717 *
3718 * Clear status registers in all RX descriptors.
3719 *
3720 **********************************************************************/
3721static void
3722igc_clear_receive_status(struct rx_ring *rxr)
3723{
3724	struct igc_softc *sc = rxr->sc;
3725
3726	mutex_enter(&rxr->rxr_lock);
3727
3728	for (int id = 0; id < sc->num_rx_desc; id++) {
3729		union igc_adv_rx_desc *rxdesc = &rxr->rx_base[id];
3730
3731		igc_rxdesc_sync(rxr, id,
3732		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3733		rxdesc->wb.upper.status_error = 0;
3734		igc_rxdesc_sync(rxr, id,
3735		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3736	}
3737
3738	mutex_exit(&rxr->rxr_lock);
3739}
3740
3741/*
3742 * Initialise the RSS mapping for NICs that support multiple transmit/
3743 * receive rings.
3744 */
3745static void
3746igc_initialize_rss_mapping(struct igc_softc *sc)
3747{
3748	struct igc_hw *hw = &sc->hw;
3749
3750	/*
3751	 * The redirection table controls which destination
3752	 * queue each bucket redirects traffic to.
3753	 * Each DWORD represents four queues, with the LSB
3754	 * being the first queue in the DWORD.
3755	 *
3756	 * This just allocates buckets to queues using round-robin
3757	 * allocation.
3758	 *
3759	 * NOTE: It Just Happens to line up with the default
3760	 * RSS allocation method.
3761	 */
3762
3763	/* Warning FM follows */
3764	uint32_t reta = 0;
3765	for (int i = 0; i < 128; i++) {
3766		const int shift = 0; /* XXXRO */
3767		int queue_id = i % sc->sc_nqueues;
3768		/* Adjust if required */
3769		queue_id <<= shift;
3770
3771		/*
3772		 * The low 8 bits are for hash value (n+0);
3773		 * The next 8 bits are for hash value (n+1), etc.
3774		 */
3775		reta >>= 8;
3776		reta |= ((uint32_t)queue_id) << 24;
3777		if ((i & 3) == 3) {
3778			IGC_WRITE_REG(hw, IGC_RETA(i >> 2), reta);
3779			reta = 0;
3780		}
3781	}
3782
3783	/*
3784	 * MRQC: Multiple Receive Queues Command
3785	 * Set queuing to RSS control, number depends on the device.
3786	 */
3787
3788	/* Set up random bits */
3789	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
3790	rss_getkey((uint8_t *)rss_key);
3791
3792	/* Now fill our hash function seeds */
3793	for (int i = 0; i < __arraycount(rss_key); i++)
3794		IGC_WRITE_REG_ARRAY(hw, IGC_RSSRK(0), i, rss_key[i]);
3795
3796	/*
3797	 * Configure the RSS fields to hash upon.
3798	 */
3799	uint32_t mrqc = IGC_MRQC_ENABLE_RSS_4Q;
3800	mrqc |= IGC_MRQC_RSS_FIELD_IPV4 | IGC_MRQC_RSS_FIELD_IPV4_TCP;
3801	mrqc |= IGC_MRQC_RSS_FIELD_IPV6 | IGC_MRQC_RSS_FIELD_IPV6_TCP;
3802	mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
3803
3804	IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
3805}
3806
3807/*
3808 * igc_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
3809 * For ASF and Pass Through versions of f/w this means
3810 * that the driver is loaded. For AMT version type f/w
3811 * this means that the network i/f is open.
3812 */
3813static void
3814igc_get_hw_control(struct igc_softc *sc)
3815{
3816	const uint32_t ctrl_ext = IGC_READ_REG(&sc->hw, IGC_CTRL_EXT);
3817
3818	IGC_WRITE_REG(&sc->hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
3819}
3820
3821/*
3822 * igc_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
3823 * For ASF and Pass Through versions of f/w this means that
3824 * the driver is no longer loaded. For AMT versions of the
3825 * f/w this means that the network i/f is closed.
3826 */
3827static void
3828igc_release_hw_control(struct igc_softc *sc)
3829{
3830	const uint32_t ctrl_ext = IGC_READ_REG(&sc->hw, IGC_CTRL_EXT);
3831
3832	IGC_WRITE_REG(&sc->hw, IGC_CTRL_EXT, ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
3833}
3834
3835static int
3836igc_is_valid_ether_addr(uint8_t *addr)
3837{
3838	const char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3839
3840	if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN))
3841		return 0;
3842
3843	return 1;
3844}
3845
3846static void
3847igc_print_devinfo(struct igc_softc *sc)
3848{
3849	device_t dev = sc->sc_dev;
3850	struct igc_hw *hw = &sc->hw;
3851	struct igc_phy_info *phy = &hw->phy;
3852	u_int oui, model, rev;
3853	uint16_t id1, id2, nvm_ver, phy_ver, etk_lo, etk_hi;
3854	char descr[MII_MAX_DESCR_LEN];
3855
3856	/* Print PHY Info */
3857	id1 = phy->id >> 16;
3858	/* The revision field in phy->id is cleard and it's in phy->revision */
3859	id2 = (phy->id & 0xfff0) | phy->revision;
3860	oui = MII_OUI(id1, id2);
3861	model = MII_MODEL(id2);
3862	rev = MII_REV(id2);
3863	mii_get_descr(descr, sizeof(descr), oui, model);
3864	if (descr[0])
3865		aprint_normal_dev(dev, "PHY: %s, rev. %d",
3866		    descr, rev);
3867	else
3868		aprint_normal_dev(dev,
3869		    "PHY OUI 0x%06x, model 0x%04x, rev. %d",
3870		    oui, model, rev);
3871
3872	/* PHY FW version */
3873	phy->ops.read_reg(hw, 0x1e, &phy_ver);
3874	aprint_normal(", PHY FW version 0x%04hx\n", phy_ver);
3875
3876	/* NVM version */
3877	hw->nvm.ops.read(hw, NVM_VERSION, 1, &nvm_ver);
3878
3879	/* EtrackID */
3880	hw->nvm.ops.read(hw, NVM_ETKID_LO, 1, &etk_lo);
3881	hw->nvm.ops.read(hw, NVM_ETKID_HI, 1, &etk_hi);
3882
3883	aprint_normal_dev(dev,
3884	    "NVM image version %x.%02x, EtrackID %04hx%04hx\n",
3885	    (nvm_ver & NVM_VERSION_MAJOR) >> NVM_VERSION_MAJOR_SHIFT,
3886	    nvm_ver & NVM_VERSION_MINOR, etk_hi, etk_lo);
3887}
3888