/******************************************************************************

  Copyright (c) 2001-2013, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 256200 2013-10-09 17:32:52Z jfv $*/


#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifndef IGB_LEGACY_TX
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.4.0";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I354_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void	igb_qflush(struct ifnet *);
static void	igb_deferred_mq_start(void *, int);
#else
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	igb_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);
static void	igb_handle_link_locked(struct adapter *);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tuneable Interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tuneable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits. Another
** plus is that small packets often fit entirely
** in the header and thus need no cluster. It is
** a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

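	/*
	 * Scan the supported-device table; PCI_ANY_ID in the subvendor
	 * or subdevice fields acts as a wildcard match.
	 */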
	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	igb_set_sysctl_value(adapter, "enable_aim",
	    "Interrupt Moderation", &adapter->enable_aim,
	    igb_enable_aim);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats =
		    (struct e1000_vf_stats *)malloc(sizeof \
		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats =
		    (struct e1000_hw_stats *)malloc(sizeof \
		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_eee, "I",
		    "Disable Energy Efficient Ethernet");
		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
			if (adapter->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&adapter->hw);
			else
				e1000_set_eee_i350(&adapter->hw);
		}
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address correctly.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (((adapter->hw.mac.type != e1000_i210) &&
	    (adapter->hw.mac.type != e1000_i211)) &&
	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time it is a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

#ifdef DEV_NETMAP
	igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_detach(dev);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
			/* Process the stack queue only if not depleted */
			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
			    !drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				igb_start_locked(txr, ifp);
#endif
			IGB_TX_UNLOCK(txr);
		}
	}
	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			if (txr->tx_avail <= IGB_MAX_SCATTER)
				txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status |= IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int 			i, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;
	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr);
		IGB_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	IGB_TX_LOCK_ASSERT(txr);

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);


	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status |= IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);
	if (txr->tx_avail <= IGB_MAX_SCATTER)
		txr->queue_status |= IGB_QUEUE_DEPLETED;
	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr);
	IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
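		/* 9234 = 9K jumbo payload plus Ethernet header and CRC */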
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_TSO6) {
			ifp->if_capenable ^= IFCAP_TSO6;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

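	/* Program the VLAN EtherType (VET) register */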
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		if (adapter->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&adapter->hw);
		else
			e1000_set_eee_i350(&adapter->hw);
	}
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, adapter->rx_process_limit, NULL);

		IGB_TX_LOCK(txr);
		igb_txeof(txr);
#ifndef IGB_LEGACY_TX
		/* Process the stack queue only if not depleted */
		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
		    !drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		/* Do we need another? */
		if (more) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	IGB_CORE_LOCK(adapter);
	igb_handle_link_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_link_locked(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK_ASSERT(adapter);
	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
			/* Process the stack queue only if not depleted */
			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
			    !drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				igb_start_locked(txr, ifp);
#endif
			IGB_TX_UNLOCK(txr);
		}
	}
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link_locked(adapter);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		txr = que->txr;

		igb_rxeof(que, count, &rx_done);

		IGB_TX_LOCK(txr);
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#ifndef IGB_LEGACY_TX
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct ifnet   *ifp = adapter->ifp;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_rx;

	/* Ignore spurious interrupts */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	igb_txeof(txr);
#ifndef IGB_LEGACY_TX
	/* Process the stack queue only if not depleted */
	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
	    !drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (adapter->enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gigabit */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}

1710
1711/*********************************************************************
1712 *
1713 *  Media Ioctl callback
1714 *
1715 *  This routine is called whenever the user queries the status of
1716 *  the interface using ifconfig.
1717 *
1718 **********************************************************************/
1719static void
1720igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1721{
1722	struct adapter *adapter = ifp->if_softc;
1723
1724	INIT_DEBUGOUT("igb_media_status: begin");
1725
1726	IGB_CORE_LOCK(adapter);
1727	igb_update_link_status(adapter);
1728
1729	ifmr->ifm_status = IFM_AVALID;
1730	ifmr->ifm_active = IFM_ETHER;
1731
1732	if (!adapter->link_active) {
1733		IGB_CORE_UNLOCK(adapter);
1734		return;
1735	}
1736
1737	ifmr->ifm_status |= IFM_ACTIVE;
1738
1739	switch (adapter->link_speed) {
1740	case 10:
1741		ifmr->ifm_active |= IFM_10_T;
1742		break;
1743	case 100:
1744		/*
1745		** Support for 100Mb SFP - these are Fiber
1746		** but the media type appears as serdes
1747		*/
1748		if (adapter->hw.phy.media_type ==
1749		    e1000_media_type_internal_serdes)
1750			ifmr->ifm_active |= IFM_100_FX;
1751		else
1752			ifmr->ifm_active |= IFM_100_TX;
1753		break;
1754	case 1000:
1755		ifmr->ifm_active |= IFM_1000_T;
1756		break;
1757	case 2500:
1758		ifmr->ifm_active |= IFM_2500_SX;
1759		break;
1760	}
1761
1762	if (adapter->link_duplex == FULL_DUPLEX)
1763		ifmr->ifm_active |= IFM_FDX;
1764	else
1765		ifmr->ifm_active |= IFM_HDX;
1766
1767	IGB_CORE_UNLOCK(adapter);
1768}
1769
1770/*********************************************************************
1771 *
1772 *  Media Ioctl callback
1773 *
1774 *  This routine is called when the user changes speed/duplex using
1775 *  media/mediaopt option with ifconfig.
1776 *
1777 **********************************************************************/
1778static int
1779igb_media_change(struct ifnet *ifp)
1780{
1781	struct adapter *adapter = ifp->if_softc;
1782	struct ifmedia  *ifm = &adapter->media;
1783
1784	INIT_DEBUGOUT("igb_media_change: begin");
1785
1786	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1787		return (EINVAL);
1788
1789	IGB_CORE_LOCK(adapter);
1790	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1791	case IFM_AUTO:
1792		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1793		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1794		break;
1795	case IFM_1000_LX:
1796	case IFM_1000_SX:
1797	case IFM_1000_T:
1798		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1799		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1800		break;
1801	case IFM_100_TX:
1802		adapter->hw.mac.autoneg = FALSE;
1803		adapter->hw.phy.autoneg_advertised = 0;
1804		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1805			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1806		else
1807			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1808		break;
1809	case IFM_10_T:
1810		adapter->hw.mac.autoneg = FALSE;
1811		adapter->hw.phy.autoneg_advertised = 0;
1812		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1813			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1814		else
1815			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1816		break;
1817	default:
1818		device_printf(adapter->dev, "Unsupported media type\n");
1819	}
1820
1821	igb_init_locked(adapter);
1822	IGB_CORE_UNLOCK(adapter);
1823
1824	return (0);
1825}
1826
1827
1828/*********************************************************************
1829 *
1830 *  This routine maps the mbufs to Advanced TX descriptors.
1831 *
1832 **********************************************************************/
1833static int
1834igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1835{
1836	struct adapter  *adapter = txr->adapter;
1837	u32		olinfo_status = 0, cmd_type_len;
1838	int             i, j, error, nsegs;
1839	int		first;
1840	bool		remap = TRUE;
1841	struct mbuf	*m_head;
1842	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1843	bus_dmamap_t	map;
1844	struct igb_tx_buf *txbuf;
1845	union e1000_adv_tx_desc *txd = NULL;
1846
1847	m_head = *m_headp;
1848
1849	/* Basic descriptor defines */
1850        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1851	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1852
1853	if (m_head->m_flags & M_VLANTAG)
1854        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1855
1856        /*
1857         * Important to capture the first descriptor
1858         * used because it will contain the index of
1859         * the one we tell the hardware to report back
1860         */
1861        first = txr->next_avail_desc;
1862	txbuf = &txr->tx_buffers[first];
1863	map = txbuf->map;
1864
1865	/*
1866	 * Map the packet for DMA.
1867	 */
1868retry:
1869	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1870	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1871
1872	if (__predict_false(error)) {
1873		struct mbuf *m;
1874
1875		switch (error) {
1876		case EFBIG:
1877			/* Try it again? - one try */
1878			if (remap == TRUE) {
1879				remap = FALSE;
1880				m = m_defrag(*m_headp, M_NOWAIT);
1881				if (m == NULL) {
1882					adapter->mbuf_defrag_failed++;
1883					m_freem(*m_headp);
1884					*m_headp = NULL;
1885					return (ENOBUFS);
1886				}
1887				*m_headp = m;
1888				goto retry;
1889			} else
1890				return (error);
1891		case ENOMEM:
1892			txr->no_tx_dma_setup++;
1893			return (error);
1894		default:
1895			txr->no_tx_dma_setup++;
1896			m_freem(*m_headp);
1897			*m_headp = NULL;
1898			return (error);
1899		}
1900	}
1901
1902	/* Make certain there are enough descriptors */
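	/* (the "- 2" slack leaves room for the offload context
	    descriptor consumed below and keeps the ring from being
	    completely filled) */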
1903	if (nsegs > txr->tx_avail - 2) {
1904		txr->no_desc_avail++;
1905		bus_dmamap_unload(txr->txtag, map);
1906		return (ENOBUFS);
1907	}
1908	m_head = *m_headp;
1909
1910	/*
1911	** Set up the appropriate offload context
1912	** this will consume the first descriptor
1913	*/
1914	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1915	if (__predict_false(error)) {
1916		m_freem(*m_headp);
1917		*m_headp = NULL;
1918		return (error);
1919	}
1920
1921	/* 82575 needs the queue index added */
1922	if (adapter->hw.mac.type == e1000_82575)
1923		olinfo_status |= txr->me << 4;
1924
1925	i = txr->next_avail_desc;
1926	for (j = 0; j < nsegs; j++) {
1927		bus_size_t seglen;
1928		bus_addr_t segaddr;
1929
1930		txbuf = &txr->tx_buffers[i];
1931		txd = &txr->tx_base[i];
1932		seglen = segs[j].ds_len;
1933		segaddr = htole64(segs[j].ds_addr);
1934
1935		txd->read.buffer_addr = segaddr;
1936		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1937		    cmd_type_len | seglen);
1938		txd->read.olinfo_status = htole32(olinfo_status);
1939
1940		if (++i == txr->num_desc)
1941			i = 0;
1942	}
1943
1944	txd->read.cmd_type_len |=
1945	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1946	txr->tx_avail -= nsegs;
1947	txr->next_avail_desc = i;
1948
1949	txbuf->m_head = m_head;
1950	/*
1951	** Here we swap the map so the last descriptor,
1952	** which gets the completion interrupt has the
1953	** real map, and the first descriptor gets the
1954	** unused map from this descriptor.
1955	*/
1956	txr->tx_buffers[first].map = txbuf->map;
1957	txbuf->map = map;
1958	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1959
1960        /* Set the EOP descriptor that will be marked done */
1961        txbuf = &txr->tx_buffers[first];
1962	txbuf->eop = txd;
1963
1964        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1965            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966	/*
1967	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1968	 * hardware that this frame is available to transmit.
1969	 */
1970	++txr->total_packets;
1971	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1972
1973	return (0);
1974}
1975static void
1976igb_set_promisc(struct adapter *adapter)
1977{
1978	struct ifnet	*ifp = adapter->ifp;
1979	struct e1000_hw *hw = &adapter->hw;
1980	u32		reg;
1981
1982	if (adapter->vf_ifp) {
1983		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1984		return;
1985	}
1986
1987	reg = E1000_READ_REG(hw, E1000_RCTL);
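	/* UPE = unicast promiscuous, MPE = multicast promiscuous */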
1988	if (ifp->if_flags & IFF_PROMISC) {
1989		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1990		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1991	} else if (ifp->if_flags & IFF_ALLMULTI) {
1992		reg |= E1000_RCTL_MPE;
1993		reg &= ~E1000_RCTL_UPE;
1994		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1995	}
1996}
1997
1998static void
1999igb_disable_promisc(struct adapter *adapter)
2000{
2001	struct e1000_hw *hw = &adapter->hw;
2002	struct ifnet	*ifp = adapter->ifp;
2003	u32		reg;
2004	int		mcnt = 0;
2005
2006	if (adapter->vf_ifp) {
2007		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2008		return;
2009	}
2010	reg = E1000_READ_REG(hw, E1000_RCTL);
2011	reg &=  (~E1000_RCTL_UPE);
2012	if (ifp->if_flags & IFF_ALLMULTI)
2013		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2014	else {
2015		struct  ifmultiaddr *ifma;
2016#if __FreeBSD_version < 800000
2017		IF_ADDR_LOCK(ifp);
2018#else
2019		if_maddr_rlock(ifp);
2020#endif
2021		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2022			if (ifma->ifma_addr->sa_family != AF_LINK)
2023				continue;
2024			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025				break;
2026			mcnt++;
2027		}
2028#if __FreeBSD_version < 800000
2029		IF_ADDR_UNLOCK(ifp);
2030#else
2031		if_maddr_runlock(ifp);
2032#endif
2033	}
2034	/* Don't disable if in MAX groups */
2035	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2036		reg &=  (~E1000_RCTL_MPE);
2037	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2038}
2039
2040
2041/*********************************************************************
2042 *  Multicast Update
2043 *
2044 *  This routine is called whenever the multicast address list is updated.
2045 *
2046 **********************************************************************/
2047
2048static void
2049igb_set_multi(struct adapter *adapter)
2050{
2051	struct ifnet	*ifp = adapter->ifp;
2052	struct ifmultiaddr *ifma;
2053	u32 reg_rctl = 0;
2054	u8  *mta;
2055
2056	int mcnt = 0;
2057
2058	IOCTL_DEBUGOUT("igb_set_multi: begin");
2059
2060	mta = adapter->mta;
2061	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2062	    MAX_NUM_MULTICAST_ADDRESSES);
2063
2064#if __FreeBSD_version < 800000
2065	IF_ADDR_LOCK(ifp);
2066#else
2067	if_maddr_rlock(ifp);
2068#endif
2069	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2070		if (ifma->ifma_addr->sa_family != AF_LINK)
2071			continue;
2072
2073		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2074			break;
2075
2076		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2077		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2078		mcnt++;
2079	}
2080#if __FreeBSD_version < 800000
2081	IF_ADDR_UNLOCK(ifp);
2082#else
2083	if_maddr_runlock(ifp);
2084#endif
2085
2086	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2087		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2088		reg_rctl |= E1000_RCTL_MPE;
2089		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090	} else
2091		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2092}
2093
2094
2095/*********************************************************************
2096 *  Timer routine:
2097 *  	This routine checks for link status,
2098 *	updates statistics, and does the watchdog.
2099 *
2100 **********************************************************************/
2101
2102static void
2103igb_local_timer(void *arg)
2104{
2105	struct adapter		*adapter = arg;
2106	device_t		dev = adapter->dev;
2107	struct ifnet		*ifp = adapter->ifp;
2108	struct tx_ring		*txr = adapter->tx_rings;
2109	struct igb_queue	*que = adapter->queues;
2110	int			hung = 0, busy = 0;
2111
2112
2113	IGB_CORE_LOCK_ASSERT(adapter);
2114
2115	igb_update_link_status(adapter);
2116	igb_update_stats_counters(adapter);
2117
2118	/*
2119	** Check the status of the TX queues:
2120	**	- central locked handling of OACTIVE
2121	**	- watchdog only if all queues show hung
2122	*/
2123	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2124		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2125		    (adapter->pause_frames == 0))
2126			++hung;
2127		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2128			++busy;
2129		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2130			taskqueue_enqueue(que->tq, &que->que_task);
2131	}
2132	if (hung == adapter->num_queues)
2133		goto timeout;
2134	if (busy == adapter->num_queues)
2135		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2136	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2137	    (busy < adapter->num_queues))
2138		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2139
2140	adapter->pause_frames = 0;
2141	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2142#ifndef DEVICE_POLLING
2143	/* Schedule all queue interrupts - deadlock protection */
2144	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2145#endif
2146	return;
2147
2148timeout:
2149	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2150	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2151            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2152            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2153	device_printf(dev, "TX(%d) desc avail = %d, "
2154            "Next TX to Clean = %d\n",
2155            txr->me, txr->tx_avail, txr->next_to_clean);
2156	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2157	adapter->watchdog_events++;
2158	igb_init_locked(adapter);
2159}
2160
2161static void
2162igb_update_link_status(struct adapter *adapter)
2163{
2164	struct e1000_hw		*hw = &adapter->hw;
2165	struct e1000_fc_info	*fc = &hw->fc;
2166	struct ifnet		*ifp = adapter->ifp;
2167	device_t		dev = adapter->dev;
2168	struct tx_ring		*txr = adapter->tx_rings;
2169	u32			link_check, thstat, ctrl;
2170	char			*flowctl = NULL;
2171
2172	link_check = thstat = ctrl = 0;
2173
2174	/* Get the cached link value or read for real */
2175        switch (hw->phy.media_type) {
2176        case e1000_media_type_copper:
2177                if (hw->mac.get_link_status) {
2178			/* Do the work to read phy */
2179                        e1000_check_for_link(hw);
2180                        link_check = !hw->mac.get_link_status;
2181                } else
2182                        link_check = TRUE;
2183                break;
2184        case e1000_media_type_fiber:
2185                e1000_check_for_link(hw);
2186                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2187                                 E1000_STATUS_LU);
2188                break;
2189        case e1000_media_type_internal_serdes:
2190                e1000_check_for_link(hw);
2191                link_check = adapter->hw.mac.serdes_has_link;
2192                break;
2193	/* VF device is type_unknown */
2194        case e1000_media_type_unknown:
2195                e1000_check_for_link(hw);
2196		link_check = !hw->mac.get_link_status;
2197		/* Fall thru */
2198        default:
2199                break;
2200        }
2201
2202	/* Check for thermal downshift or shutdown */
2203	if (hw->mac.type == e1000_i350) {
2204		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2205		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2206	}
2207
2208	/* Get the flow control for display */
2209	switch (fc->current_mode) {
2210	case e1000_fc_rx_pause:
2211		flowctl = "RX";
2212		break;
2213	case e1000_fc_tx_pause:
2214		flowctl = "TX";
2215		break;
2216	case e1000_fc_full:
2217		flowctl = "Full";
2218		break;
2219	case e1000_fc_none:
2220	default:
2221		flowctl = "None";
2222		break;
2223	}
2224
2225	/* Now we check if a transition has happened */
2226	if (link_check && (adapter->link_active == 0)) {
2227		e1000_get_speed_and_duplex(&adapter->hw,
2228		    &adapter->link_speed, &adapter->link_duplex);
2229		if (bootverbose)
2230			device_printf(dev, "Link is up %d Mbps %s,"
2231			    " Flow Control: %s\n",
2232			    adapter->link_speed,
2233			    ((adapter->link_duplex == FULL_DUPLEX) ?
2234			    "Full Duplex" : "Half Duplex"), flowctl);
2235		adapter->link_active = 1;
2236		ifp->if_baudrate = adapter->link_speed * 1000000;
2237		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2238		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2239			device_printf(dev, "Link: thermal downshift\n");
2240		/* Delay Link Up for Phy update */
2241		if (((hw->mac.type == e1000_i210) ||
2242		    (hw->mac.type == e1000_i211)) &&
2243		    (hw->phy.id == I210_I_PHY_ID))
2244			msec_delay(I210_LINK_DELAY);
2245		/* Reset if the media type changed. */
2246		if (hw->dev_spec._82575.media_changed) {
2247			hw->dev_spec._82575.media_changed = false;
2248			adapter->flags |= IGB_MEDIA_RESET;
2249			igb_reset(adapter);
2250		}
2251		/* This can sleep */
2252		if_link_state_change(ifp, LINK_STATE_UP);
2253	} else if (!link_check && (adapter->link_active == 1)) {
2254		ifp->if_baudrate = adapter->link_speed = 0;
2255		adapter->link_duplex = 0;
2256		if (bootverbose)
2257			device_printf(dev, "Link is Down\n");
2258		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2259		    (thstat & E1000_THSTAT_PWR_DOWN))
2260			device_printf(dev, "Link: thermal shutdown\n");
2261		adapter->link_active = 0;
2262		/* This can sleep */
2263		if_link_state_change(ifp, LINK_STATE_DOWN);
2264		/* Reset queue state */
2265		for (int i = 0; i < adapter->num_queues; i++, txr++)
2266			txr->queue_status = IGB_QUEUE_IDLE;
2267	}
2268}
2269
2270/*********************************************************************
2271 *
2272 *  This routine disables all traffic on the adapter by issuing a
2273 *  global reset on the MAC and deallocates TX/RX buffers.
2274 *
2275 **********************************************************************/
2276
2277static void
2278igb_stop(void *arg)
2279{
2280	struct adapter	*adapter = arg;
2281	struct ifnet	*ifp = adapter->ifp;
2282	struct tx_ring *txr = adapter->tx_rings;
2283
2284	IGB_CORE_LOCK_ASSERT(adapter);
2285
2286	INIT_DEBUGOUT("igb_stop: begin");
2287
2288	igb_disable_intr(adapter);
2289
2290	callout_stop(&adapter->timer);
2291
2292	/* Tell the stack that the interface is no longer active */
2293	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2294	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2295
2296	/* Disarm watchdog timer. */
2297	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2298		IGB_TX_LOCK(txr);
2299		txr->queue_status = IGB_QUEUE_IDLE;
2300		IGB_TX_UNLOCK(txr);
2301	}
2302
2303	e1000_reset_hw(&adapter->hw);
2304	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2305
2306	e1000_led_off(&adapter->hw);
2307	e1000_cleanup_led(&adapter->hw);
2308}
2309
2310
2311/*********************************************************************
2312 *
2313 *  Determine hardware revision.
2314 *
2315 **********************************************************************/
2316static void
2317igb_identify_hardware(struct adapter *adapter)
2318{
2319	device_t dev = adapter->dev;
2320
2321	/* Make sure our PCI config space has the necessary stuff set */
2322	pci_enable_busmaster(dev);
2323	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2324
2325	/* Save off the information about this board */
2326	adapter->hw.vendor_id = pci_get_vendor(dev);
2327	adapter->hw.device_id = pci_get_device(dev);
2328	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2329	adapter->hw.subsystem_vendor_id =
2330	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2331	adapter->hw.subsystem_device_id =
2332	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2333
2334	/* Set MAC type early for PCI setup */
2335	e1000_set_mac_type(&adapter->hw);
2336
2337	/* Are we a VF device? */
2338	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2339	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2340		adapter->vf_ifp = 1;
2341	else
2342		adapter->vf_ifp = 0;
2343}
2344
2345static int
2346igb_allocate_pci_resources(struct adapter *adapter)
2347{
2348	device_t	dev = adapter->dev;
2349	int		rid;
2350
2351	rid = PCIR_BAR(0);
2352	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2353	    &rid, RF_ACTIVE);
2354	if (adapter->pci_mem == NULL) {
2355		device_printf(dev, "Unable to allocate bus resource: memory\n");
2356		return (ENXIO);
2357	}
2358	adapter->osdep.mem_bus_space_tag =
2359	    rman_get_bustag(adapter->pci_mem);
2360	adapter->osdep.mem_bus_space_handle =
2361	    rman_get_bushandle(adapter->pci_mem);
2362	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2363
2364	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2365
2366	/* This will setup either MSI/X or MSI */
2367	adapter->msix = igb_setup_msix(adapter);
2368	adapter->hw.back = &adapter->osdep;
2369
2370	return (0);
2371}
2372
2373/*********************************************************************
2374 *
2375 *  Setup the Legacy or MSI Interrupt handler
2376 *
2377 **********************************************************************/
2378static int
2379igb_allocate_legacy(struct adapter *adapter)
2380{
2381	device_t		dev = adapter->dev;
2382	struct igb_queue	*que = adapter->queues;
2383	struct tx_ring		*txr = adapter->tx_rings;
2384	int			error, rid = 0;
2385
2386	/* Turn off all interrupts */
2387	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2388
2389	/* MSI RID is 1 */
2390	if (adapter->msix == 1)
2391		rid = 1;
2392
2393	/* We allocate a single interrupt resource */
2394	adapter->res = bus_alloc_resource_any(dev,
2395	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2396	if (adapter->res == NULL) {
2397		device_printf(dev, "Unable to allocate bus resource: "
2398		    "interrupt\n");
2399		return (ENXIO);
2400	}
2401
2402#ifndef IGB_LEGACY_TX
2403	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2404#endif
2405
2406	/*
2407	 * Try allocating a fast interrupt and the associated deferred
2408	 * processing contexts.
2409	 */
2410	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2411	/* Make tasklet for deferred link handling */
2412	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2413	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2414	    taskqueue_thread_enqueue, &que->tq);
2415	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2416	    device_get_nameunit(adapter->dev));
2417	if ((error = bus_setup_intr(dev, adapter->res,
2418	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2419	    adapter, &adapter->tag)) != 0) {
2420		device_printf(dev, "Failed to register fast interrupt "
2421			    "handler: %d\n", error);
2422		taskqueue_free(que->tq);
2423		que->tq = NULL;
2424		return (error);
2425	}
2426
2427	return (0);
2428}
2429
2430
2431/*********************************************************************
2432 *
2433 *  Setup the MSIX Queue Interrupt handlers:
2434 *
2435 **********************************************************************/
2436static int
2437igb_allocate_msix(struct adapter *adapter)
2438{
2439	device_t		dev = adapter->dev;
2440	struct igb_queue	*que = adapter->queues;
2441	int			error, rid, vector = 0;
2442
2443	/* Be sure to start with all interrupts disabled */
2444	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2445	E1000_WRITE_FLUSH(&adapter->hw);
2446
2447	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2448		rid = vector + 1;
2449		que->res = bus_alloc_resource_any(dev,
2450		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2451		if (que->res == NULL) {
2452			device_printf(dev,
2453			    "Unable to allocate bus resource: "
2454			    "MSIX Queue Interrupt\n");
2455			return (ENXIO);
2456		}
2457		error = bus_setup_intr(dev, que->res,
2458	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2459		    igb_msix_que, que, &que->tag);
2460		if (error) {
2461			que->res = NULL;
2462			device_printf(dev, "Failed to register Queue handler\n");
2463			return (error);
2464		}
2465#if __FreeBSD_version >= 800504
2466		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2467#endif
2468		que->msix = vector;
2469		if (adapter->hw.mac.type == e1000_82575)
2470			que->eims = E1000_EICR_TX_QUEUE0 << i;
2471		else
2472			que->eims = 1 << vector;
2473		/*
2474		** Bind the msix vector, and thus the
2475		** rings to the corresponding cpu.
2476		*/
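		/* (vectors are spread round-robin, starting at CPU_FIRST()) */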
2477		if (adapter->num_queues > 1) {
2478			if (igb_last_bind_cpu < 0)
2479				igb_last_bind_cpu = CPU_FIRST();
2480			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2481			device_printf(dev,
2482				"Bound queue %d to cpu %d\n",
2483				i, igb_last_bind_cpu);
2484			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2485		}
2486#ifndef IGB_LEGACY_TX
2487		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2488		    que->txr);
2489#endif
2490		/* Make tasklet for deferred handling */
2491		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2492		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2493		    taskqueue_thread_enqueue, &que->tq);
2494		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2495		    device_get_nameunit(adapter->dev));
2496	}
2497
2498	/* And Link */
2499	rid = vector + 1;
2500	adapter->res = bus_alloc_resource_any(dev,
2501	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2502	if (adapter->res == NULL) {
2503		device_printf(dev,
2504		    "Unable to allocate bus resource: "
2505		    "MSIX Link Interrupt\n");
2506		return (ENXIO);
2507	}
2508	if ((error = bus_setup_intr(dev, adapter->res,
2509	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2510	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2511		device_printf(dev, "Failed to register Link handler\n");
2512		return (error);
2513	}
2514#if __FreeBSD_version >= 800504
2515	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2516#endif
2517	adapter->linkvec = vector;
2518
2519	return (0);
2520}
2521
2522
2523static void
2524igb_configure_queues(struct adapter *adapter)
2525{
2526	struct	e1000_hw	*hw = &adapter->hw;
2527	struct	igb_queue	*que;
2528	u32			tmp, ivar = 0, newitr = 0;
2529
2530	/* First turn on RSS capability */
2531	if (adapter->hw.mac.type != e1000_82575)
2532		E1000_WRITE_REG(hw, E1000_GPIE,
2533		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2534		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2535
2536	/* Turn on MSIX */
2537	switch (adapter->hw.mac.type) {
2538	case e1000_82580:
2539	case e1000_i350:
2540	case e1000_i354:
2541	case e1000_i210:
2542	case e1000_i211:
2543	case e1000_vfadapt:
2544	case e1000_vfadapt_i350:
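		/*
		** Note: on these MACs each 32-bit IVAR register covers a
		** pair of queues: RX(2n) in bits 7:0, TX(2n) in 15:8,
		** RX(2n+1) in 23:16 and TX(2n+1) in 31:24, hence the
		** index of i >> 1 below.
		*/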
2545		/* RX entries */
2546		for (int i = 0; i < adapter->num_queues; i++) {
2547			u32 index = i >> 1;
2548			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2549			que = &adapter->queues[i];
2550			if (i & 1) {
2551				ivar &= 0xFF00FFFF;
2552				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2553			} else {
2554				ivar &= 0xFFFFFF00;
2555				ivar |= que->msix | E1000_IVAR_VALID;
2556			}
2557			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2558		}
2559		/* TX entries */
2560		for (int i = 0; i < adapter->num_queues; i++) {
2561			u32 index = i >> 1;
2562			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2563			que = &adapter->queues[i];
2564			if (i & 1) {
2565				ivar &= 0x00FFFFFF;
2566				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2567			} else {
2568				ivar &= 0xFFFF00FF;
2569				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2570			}
2571			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2572			adapter->que_mask |= que->eims;
2573		}
2574
2575		/* And for the link interrupt */
2576		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2577		adapter->link_mask = 1 << adapter->linkvec;
2578		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2579		break;
2580	case e1000_82576:
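		/*
		** Note: the 82576 pairs queue i with queue i + 8 in
		** IVAR(i & 7): queues 0-7 use the low half of each
		** register and queues 8-15 the high half.
		*/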
2581		/* RX entries */
2582		for (int i = 0; i < adapter->num_queues; i++) {
2583			u32 index = i & 0x7; /* Each IVAR has two entries */
2584			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2585			que = &adapter->queues[i];
2586			if (i < 8) {
2587				ivar &= 0xFFFFFF00;
2588				ivar |= que->msix | E1000_IVAR_VALID;
2589			} else {
2590				ivar &= 0xFF00FFFF;
2591				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2592			}
2593			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2594			adapter->que_mask |= que->eims;
2595		}
2596		/* TX entries */
2597		for (int i = 0; i < adapter->num_queues; i++) {
2598			u32 index = i & 0x7; /* Each IVAR has two entries */
2599			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2600			que = &adapter->queues[i];
2601			if (i < 8) {
2602				ivar &= 0xFFFF00FF;
2603				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2604			} else {
2605				ivar &= 0x00FFFFFF;
2606				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2607			}
2608			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2609			adapter->que_mask |= que->eims;
2610		}
2611
2612		/* And for the link interrupt */
2613		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2614		adapter->link_mask = 1 << adapter->linkvec;
2615		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2616		break;
2617
2618	case e1000_82575:
2619                /* Enable MSI-X support */
2620		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2621                tmp |= E1000_CTRL_EXT_PBA_CLR;
2622                /* Auto-Mask interrupts upon ICR read. */
2623                tmp |= E1000_CTRL_EXT_EIAME;
2624                tmp |= E1000_CTRL_EXT_IRCA;
2625                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2626
2627		/* Queues */
2628		for (int i = 0; i < adapter->num_queues; i++) {
2629			que = &adapter->queues[i];
2630			tmp = E1000_EICR_RX_QUEUE0 << i;
2631			tmp |= E1000_EICR_TX_QUEUE0 << i;
2632			que->eims = tmp;
2633			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2634			    i, que->eims);
2635			adapter->que_mask |= que->eims;
2636		}
2637
2638		/* Link */
2639		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2640		    E1000_EIMS_OTHER);
2641		adapter->link_mask |= E1000_EIMS_OTHER;
2642	default:
2643		break;
2644	}
2645
2646	/* Set the starting interrupt rate */
2647	if (igb_max_interrupt_rate > 0)
2648		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
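	/* e.g. a limit of 8000 ints/sec gives an EITR count of 500 */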
2649
2650        if (hw->mac.type == e1000_82575)
2651                newitr |= newitr << 16;
2652        else
2653                newitr |= E1000_EITR_CNT_IGNR;
2654
2655	for (int i = 0; i < adapter->num_queues; i++) {
2656		que = &adapter->queues[i];
2657		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2658	}
2659
2660	return;
2661}
2662
2663
2664static void
2665igb_free_pci_resources(struct adapter *adapter)
2666{
2667	struct		igb_queue *que = adapter->queues;
2668	device_t	dev = adapter->dev;
2669	int		rid;
2670
2671	/*
2672	** There is a slight possibility of a failure mode
2673	** in attach that results in entering this function
2674	** before the interrupt resources have been initialized;
2675	** in that case we do not want to execute the loops below.
2676	** We can detect this reliably by the state of the adapter's
2677	** res pointer.
2678	*/
2679	if (adapter->res == NULL)
2680		goto mem;
2681
2682	/*
2683	 * First release all the interrupt resources:
2684	 */
2685	for (int i = 0; i < adapter->num_queues; i++, que++) {
2686		rid = que->msix + 1;
2687		if (que->tag != NULL) {
2688			bus_teardown_intr(dev, que->res, que->tag);
2689			que->tag = NULL;
2690		}
2691		if (que->res != NULL)
2692			bus_release_resource(dev,
2693			    SYS_RES_IRQ, rid, que->res);
2694	}
2695
2696	/* Clean the Legacy or Link interrupt last */
2697	if (adapter->linkvec) /* we are doing MSIX */
2698		rid = adapter->linkvec + 1;
2699	else
2700		rid = (adapter->msix != 0) ? 1 : 0;
2701
2702	que = adapter->queues;
2703	if (adapter->tag != NULL) {
2704		taskqueue_drain(que->tq, &adapter->link_task);
2705		bus_teardown_intr(dev, adapter->res, adapter->tag);
2706		adapter->tag = NULL;
2707	}
2708	if (adapter->res != NULL)
2709		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2710
2711	for (int i = 0; i < adapter->num_queues; i++, que++) {
2712		if (que->tq != NULL) {
2713#ifndef IGB_LEGACY_TX
2714			taskqueue_drain(que->tq, &que->txr->txq_task);
2715#endif
2716			taskqueue_drain(que->tq, &que->que_task);
2717			taskqueue_free(que->tq);
2718		}
2719	}
2720mem:
2721	if (adapter->msix)
2722		pci_release_msi(dev);
2723
2724	if (adapter->msix_mem != NULL)
2725		bus_release_resource(dev, SYS_RES_MEMORY,
2726		    adapter->memrid, adapter->msix_mem);
2727
2728	if (adapter->pci_mem != NULL)
2729		bus_release_resource(dev, SYS_RES_MEMORY,
2730		    PCIR_BAR(0), adapter->pci_mem);
2731
2732}
2733
2734/*
2735 * Setup Either MSI/X or MSI
2736 */
2737static int
2738igb_setup_msix(struct adapter *adapter)
2739{
2740	device_t	dev = adapter->dev;
2741	int		bar, want, queues, msgs, maxqueues;
2742
2743	/* tuneable override */
2744	if (igb_enable_msix == 0)
2745		goto msi;
2746
2747	/* First try MSI/X */
2748	msgs = pci_msix_count(dev);
2749	if (msgs == 0)
2750		goto msi;
2751	/*
2752	** Some new devices, as with ixgbe, now may
2753	** use a different BAR, so we need to keep
2754	** track of which is used.
2755	*/
2756	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2757	bar = pci_read_config(dev, adapter->memrid, 4);
2758	if (bar == 0) /* use next bar */
2759		adapter->memrid += 4;
2760	adapter->msix_mem = bus_alloc_resource_any(dev,
2761	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2762       	if (adapter->msix_mem == NULL) {
2763		/* May not be enabled */
2764		device_printf(adapter->dev,
2765		    "Unable to map MSIX table\n");
2766		goto msi;
2767	}
2768
2769	/* Figure out a reasonable auto config value */
2770	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2771
2772	/* Manual override */
2773	if (igb_num_queues != 0)
2774		queues = igb_num_queues;
2775
2776	/* Sanity check based on HW */
2777	switch (adapter->hw.mac.type) {
2778		case e1000_82575:
2779			maxqueues = 4;
2780			break;
2781		case e1000_82576:
2782		case e1000_82580:
2783		case e1000_i350:
2784		case e1000_i354:
2785			maxqueues = 8;
2786			break;
2787		case e1000_i210:
2788			maxqueues = 4;
2789			break;
2790		case e1000_i211:
2791			maxqueues = 2;
2792			break;
2793		default:  /* VF interfaces */
2794			maxqueues = 1;
2795			break;
2796	}
2797	if (queues > maxqueues)
2798		queues = maxqueues;
2799
2800	/* Manual override */
2801	if (igb_num_queues != 0)
2802		queues = igb_num_queues;
2803
2804	/*
2805	** One vector (RX/TX pair) per queue
2806	** plus an additional for Link interrupt
2807	*/
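	/* e.g. four queues would request five vectors */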
2808	want = queues + 1;
2809	if (msgs >= want)
2810		msgs = want;
2811	else {
2812               	device_printf(adapter->dev,
2813		    "MSIX Configuration Problem, "
2814		    "%d vectors configured, but %d queues wanted!\n",
2815		    msgs, want);
2816		goto msi;
2817	}
2818	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2819               	device_printf(adapter->dev,
2820		    "Using MSIX interrupts with %d vectors\n", msgs);
2821		adapter->num_queues = queues;
2822		return (msgs);
2823	}
2824	/*
2825	** If MSIX alloc failed or provided us with
2826	** less than needed, free and fall through to MSI
2827	*/
2828	pci_release_msi(dev);
2829
2830msi:
2831       	if (adapter->msix_mem != NULL) {
2832		bus_release_resource(dev, SYS_RES_MEMORY,
2833		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2834		adapter->msix_mem = NULL;
2835	}
2836       	msgs = 1;
2837	if (pci_alloc_msi(dev, &msgs) == 0) {
2838		device_printf(adapter->dev, "Using an MSI interrupt\n");
2839		return (msgs);
2840	}
2841	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2842	return (0);
2843}
2844
2845/*********************************************************************
2846 *
2847 *  Initialize the DMA Coalescing feature
2848 *
2849 **********************************************************************/
2850static void
2851igb_init_dmac(struct adapter *adapter, u32 pba)
2852{
2853	device_t	dev = adapter->dev;
2854	struct e1000_hw *hw = &adapter->hw;
2855	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2856	u16		hwm;
2857
2858	if (hw->mac.type == e1000_i211)
2859		return;
2860
2861	if (hw->mac.type > e1000_82580) {
2862
2863		if (adapter->dmac == 0) { /* Disabling it */
2864			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2865			return;
2866		} else
2867			device_printf(dev, "DMA Coalescing enabled\n");
2868
2869		/* Set starting threshold */
2870		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2871
2872		hwm = 64 * pba - adapter->max_frame_size / 16;
2873		if (hwm < 64 * (pba - 6))
2874			hwm = 64 * (pba - 6);
2875		reg = E1000_READ_REG(hw, E1000_FCRTC);
2876		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2877		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2878		    & E1000_FCRTC_RTH_COAL_MASK);
2879		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2880
2881
2882		dmac = pba - adapter->max_frame_size / 512;
2883		if (dmac < pba - 10)
2884			dmac = pba - 10;
2885		reg = E1000_READ_REG(hw, E1000_DMACR);
2886		reg &= ~E1000_DMACR_DMACTHR_MASK;
2887		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2888		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2889
2890		/* transition to L0x or L1 if available..*/
2891		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2892
2893		/* Check whether the link is a 2.5Gb backplane connection
2894		 * before configuring the watchdog timer: the timer field
2895		 * counts in 12.8usec intervals on a 2.5Gb backplane
2896		 * connection and in 32usec intervals on any other
2897		 * connection.
2898		 */
2899		if (hw->mac.type == e1000_i354) {
2900			int status = E1000_READ_REG(hw, E1000_STATUS);
2901			if ((status & E1000_STATUS_2P5_SKU) &&
2902			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2903				reg |= ((adapter->dmac * 5) >> 6);
2904			else
2905				reg |= (adapter->dmac >> 5);
2906		} else {
2907			reg |= (adapter->dmac >> 5);
2908		}
2909
2910		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2911
2912#ifdef I210_OBFF_SUPPORT
2913		/*
2914		 * Set the OBFF Rx threshold to DMA Coalescing Rx
2915		 * threshold - 2KB and enable the feature in the
2916		 * hardware for I210.
2917		 */
2918		if (hw->mac.type == e1000_i210) {
2919			int obff = dmac - 2;
2920			reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
2921			reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
2922			reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
2923			    | E1000_DOBFFCTL_EXIT_ACT_MASK;
2924			E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
2925		}
2926#endif
2927		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2928
2929		/* Set the interval before transition */
2930		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2931		if (hw->mac.type == e1000_i350)
2932			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2933		/*
2934		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so a
2935		** 4 usec delay takes 10 units (0xA); other links use 0x4.
2936		*/
2937		if (hw->mac.type == e1000_i354) {
2938			int status = E1000_READ_REG(hw, E1000_STATUS);
2939			if ((status & E1000_STATUS_2P5_SKU) &&
2940			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2941				reg |= 0xA;
2942			else
2943				reg |= 0x4;
2944		} else {
2945			reg |= 0x4;
2946		}
2947
2948		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2949
2950		/* free space in tx packet buffer to wake from DMA coal */
2951		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2952		    (2 * adapter->max_frame_size)) >> 6);
2953
2954		/* make low power state decision controlled by DMA coal */
2955		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2956		reg &= ~E1000_PCIEMISC_LX_DECISION;
2957		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2958
2959	} else if (hw->mac.type == e1000_82580) {
2960		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2961		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2962		    reg & ~E1000_PCIEMISC_LX_DECISION);
2963		E1000_WRITE_REG(hw, E1000_DMACR, 0);
2964	}
2965}
2966
2967
2968/*********************************************************************
2969 *
2970 *  Set up an fresh starting state
2971 *  Set up a fresh starting state
2972 **********************************************************************/
2973static void
2974igb_reset(struct adapter *adapter)
2975{
2976	device_t	dev = adapter->dev;
2977	struct e1000_hw *hw = &adapter->hw;
2978	struct e1000_fc_info *fc = &hw->fc;
2979	struct ifnet	*ifp = adapter->ifp;
2980	u32		pba = 0;
2981	u16		hwm;
2982
2983	INIT_DEBUGOUT("igb_reset: begin");
2984
2985	/* Let the firmware know the OS is in control */
2986	igb_get_hw_control(adapter);
2987
2988	/*
2989	 * Packet Buffer Allocation (PBA)
2990	 * Writing PBA sets the receive portion of the buffer
2991	 * the remainder is used for the transmit buffer.
2992	 */
2993	switch (hw->mac.type) {
2994	case e1000_82575:
2995		pba = E1000_PBA_32K;
2996		break;
2997	case e1000_82576:
2998	case e1000_vfadapt:
2999		pba = E1000_READ_REG(hw, E1000_RXPBS);
3000		pba &= E1000_RXPBS_SIZE_MASK_82576;
3001		break;
3002	case e1000_82580:
3003	case e1000_i350:
3004	case e1000_i354:
3005	case e1000_vfadapt_i350:
3006		pba = E1000_READ_REG(hw, E1000_RXPBS);
3007		pba = e1000_rxpbs_adjust_82580(pba);
3008		break;
3009	case e1000_i210:
3010	case e1000_i211:
3011		pba = E1000_PBA_34K;
3012	default:
3013		break;
3014	}
3015
3016	/* Special needs in case of Jumbo frames */
3017	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3018		u32 tx_space, min_tx, min_rx;
3019		pba = E1000_READ_REG(hw, E1000_PBA);
3020		tx_space = pba >> 16;
3021		pba &= 0xffff;
3022		min_tx = (adapter->max_frame_size +
3023		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3024		min_tx = roundup2(min_tx, 1024);
3025		min_tx >>= 10;
3026                min_rx = adapter->max_frame_size;
3027                min_rx = roundup2(min_rx, 1024);
3028                min_rx >>= 10;
3029		if (tx_space < min_tx &&
3030		    ((min_tx - tx_space) < pba)) {
3031			pba = pba - (min_tx - tx_space);
3032			/*
3033                         * if short on rx space, rx wins
3034                         * and must trump tx adjustment
3035			 */
3036                        if (pba < min_rx)
3037                                pba = min_rx;
3038		}
3039		E1000_WRITE_REG(hw, E1000_PBA, pba);
3040	}
3041
3042	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3043
3044	/*
3045	 * These parameters control the automatic generation (Tx) and
3046	 * response (Rx) to Ethernet PAUSE frames.
3047	 * - High water mark should allow for at least two frames to be
3048	 *   received after sending an XOFF.
3049	 * - Low water mark works best when it is very near the high water mark.
3050	 *   This allows the receiver to restart by sending XON when it has
3051	 *   drained a bit.
3052	 */
3053	hwm = min(((pba << 10) * 9 / 10),
3054	    ((pba << 10) - 2 * adapter->max_frame_size));
3055
3056	if (hw->mac.type < e1000_82576) {
3057		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3058		fc->low_water = fc->high_water - 8;
3059	} else {
3060		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3061		fc->low_water = fc->high_water - 16;
3062	}
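	/*
	 * Illustrative example: with a 34K PBA (i210/i211) and a
	 * 1518-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 3036)
	 * = 31334; at 16-byte granularity high_water becomes 31328 and
	 * low_water 31312.
	 */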
3063
3064	fc->pause_time = IGB_FC_PAUSE_TIME;
3065	fc->send_xon = TRUE;
3066	if (adapter->fc)
3067		fc->requested_mode = adapter->fc;
3068	else
3069		fc->requested_mode = e1000_fc_default;
3070
3071	/* Issue a global reset */
3072	e1000_reset_hw(hw);
3073	E1000_WRITE_REG(hw, E1000_WUC, 0);
3074
3075	/* Reset for AutoMediaDetect */
3076	if (adapter->flags & IGB_MEDIA_RESET) {
3077		e1000_setup_init_funcs(hw, TRUE);
3078		e1000_get_bus_info(hw);
3079		adapter->flags &= ~IGB_MEDIA_RESET;
3080	}
3081
3082	if (e1000_init_hw(hw) < 0)
3083		device_printf(dev, "Hardware Initialization Failed\n");
3084
3085	/* Setup DMA Coalescing */
3086	igb_init_dmac(adapter, pba);
3087
3088	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3089	e1000_get_phy_info(hw);
3090	e1000_check_for_link(hw);
3091	return;
3092}
3093
3094/*********************************************************************
3095 *
3096 *  Setup networking device structure and register an interface.
3097 *
3098 **********************************************************************/
3099static int
3100igb_setup_interface(device_t dev, struct adapter *adapter)
3101{
3102	struct ifnet   *ifp;
3103
3104	INIT_DEBUGOUT("igb_setup_interface: begin");
3105
3106	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3107	if (ifp == NULL) {
3108		device_printf(dev, "can not allocate ifnet structure\n");
3109		return (-1);
3110	}
3111	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3112	ifp->if_init =  igb_init;
3113	ifp->if_softc = adapter;
3114	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3115	ifp->if_ioctl = igb_ioctl;
3116#ifndef IGB_LEGACY_TX
3117	ifp->if_transmit = igb_mq_start;
3118	ifp->if_qflush = igb_qflush;
3119#else
3120	ifp->if_start = igb_start;
3121	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3122	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3123	IFQ_SET_READY(&ifp->if_snd);
3124#endif
3125
3126	ether_ifattach(ifp, adapter->hw.mac.addr);
3127
3128	ifp->if_capabilities = ifp->if_capenable = 0;
3129
3130	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3131	ifp->if_capabilities |= IFCAP_TSO;
3132	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3133	ifp->if_capenable = ifp->if_capabilities;
3134
3135	/* Advertise LRO capability, but don't enable it by default */
3136	ifp->if_capabilities |= IFCAP_LRO;
3137
3138#ifdef DEVICE_POLLING
3139	ifp->if_capabilities |= IFCAP_POLLING;
3140#endif
3141
3142	/*
3143	 * Tell the upper layer(s) we
3144	 * support full VLAN capability.
3145	 */
3146	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3147	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3148			     |  IFCAP_VLAN_HWTSO
3149			     |  IFCAP_VLAN_MTU;
3150	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3151			  |  IFCAP_VLAN_HWTSO
3152			  |  IFCAP_VLAN_MTU;
3153
3154	/*
3155	** Don't turn this on by default: if vlans are
3156	** created on another pseudo device (e.g. lagg),
3157	** vlan events are not passed through, breaking
3158	** operation, whereas with HW FILTER off it works.
3159	** If using vlans directly on the igb driver you can
3160	** enable this and get full hardware tag filtering.
3161	*/
3162	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3163
3164	/*
3165	 * Specify the media types supported by this adapter and register
3166	 * callbacks to update media and link information
3167	 */
3168	ifmedia_init(&adapter->media, IFM_IMASK,
3169	    igb_media_change, igb_media_status);
3170	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3171	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3172		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3173			    0, NULL);
3174		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3175	} else {
3176		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3177		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3178			    0, NULL);
3179		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3180			    0, NULL);
3181		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3182			    0, NULL);
3183		if (adapter->hw.phy.type != e1000_phy_ife) {
3184			ifmedia_add(&adapter->media,
3185				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3186			ifmedia_add(&adapter->media,
3187				IFM_ETHER | IFM_1000_T, 0, NULL);
3188		}
3189	}
3190	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3191	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3192	return (0);
3193}
3194
3195
3196/*
3197 * Manage DMA'able memory.
3198 */
3199static void
3200igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3201{
3202	if (error)
3203		return;
3204	*(bus_addr_t *) arg = segs[0].ds_addr;
3205}
3206
3207static int
3208igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3209        struct igb_dma_alloc *dma, int mapflags)
3210{
3211	int error;
3212
3213	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3214				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3215				BUS_SPACE_MAXADDR,	/* lowaddr */
3216				BUS_SPACE_MAXADDR,	/* highaddr */
3217				NULL, NULL,		/* filter, filterarg */
3218				size,			/* maxsize */
3219				1,			/* nsegments */
3220				size,			/* maxsegsize */
3221				0,			/* flags */
3222				NULL,			/* lockfunc */
3223				NULL,			/* lockarg */
3224				&dma->dma_tag);
3225	if (error) {
3226		device_printf(adapter->dev,
3227		    "%s: bus_dma_tag_create failed: %d\n",
3228		    __func__, error);
3229		goto fail_0;
3230	}
3231
3232	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3233	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3234	if (error) {
3235		device_printf(adapter->dev,
3236		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3237		    __func__, (uintmax_t)size, error);
3238		goto fail_2;
3239	}
3240
3241	dma->dma_paddr = 0;
3242	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3243	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3244	if (error || dma->dma_paddr == 0) {
3245		device_printf(adapter->dev,
3246		    "%s: bus_dmamap_load failed: %d\n",
3247		    __func__, error);
3248		goto fail_3;
3249	}
3250
3251	return (0);
3252
3253fail_3:
3254	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3255fail_2:
3256	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3257	bus_dma_tag_destroy(dma->dma_tag);
3258fail_0:
3259	dma->dma_map = NULL;
3260	dma->dma_tag = NULL;
3261
3262	return (error);
3263}
3264
3265static void
3266igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3267{
3268	if (dma->dma_tag == NULL)
3269		return;
3270	if (dma->dma_map != NULL) {
3271		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3272		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3273		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3274		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3275		dma->dma_map = NULL;
3276	}
3277	bus_dma_tag_destroy(dma->dma_tag);
3278	dma->dma_tag = NULL;
3279}
3280
3281
3282/*********************************************************************
3283 *
3284 *  Allocate memory for the transmit and receive rings, and then
3285 *  the descriptors associated with each, called only once at attach.
3286 *
3287 **********************************************************************/
3288static int
3289igb_allocate_queues(struct adapter *adapter)
3290{
3291	device_t dev = adapter->dev;
3292	struct igb_queue	*que = NULL;
3293	struct tx_ring		*txr = NULL;
3294	struct rx_ring		*rxr = NULL;
3295	int rsize, tsize, error = E1000_SUCCESS;
3296	int txconf = 0, rxconf = 0;
3297
3298	/* First allocate the top level queue structs */
3299	if (!(adapter->queues =
3300	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3301	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3302		device_printf(dev, "Unable to allocate queue memory\n");
3303		error = ENOMEM;
3304		goto fail;
3305	}
3306
3307	/* Next allocate the TX ring struct memory */
3308	if (!(adapter->tx_rings =
3309	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3310	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3311		device_printf(dev, "Unable to allocate TX ring memory\n");
3312		error = ENOMEM;
3313		goto tx_fail;
3314	}
3315
3316	/* Now allocate the RX */
3317	if (!(adapter->rx_rings =
3318	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3319	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320		device_printf(dev, "Unable to allocate RX ring memory\n");
3321		error = ENOMEM;
3322		goto rx_fail;
3323	}
3324
3325	tsize = roundup2(adapter->num_tx_desc *
3326	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
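	/*
	** e.g. 1024 TX descriptors at 16 bytes each make a 16KB ring,
	** already a multiple of IGB_DBA_ALIGN.
	*/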
3327	/*
3328	 * Now set up the TX queues, txconf is needed to handle the
3329	 * possibility that things fail midcourse and we need to
3330	 * undo memory gracefully
3331	 */
3332	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3333		/* Set up some basics */
3334		txr = &adapter->tx_rings[i];
3335		txr->adapter = adapter;
3336		txr->me = i;
3337		txr->num_desc = adapter->num_tx_desc;
3338
3339		/* Initialize the TX lock */
3340		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3341		    device_get_nameunit(dev), txr->me);
3342		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3343
3344		if (igb_dma_malloc(adapter, tsize,
3345			&txr->txdma, BUS_DMA_NOWAIT)) {
3346			device_printf(dev,
3347			    "Unable to allocate TX Descriptor memory\n");
3348			error = ENOMEM;
3349			goto err_tx_desc;
3350		}
3351		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3352		bzero((void *)txr->tx_base, tsize);
3353
3354        	/* Now allocate transmit buffers for the ring */
3355        	if (igb_allocate_transmit_buffers(txr)) {
3356			device_printf(dev,
3357			    "Critical Failure setting up transmit buffers\n");
3358			error = ENOMEM;
3359			goto err_tx_desc;
3360        	}
3361#ifndef IGB_LEGACY_TX
3362		/* Allocate a buf ring */
3363		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3364		    M_WAITOK, &txr->tx_mtx);
3365#endif
3366	}
3367
3368	/*
3369	 * Next the RX queues...
3370	 */
3371	rsize = roundup2(adapter->num_rx_desc *
3372	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3373	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3374		rxr = &adapter->rx_rings[i];
3375		rxr->adapter = adapter;
3376		rxr->me = i;
3377
3378		/* Initialize the RX lock */
3379		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3380		    device_get_nameunit(dev), rxr->me);
3381		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3382
3383		if (igb_dma_malloc(adapter, rsize,
3384			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3385			device_printf(dev,
3386			    "Unable to allocate RX Descriptor memory\n");
3387			error = ENOMEM;
3388			goto err_rx_desc;
3389		}
3390		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3391		bzero((void *)rxr->rx_base, rsize);
3392
3393        	/* Allocate receive buffers for the ring*/
3394		if (igb_allocate_receive_buffers(rxr)) {
3395			device_printf(dev,
3396			    "Critical Failure setting up receive buffers\n");
3397			error = ENOMEM;
3398			goto err_rx_desc;
3399		}
3400	}
3401
3402	/*
3403	** Finally set up the queue holding structs
3404	*/
3405	for (int i = 0; i < adapter->num_queues; i++) {
3406		que = &adapter->queues[i];
3407		que->adapter = adapter;
3408		que->txr = &adapter->tx_rings[i];
3409		que->rxr = &adapter->rx_rings[i];
3410	}
3411
3412	return (0);
3413
3414err_rx_desc:
3415	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3416		igb_dma_free(adapter, &rxr->rxdma);
3417err_tx_desc:
3418	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3419		igb_dma_free(adapter, &txr->txdma);
3420	free(adapter->rx_rings, M_DEVBUF);
3421rx_fail:
3422#ifndef IGB_LEGACY_TX
3423	buf_ring_free(txr->br, M_DEVBUF);
3424#endif
3425	free(adapter->tx_rings, M_DEVBUF);
3426tx_fail:
3427	free(adapter->queues, M_DEVBUF);
3428fail:
3429	return (error);
3430}
3431
3432/*********************************************************************
3433 *
3434 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3435 *  the information needed to transmit a packet on the wire. This is
3436 *  called only once at attach, setup is done every reset.
3437 *
3438 **********************************************************************/
3439static int
3440igb_allocate_transmit_buffers(struct tx_ring *txr)
3441{
3442	struct adapter *adapter = txr->adapter;
3443	device_t dev = adapter->dev;
3444	struct igb_tx_buf *txbuf;
3445	int error, i;
3446
3447	/*
3448	 * Setup DMA descriptor areas.
3449	 */
3450	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3451			       1, 0,			/* alignment, bounds */
3452			       BUS_SPACE_MAXADDR,	/* lowaddr */
3453			       BUS_SPACE_MAXADDR,	/* highaddr */
3454			       NULL, NULL,		/* filter, filterarg */
3455			       IGB_TSO_SIZE,		/* maxsize */
3456			       IGB_MAX_SCATTER,		/* nsegments */
3457			       PAGE_SIZE,		/* maxsegsize */
3458			       0,			/* flags */
3459			       NULL,			/* lockfunc */
3460			       NULL,			/* lockfuncarg */
3461			       &txr->txtag))) {
3462		device_printf(dev,"Unable to allocate TX DMA tag\n");
3463		goto fail;
3464	}
3465
3466	if (!(txr->tx_buffers =
3467	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3468	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3469		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3470		error = ENOMEM;
3471		goto fail;
3472	}
3473
3474        /* Create the descriptor buffer dma maps */
3475	txbuf = txr->tx_buffers;
3476	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3477		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3478		if (error != 0) {
3479			device_printf(dev, "Unable to create TX DMA map\n");
3480			goto fail;
3481		}
3482	}
3483
3484	return 0;
3485fail:
3486	/* We free all, it handles case where we are in the middle */
3487	igb_free_transmit_structures(adapter);
3488	return (error);
3489}
3490
3491/*********************************************************************
3492 *
3493 *  Initialize a transmit ring.
3494 *
3495 **********************************************************************/
3496static void
3497igb_setup_transmit_ring(struct tx_ring *txr)
3498{
3499	struct adapter *adapter = txr->adapter;
3500	struct igb_tx_buf *txbuf;
3501	int i;
3502#ifdef DEV_NETMAP
3503	struct netmap_adapter *na = NA(adapter->ifp);
3504	struct netmap_slot *slot;
3505#endif /* DEV_NETMAP */
3506
3507	/* Clear the old descriptor contents */
3508	IGB_TX_LOCK(txr);
3509#ifdef DEV_NETMAP
3510	slot = netmap_reset(na, NR_TX, txr->me, 0);
3511#endif /* DEV_NETMAP */
3512	bzero((void *)txr->tx_base,
3513	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3514	/* Reset indices */
3515	txr->next_avail_desc = 0;
3516	txr->next_to_clean = 0;
3517
3518	/* Free any existing tx buffers. */
3519        txbuf = txr->tx_buffers;
3520	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3521		if (txbuf->m_head != NULL) {
3522			bus_dmamap_sync(txr->txtag, txbuf->map,
3523			    BUS_DMASYNC_POSTWRITE);
3524			bus_dmamap_unload(txr->txtag, txbuf->map);
3525			m_freem(txbuf->m_head);
3526			txbuf->m_head = NULL;
3527		}
3528#ifdef DEV_NETMAP
3529		if (slot) {
3530			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3531			/* no need to set the address */
3532			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3533		}
3534#endif /* DEV_NETMAP */
3535		/* clear the watch index */
3536		/* clear the EOP watch pointer */
3537        }
3538
3539	/* Set number of descriptors available */
3540	txr->tx_avail = adapter->num_tx_desc;
3541
3542	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3543	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3544	IGB_TX_UNLOCK(txr);
3545}
3546
3547/*********************************************************************
3548 *
3549 *  Initialize all transmit rings.
3550 *
3551 **********************************************************************/
3552static void
3553igb_setup_transmit_structures(struct adapter *adapter)
3554{
3555	struct tx_ring *txr = adapter->tx_rings;
3556
3557	for (int i = 0; i < adapter->num_queues; i++, txr++)
3558		igb_setup_transmit_ring(txr);
3559
3560	return;
3561}
3562
3563/*********************************************************************
3564 *
3565 *  Enable transmit unit.
3566 *
3567 **********************************************************************/
3568static void
3569igb_initialize_transmit_units(struct adapter *adapter)
3570{
3571	struct tx_ring	*txr = adapter->tx_rings;
3572	struct e1000_hw *hw = &adapter->hw;
3573	u32		tctl, txdctl;
3574
3575	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3576	tctl = txdctl = 0;
3577
3578	/* Setup the Tx Descriptor Rings */
3579	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3580		u64 bus_addr = txr->txdma.dma_paddr;
3581
3582		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3583		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3584		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3585		    (uint32_t)(bus_addr >> 32));
3586		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3587		    (uint32_t)bus_addr);
3588
3589		/* Setup the HW Tx Head and Tail descriptor pointers */
3590		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3591		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3592
3593		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3594		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3595		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3596
3597		txr->queue_status = IGB_QUEUE_IDLE;
3598
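		/*
		** Pack the prefetch, host and write-back thresholds
		** into TXDCTL: the shifts below place them in the
		** PTHRESH, HTHRESH and WTHRESH fields respectively,
		** then the queue is enabled.
		*/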
3599		txdctl |= IGB_TX_PTHRESH;
3600		txdctl |= IGB_TX_HTHRESH << 8;
3601		txdctl |= IGB_TX_WTHRESH << 16;
3602		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3603		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3604	}
3605
3606	if (adapter->vf_ifp)
3607		return;
3608
3609	e1000_config_collision_dist(hw);
3610
3611	/* Program the Transmit Control Register */
3612	tctl = E1000_READ_REG(hw, E1000_TCTL);
3613	tctl &= ~E1000_TCTL_CT;
3614	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3615		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3616
3617	/* This write will effectively turn on the transmit unit. */
3618	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3619}
3620
3621/*********************************************************************
3622 *
3623 *  Free all transmit rings.
3624 *
3625 **********************************************************************/
3626static void
3627igb_free_transmit_structures(struct adapter *adapter)
3628{
3629	struct tx_ring *txr = adapter->tx_rings;
3630
3631	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3632		IGB_TX_LOCK(txr);
3633		igb_free_transmit_buffers(txr);
3634		igb_dma_free(adapter, &txr->txdma);
3635		IGB_TX_UNLOCK(txr);
3636		IGB_TX_LOCK_DESTROY(txr);
3637	}
3638	free(adapter->tx_rings, M_DEVBUF);
3639}
3640
3641/*********************************************************************
3642 *
3643 *  Free transmit ring related data structures.
3644 *
3645 **********************************************************************/
3646static void
3647igb_free_transmit_buffers(struct tx_ring *txr)
3648{
3649	struct adapter *adapter = txr->adapter;
3650	struct igb_tx_buf *tx_buffer;
3651	int             i;
3652
3653	INIT_DEBUGOUT("free_transmit_ring: begin");
3654
3655	if (txr->tx_buffers == NULL)
3656		return;
3657
3658	tx_buffer = txr->tx_buffers;
3659	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3660		if (tx_buffer->m_head != NULL) {
3661			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3662			    BUS_DMASYNC_POSTWRITE);
3663			bus_dmamap_unload(txr->txtag,
3664			    tx_buffer->map);
3665			m_freem(tx_buffer->m_head);
3666			tx_buffer->m_head = NULL;
3667			if (tx_buffer->map != NULL) {
3668				bus_dmamap_destroy(txr->txtag,
3669				    tx_buffer->map);
3670				tx_buffer->map = NULL;
3671			}
3672		} else if (tx_buffer->map != NULL) {
3673			bus_dmamap_unload(txr->txtag,
3674			    tx_buffer->map);
3675			bus_dmamap_destroy(txr->txtag,
3676			    tx_buffer->map);
3677			tx_buffer->map = NULL;
3678		}
3679	}
3680#ifndef IGB_LEGACY_TX
3681	if (txr->br != NULL)
3682		buf_ring_free(txr->br, M_DEVBUF);
3683#endif
3684	if (txr->tx_buffers != NULL) {
3685		free(txr->tx_buffers, M_DEVBUF);
3686		txr->tx_buffers = NULL;
3687	}
3688	if (txr->txtag != NULL) {
3689		bus_dma_tag_destroy(txr->txtag);
3690		txr->txtag = NULL;
3691	}
3692	return;
3693}
3694
3695/**********************************************************************
3696 *
3697 *  Setup work for hardware segmentation offload (TSO) on
3698 *  adapters using advanced tx descriptors
3699 *
3700 **********************************************************************/
3701static int
3702igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3703    u32 *cmd_type_len, u32 *olinfo_status)
3704{
3705	struct adapter *adapter = txr->adapter;
3706	struct e1000_adv_tx_context_desc *TXD;
3707	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3708	u32 mss_l4len_idx = 0, paylen;
3709	u16 vtag = 0, eh_type;
3710	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3711	struct ether_vlan_header *eh;
3712#ifdef INET6
3713	struct ip6_hdr *ip6;
3714#endif
3715#ifdef INET
3716	struct ip *ip;
3717#endif
3718	struct tcphdr *th;
3719
3720
3721	/*
3722	 * Determine where frame payload starts.
3723	 * Jump over vlan headers if already present
3724	 */
3725	eh = mtod(mp, struct ether_vlan_header *);
3726	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3727		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3728		eh_type = eh->evl_proto;
3729	} else {
3730		ehdrlen = ETHER_HDR_LEN;
3731		eh_type = eh->evl_encap_proto;
3732	}
3733
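	/*
	** For TSO the TCP checksum is seeded below with the
	** pseudo-header checksum (computed with a zero length),
	** which the hardware uses when it completes the checksum
	** for each segment.
	*/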
3734	switch (ntohs(eh_type)) {
3735#ifdef INET6
3736	case ETHERTYPE_IPV6:
3737		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3738		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3739		if (ip6->ip6_nxt != IPPROTO_TCP)
3740			return (ENXIO);
3741		ip_hlen = sizeof(struct ip6_hdr);
3742		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3743		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3744		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3745		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3746		break;
3747#endif
3748#ifdef INET
3749	case ETHERTYPE_IP:
3750		ip = (struct ip *)(mp->m_data + ehdrlen);
3751		if (ip->ip_p != IPPROTO_TCP)
3752			return (ENXIO);
3753		ip->ip_sum = 0;
3754		ip_hlen = ip->ip_hl << 2;
3755		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3756		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3757		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3758		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3759		/* Tell transmit desc to also do IPv4 checksum. */
3760		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3761		break;
3762#endif
3763	default:
3764		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3765		    __func__, ntohs(eh_type));
3766		break;
3767	}
3768
3769	ctxd = txr->next_avail_desc;
3770	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3771
3772	tcp_hlen = th->th_off << 2;
3773
3774	/* This is used in the transmit desc in encap */
3775	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3776
3777	/* VLAN MACLEN IPLEN */
3778	if (mp->m_flags & M_VLANTAG) {
3779		vtag = htole16(mp->m_pkthdr.ether_vtag);
3780                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3781	}
3782
3783	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3784	vlan_macip_lens |= ip_hlen;
3785	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3786
3787	/* ADV DTYPE TUCMD */
3788	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3789	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3790	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3791
3792	/* MSS L4LEN IDX */
3793	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3794	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3795	/* 82575 needs the queue index added */
3796	if (adapter->hw.mac.type == e1000_82575)
3797		mss_l4len_idx |= txr->me << 4;
3798	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3799
3800	TXD->seqnum_seed = htole32(0);
3801
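	/*
	** The context descriptor consumes one ring slot, so advance
	** the index (with wrap) and account for it in tx_avail.
	*/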
3802	if (++ctxd == txr->num_desc)
3803		ctxd = 0;
3804
3805	txr->tx_avail--;
3806	txr->next_avail_desc = ctxd;
3807	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3808	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3809	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3810	++txr->tso_tx;
3811	return (0);
3812}
3813
3814/*********************************************************************
3815 *
3816 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3817 *
3818 **********************************************************************/
3819
3820static int
3821igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3822    u32 *cmd_type_len, u32 *olinfo_status)
3823{
3824	struct e1000_adv_tx_context_desc *TXD;
3825	struct adapter *adapter = txr->adapter;
3826	struct ether_vlan_header *eh;
3827	struct ip *ip;
3828	struct ip6_hdr *ip6;
3829	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3830	int	ehdrlen, ip_hlen = 0;
3831	u16	etype;
3832	u8	ipproto = 0;
3833	int	offload = TRUE;
3834	int	ctxd = txr->next_avail_desc;
3835	u16	vtag = 0;
3836
3837	/* First check if TSO is to be used */
3838	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3839		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3840
3841	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3842		offload = FALSE;
3843
3844	/* Indicate the whole packet as payload when not doing TSO */
3845       	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3846
3847	/* Now ready a context descriptor */
3848	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3849
3850	/*
3851	** In advanced descriptors the vlan tag must
3852	** be placed into the context descriptor. Hence
3853	** we need to make one even if not doing offloads.
3854	*/
3855	if (mp->m_flags & M_VLANTAG) {
3856		vtag = htole16(mp->m_pkthdr.ether_vtag);
3857		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3858	} else if (offload == FALSE) /* ... no offload to do */
3859		return (0);
3860
3861	/*
3862	 * Determine where frame payload starts.
3863	 * Jump over vlan headers if already present,
3864	 * helpful for QinQ too.
3865	 */
3866	eh = mtod(mp, struct ether_vlan_header *);
3867	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3868		etype = ntohs(eh->evl_proto);
3869		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3870	} else {
3871		etype = ntohs(eh->evl_encap_proto);
3872		ehdrlen = ETHER_HDR_LEN;
3873	}
3874
3875	/* Set the ether header length */
3876	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3877
3878	switch (etype) {
3879		case ETHERTYPE_IP:
3880			ip = (struct ip *)(mp->m_data + ehdrlen);
3881			ip_hlen = ip->ip_hl << 2;
3882			ipproto = ip->ip_p;
3883			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3884			break;
3885		case ETHERTYPE_IPV6:
3886			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3887			ip_hlen = sizeof(struct ip6_hdr);
3888			/* XXX-BZ this will go badly in case of ext hdrs. */
3889			ipproto = ip6->ip6_nxt;
3890			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3891			break;
3892		default:
3893			offload = FALSE;
3894			break;
3895	}
3896
3897	vlan_macip_lens |= ip_hlen;
3898	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3899
3900	switch (ipproto) {
3901		case IPPROTO_TCP:
3902			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3903				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3904			break;
3905		case IPPROTO_UDP:
3906			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3907				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3908			break;
3909
3910#if __FreeBSD_version >= 800000
3911		case IPPROTO_SCTP:
3912			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3913				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3914			break;
3915#endif
3916		default:
3917			offload = FALSE;
3918			break;
3919	}
3920
3921	if (offload) /* For the TX descriptor setup */
3922		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3923
3924	/* 82575 needs the queue index added */
3925	if (adapter->hw.mac.type == e1000_82575)
3926		mss_l4len_idx = txr->me << 4;
3927
3928	/* Now copy bits into descriptor */
3929	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3930	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3931	TXD->seqnum_seed = htole32(0);
3932	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3933
3934	/* We've consumed the first desc, adjust counters */
3935	if (++ctxd == txr->num_desc)
3936		ctxd = 0;
3937	txr->next_avail_desc = ctxd;
3938	--txr->tx_avail;
3939
3940        return (0);
3941}
3942
3943/**********************************************************************
3944 *
3945 *  Examine each tx_buffer in the used queue. If the hardware is done
3946 *  processing the packet then free associated resources. The
3947 *  tx_buffer is put back on the free queue.
3948 *
3949 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3950 **********************************************************************/
3951static bool
3952igb_txeof(struct tx_ring *txr)
3953{
3954	struct adapter		*adapter = txr->adapter;
3955	struct ifnet		*ifp = adapter->ifp;
3956	u32			work, processed = 0;
3957	u16			limit = txr->process_limit;
3958	struct igb_tx_buf	*buf;
3959	union e1000_adv_tx_desc *txd;
3960
3961	mtx_assert(&txr->tx_mtx, MA_OWNED);
3962
3963#ifdef DEV_NETMAP
3964	if (netmap_tx_irq(ifp, txr->me |
3965	    (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)))
3966		return (FALSE);
3967#endif /* DEV_NETMAP */
3968
3969	if (txr->tx_avail == txr->num_desc) {
3970		txr->queue_status = IGB_QUEUE_IDLE;
3971		return FALSE;
3972	}
3973
3974	/* Get work starting point */
3975	work = txr->next_to_clean;
3976	buf = &txr->tx_buffers[work];
3977	txd = &txr->tx_base[work];
3978	work -= txr->num_desc; /* The distance to ring end */
3979        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3980            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
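	/*
	** 'work' is kept as a negative offset from the end of the
	** ring: it counts up toward zero, so the (!work) tests below
	** are a cheap way to detect when the index wraps back to the
	** start of the ring.
	*/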
3981	do {
3982		union e1000_adv_tx_desc *eop = buf->eop;
3983		if (eop == NULL) /* No work */
3984			break;
3985
3986		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3987			break;	/* I/O not complete */
3988
3989		if (buf->m_head) {
3990			txr->bytes +=
3991			    buf->m_head->m_pkthdr.len;
3992			bus_dmamap_sync(txr->txtag,
3993			    buf->map,
3994			    BUS_DMASYNC_POSTWRITE);
3995			bus_dmamap_unload(txr->txtag,
3996			    buf->map);
3997			m_freem(buf->m_head);
3998			buf->m_head = NULL;
3999			buf->map = NULL;
4000		}
4001		buf->eop = NULL;
4002		++txr->tx_avail;
4003
4004		/* We clean the range if multi segment */
4005		while (txd != eop) {
4006			++txd;
4007			++buf;
4008			++work;
4009			/* wrap the ring? */
4010			if (__predict_false(!work)) {
4011				work -= txr->num_desc;
4012				buf = txr->tx_buffers;
4013				txd = txr->tx_base;
4014			}
4015			if (buf->m_head) {
4016				txr->bytes +=
4017				    buf->m_head->m_pkthdr.len;
4018				bus_dmamap_sync(txr->txtag,
4019				    buf->map,
4020				    BUS_DMASYNC_POSTWRITE);
4021				bus_dmamap_unload(txr->txtag,
4022				    buf->map);
4023				m_freem(buf->m_head);
4024				buf->m_head = NULL;
4025				buf->map = NULL;
4026			}
4027			++txr->tx_avail;
4028			buf->eop = NULL;
4029
4030		}
4031		++txr->packets;
4032		++processed;
4033		++ifp->if_opackets;
4034		txr->watchdog_time = ticks;
4035
4036		/* Try the next packet */
4037		++txd;
4038		++buf;
4039		++work;
4040		/* reset with a wrap */
4041		if (__predict_false(!work)) {
4042			work -= txr->num_desc;
4043			buf = txr->tx_buffers;
4044			txd = txr->tx_base;
4045		}
4046		prefetch(txd);
4047	} while (__predict_true(--limit));
4048
4049	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4050	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4051
4052	work += txr->num_desc;
4053	txr->next_to_clean = work;
4054
4055	/*
4056	** Watchdog calculation: we know there's
4057	** work outstanding or the first return
4058	** would have been taken, so nothing processed
4059	** for too long indicates a hang.
4060	*/
4061	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4062		txr->queue_status |= IGB_QUEUE_HUNG;
4063
4064	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4065		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4066
4067	if (txr->tx_avail == txr->num_desc) {
4068		txr->queue_status = IGB_QUEUE_IDLE;
4069		return (FALSE);
4070	}
4071
4072	return (TRUE);
4073}
4074
4075/*********************************************************************
4076 *
4077 *  Refresh mbuf buffers for RX descriptor rings
4078 *   - now keeps its own state so discards due to resource
4079 *     exhaustion are unnecessary, if an mbuf cannot be obtained
4080 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4081 *     it just returns, keeping its placeholder, so it can simply
4082 *     be called again to retry.
4083 **********************************************************************/
4084static void
4085igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4086{
4087	struct adapter		*adapter = rxr->adapter;
4088	bus_dma_segment_t	hseg[1];
4089	bus_dma_segment_t	pseg[1];
4090	struct igb_rx_buf	*rxbuf;
4091	struct mbuf		*mh, *mp;
4092	int			i, j, nsegs, error;
4093	bool			refreshed = FALSE;
4094
4095	i = j = rxr->next_to_refresh;
4096	/*
4097	** Get one descriptor beyond
4098	** our work mark to control
4099	** the loop.
4100        */
4101	if (++j == adapter->num_rx_desc)
4102		j = 0;
4103
4104	while (j != limit) {
4105		rxbuf = &rxr->rx_buffers[i];
4106		/* No hdr mbuf used with header split off */
4107		if (rxr->hdr_split == FALSE)
4108			goto no_split;
4109		if (rxbuf->m_head == NULL) {
4110			mh = m_gethdr(M_NOWAIT, MT_DATA);
4111			if (mh == NULL)
4112				goto update;
4113		} else
4114			mh = rxbuf->m_head;
4115
4116		mh->m_pkthdr.len = mh->m_len = MHLEN;
4117		mh->m_len = MHLEN;
4118		mh->m_flags |= M_PKTHDR;
4119		/* Get the memory mapping */
4120		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4121		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4122		if (error != 0) {
4123			printf("Refresh mbufs: hdr dmamap load"
4124			    " failure - %d\n", error);
4125			m_free(mh);
4126			rxbuf->m_head = NULL;
4127			goto update;
4128		}
4129		rxbuf->m_head = mh;
4130		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4131		    BUS_DMASYNC_PREREAD);
4132		rxr->rx_base[i].read.hdr_addr =
4133		    htole64(hseg[0].ds_addr);
4134no_split:
4135		if (rxbuf->m_pack == NULL) {
4136			mp = m_getjcl(M_NOWAIT, MT_DATA,
4137			    M_PKTHDR, adapter->rx_mbuf_sz);
4138			if (mp == NULL)
4139				goto update;
4140		} else
4141			mp = rxbuf->m_pack;
4142
4143		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4144		/* Get the memory mapping */
4145		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4146		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4147		if (error != 0) {
4148			printf("Refresh mbufs: payload dmamap load"
4149			    " failure - %d\n", error);
4150			m_free(mp);
4151			rxbuf->m_pack = NULL;
4152			goto update;
4153		}
4154		rxbuf->m_pack = mp;
4155		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4156		    BUS_DMASYNC_PREREAD);
4157		rxr->rx_base[i].read.pkt_addr =
4158		    htole64(pseg[0].ds_addr);
4159		refreshed = TRUE; /* I feel wefreshed :) */
4160
4161		i = j; /* our next is precalculated */
4162		rxr->next_to_refresh = i;
4163		if (++j == adapter->num_rx_desc)
4164			j = 0;
4165	}
4166update:
4167	if (refreshed) /* update tail */
4168		E1000_WRITE_REG(&adapter->hw,
4169		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4170	return;
4171}
4172
4173
4174/*********************************************************************
4175 *
4176 *  Allocate memory for rx_buffer structures. Since we use one
4177 *  rx_buffer per received packet, the maximum number of rx_buffers
4178 *  that we'll need is equal to the number of receive descriptors
4179 *  that we've allocated.
4180 *
4181 **********************************************************************/
4182static int
4183igb_allocate_receive_buffers(struct rx_ring *rxr)
4184{
4185	struct	adapter 	*adapter = rxr->adapter;
4186	device_t 		dev = adapter->dev;
4187	struct igb_rx_buf	*rxbuf;
4188	int             	i, bsize, error;
4189
4190	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4191	if (!(rxr->rx_buffers =
4192	    (struct igb_rx_buf *) malloc(bsize,
4193	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4194		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4195		error = ENOMEM;
4196		goto fail;
4197	}
4198
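	/*
	** Two DMA tags are created below: a small one for the
	** header-split header mbufs (at most MSIZE bytes) and a
	** larger one for payload clusters (up to a 9K jumbo cluster).
	*/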
4199	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4200				   1, 0,		/* alignment, bounds */
4201				   BUS_SPACE_MAXADDR,	/* lowaddr */
4202				   BUS_SPACE_MAXADDR,	/* highaddr */
4203				   NULL, NULL,		/* filter, filterarg */
4204				   MSIZE,		/* maxsize */
4205				   1,			/* nsegments */
4206				   MSIZE,		/* maxsegsize */
4207				   0,			/* flags */
4208				   NULL,		/* lockfunc */
4209				   NULL,		/* lockfuncarg */
4210				   &rxr->htag))) {
4211		device_printf(dev, "Unable to create RX DMA tag\n");
4212		goto fail;
4213	}
4214
4215	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4216				   1, 0,		/* alignment, bounds */
4217				   BUS_SPACE_MAXADDR,	/* lowaddr */
4218				   BUS_SPACE_MAXADDR,	/* highaddr */
4219				   NULL, NULL,		/* filter, filterarg */
4220				   MJUM9BYTES,		/* maxsize */
4221				   1,			/* nsegments */
4222				   MJUM9BYTES,		/* maxsegsize */
4223				   0,			/* flags */
4224				   NULL,		/* lockfunc */
4225				   NULL,		/* lockfuncarg */
4226				   &rxr->ptag))) {
4227		device_printf(dev, "Unable to create RX payload DMA tag\n");
4228		goto fail;
4229	}
4230
4231	for (i = 0; i < adapter->num_rx_desc; i++) {
4232		rxbuf = &rxr->rx_buffers[i];
4233		error = bus_dmamap_create(rxr->htag,
4234		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4235		if (error) {
4236			device_printf(dev,
4237			    "Unable to create RX head DMA maps\n");
4238			goto fail;
4239		}
4240		error = bus_dmamap_create(rxr->ptag,
4241		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4242		if (error) {
4243			device_printf(dev,
4244			    "Unable to create RX packet DMA maps\n");
4245			goto fail;
4246		}
4247	}
4248
4249	return (0);
4250
4251fail:
4252	/* Frees all, but can handle partial completion */
4253	igb_free_receive_structures(adapter);
4254	return (error);
4255}
4256
4257
4258static void
4259igb_free_receive_ring(struct rx_ring *rxr)
4260{
4261	struct	adapter		*adapter = rxr->adapter;
4262	struct igb_rx_buf	*rxbuf;
4263
4264
4265	for (int i = 0; i < adapter->num_rx_desc; i++) {
4266		rxbuf = &rxr->rx_buffers[i];
4267		if (rxbuf->m_head != NULL) {
4268			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4269			    BUS_DMASYNC_POSTREAD);
4270			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4271			rxbuf->m_head->m_flags |= M_PKTHDR;
4272			m_freem(rxbuf->m_head);
4273		}
4274		if (rxbuf->m_pack != NULL) {
4275			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4276			    BUS_DMASYNC_POSTREAD);
4277			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4278			rxbuf->m_pack->m_flags |= M_PKTHDR;
4279			m_freem(rxbuf->m_pack);
4280		}
4281		rxbuf->m_head = NULL;
4282		rxbuf->m_pack = NULL;
4283	}
4284}
4285
4286
4287/*********************************************************************
4288 *
4289 *  Initialize a receive ring and its buffers.
4290 *
4291 **********************************************************************/
4292static int
4293igb_setup_receive_ring(struct rx_ring *rxr)
4294{
4295	struct	adapter		*adapter;
4296	struct  ifnet		*ifp;
4297	device_t		dev;
4298	struct igb_rx_buf	*rxbuf;
4299	bus_dma_segment_t	pseg[1], hseg[1];
4300	struct lro_ctrl		*lro = &rxr->lro;
4301	int			rsize, nsegs, error = 0;
4302#ifdef DEV_NETMAP
4303	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4304	struct netmap_slot *slot;
4305#endif /* DEV_NETMAP */
4306
4307	adapter = rxr->adapter;
4308	dev = adapter->dev;
4309	ifp = adapter->ifp;
4310
4311	/* Clear the ring contents */
4312	IGB_RX_LOCK(rxr);
4313#ifdef DEV_NETMAP
4314	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4315#endif /* DEV_NETMAP */
4316	rsize = roundup2(adapter->num_rx_desc *
4317	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4318	bzero((void *)rxr->rx_base, rsize);
4319
4320	/*
4321	** Free current RX buffer structures and their mbufs
4322	*/
4323	igb_free_receive_ring(rxr);
4324
4325	/* Configure for header split? */
4326	if (igb_header_split)
4327		rxr->hdr_split = TRUE;
4328
4329        /* Now replenish the ring mbufs */
4330	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4331		struct mbuf	*mh, *mp;
4332
4333		rxbuf = &rxr->rx_buffers[j];
4334#ifdef DEV_NETMAP
4335		if (slot) {
4336			/* slot sj is mapped to the j-th NIC-ring entry */
4337			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4338			uint64_t paddr;
4339			void *addr;
4340
4341			addr = PNMB(slot + sj, &paddr);
4342			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4343			/* Update descriptor */
4344			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4345			continue;
4346		}
4347#endif /* DEV_NETMAP */
4348		if (rxr->hdr_split == FALSE)
4349			goto skip_head;
4350
4351		/* First the header */
4352		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4353		if (rxbuf->m_head == NULL) {
4354			error = ENOBUFS;
4355                        goto fail;
4356		}
4357		m_adj(rxbuf->m_head, ETHER_ALIGN);
4358		mh = rxbuf->m_head;
4359		mh->m_len = mh->m_pkthdr.len = MHLEN;
4360		mh->m_flags |= M_PKTHDR;
4361		/* Get the memory mapping */
4362		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4363		    rxbuf->hmap, rxbuf->m_head, hseg,
4364		    &nsegs, BUS_DMA_NOWAIT);
4365		if (error != 0) /* Nothing elegant to do here */
4366                        goto fail;
4367		bus_dmamap_sync(rxr->htag,
4368		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4369		/* Update descriptor */
4370		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4371
4372skip_head:
4373		/* Now the payload cluster */
4374		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4375		    M_PKTHDR, adapter->rx_mbuf_sz);
4376		if (rxbuf->m_pack == NULL) {
4377			error = ENOBUFS;
4378                        goto fail;
4379		}
4380		mp = rxbuf->m_pack;
4381		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4382		/* Get the memory mapping */
4383		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4384		    rxbuf->pmap, mp, pseg,
4385		    &nsegs, BUS_DMA_NOWAIT);
4386		if (error != 0)
4387                        goto fail;
4388		bus_dmamap_sync(rxr->ptag,
4389		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4390		/* Update descriptor */
4391		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4392        }
4393
4394	/* Setup our descriptor indices */
4395	rxr->next_to_check = 0;
4396	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4397	rxr->lro_enabled = FALSE;
4398	rxr->rx_split_packets = 0;
4399	rxr->rx_bytes = 0;
4400
4401	rxr->fmp = NULL;
4402	rxr->lmp = NULL;
4403	rxr->discard = FALSE;
4404
4405	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4406	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4407
4408	/*
4409	** Now set up the LRO interface; we
4410	** also only do header split when LRO
4411	** is enabled, since it is often
4412	** undesirable in other setups.
4413	*/
4414	if (ifp->if_capenable & IFCAP_LRO) {
4415		error = tcp_lro_init(lro);
4416		if (error) {
4417			device_printf(dev, "LRO Initialization failed!\n");
4418			goto fail;
4419		}
4420		INIT_DEBUGOUT("RX LRO Initialized\n");
4421		rxr->lro_enabled = TRUE;
4422		lro->ifp = adapter->ifp;
4423	}
4424
4425	IGB_RX_UNLOCK(rxr);
4426	return (0);
4427
4428fail:
4429	igb_free_receive_ring(rxr);
4430	IGB_RX_UNLOCK(rxr);
4431	return (error);
4432}
4433
4434
4435/*********************************************************************
4436 *
4437 *  Initialize all receive rings.
4438 *
4439 **********************************************************************/
4440static int
4441igb_setup_receive_structures(struct adapter *adapter)
4442{
4443	struct rx_ring *rxr = adapter->rx_rings;
4444	int i;
4445
4446	for (i = 0; i < adapter->num_queues; i++, rxr++)
4447		if (igb_setup_receive_ring(rxr))
4448			goto fail;
4449
4450	return (0);
4451fail:
4452	/*
4453	 * Free RX buffers allocated so far; we will only handle
4454	 * the rings that completed, since the failing case will have
4455	 * cleaned up after itself. 'i' is the endpoint.
4456	 */
4457	for (int j = 0; j < i; ++j) {
4458		rxr = &adapter->rx_rings[j];
4459		IGB_RX_LOCK(rxr);
4460		igb_free_receive_ring(rxr);
4461		IGB_RX_UNLOCK(rxr);
4462	}
4463
4464	return (ENOBUFS);
4465}
4466
4467/*********************************************************************
4468 *
4469 *  Enable receive unit.
4470 *
4471 **********************************************************************/
4472static void
4473igb_initialize_receive_units(struct adapter *adapter)
4474{
4475	struct rx_ring	*rxr = adapter->rx_rings;
4476	struct ifnet	*ifp = adapter->ifp;
4477	struct e1000_hw *hw = &adapter->hw;
4478	u32		rctl, rxcsum, psize, srrctl = 0;
4479
4480	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4481
4482	/*
4483	 * Make sure receives are disabled while setting
4484	 * up the descriptor ring
4485	 */
4486	rctl = E1000_READ_REG(hw, E1000_RCTL);
4487	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4488
4489	/*
4490	** Set up for header split
4491	*/
4492	if (igb_header_split) {
4493		/* Use a standard mbuf for the header */
4494		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4495		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4496	} else
4497		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4498
4499	/*
4500	** Set up for jumbo frames
4501	*/
4502	if (ifp->if_mtu > ETHERMTU) {
4503		rctl |= E1000_RCTL_LPE;
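		/*
		** The SRRCTL packet buffer size field is programmed
		** in 1KB units, hence the BSIZEPKT shift applied to
		** the buffer sizes below.
		*/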
4504		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4505			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4506			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4507		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4508			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4509			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4510		}
4511		/* Set maximum packet len */
4512		psize = adapter->max_frame_size;
4513		/* are we on a vlan? */
4514		if (adapter->ifp->if_vlantrunk != NULL)
4515			psize += VLAN_TAG_SIZE;
4516		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4517	} else {
4518		rctl &= ~E1000_RCTL_LPE;
4519		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4520		rctl |= E1000_RCTL_SZ_2048;
4521	}
4522
4523	/* Setup the Base and Length of the Rx Descriptor Rings */
4524	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4525		u64 bus_addr = rxr->rxdma.dma_paddr;
4526		u32 rxdctl;
4527
4528		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4529		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4530		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4531		    (uint32_t)(bus_addr >> 32));
4532		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4533		    (uint32_t)bus_addr);
4534		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4535		/* Enable this Queue */
4536		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4537		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4538		rxdctl &= 0xFFF00000;
4539		rxdctl |= IGB_RX_PTHRESH;
4540		rxdctl |= IGB_RX_HTHRESH << 8;
4541		rxdctl |= IGB_RX_WTHRESH << 16;
4542		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4543	}
4544
4545	/*
4546	** Setup for RX MultiQueue
4547	*/
4548	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4549	if (adapter->num_queues >1) {
4550		u32 random[10], mrqc, shift = 0;
4551		union igb_reta {
4552			u32 dword;
4553			u8  bytes[4];
4554		} reta;
4555
4556		arc4rand(&random, sizeof(random), 0);
4557		if (adapter->hw.mac.type == e1000_82575)
4558			shift = 6;
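		/*
		** The redirection table holds 128 single-byte entries,
		** written four at a time through the RETA registers;
		** queues are assigned round-robin. The 82575 expects
		** the queue index shifted up within each entry, hence
		** the shift set above.
		*/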
4559		/* Warning FM follows */
4560		for (int i = 0; i < 128; i++) {
4561			reta.bytes[i & 3] =
4562			    (i % adapter->num_queues) << shift;
4563			if ((i & 3) == 3)
4564				E1000_WRITE_REG(hw,
4565				    E1000_RETA(i >> 2), reta.dword);
4566		}
4567		/* Now fill in hash table */
4568		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4569		for (int i = 0; i < 10; i++)
4570			E1000_WRITE_REG_ARRAY(hw,
4571			    E1000_RSSRK(0), i, random[i]);
4572
4573		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4574		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4575		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4576		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4577		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4578		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4579		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4580		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4581
4582		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4583
4584		/*
4585		** NOTE: Receive Full-Packet Checksum Offload
4586		** is mutually exclusive with Multiqueue. However,
4587		** this is not the same as TCP/IP checksum offload,
4588		** which still works.
4589		*/
4590		rxcsum |= E1000_RXCSUM_PCSD;
4591#if __FreeBSD_version >= 800000
4592		/* For SCTP Offload */
4593		if ((hw->mac.type == e1000_82576)
4594		    && (ifp->if_capenable & IFCAP_RXCSUM))
4595			rxcsum |= E1000_RXCSUM_CRCOFL;
4596#endif
4597	} else {
4598		/* Non RSS setup */
4599		if (ifp->if_capenable & IFCAP_RXCSUM) {
4600			rxcsum |= E1000_RXCSUM_IPPCSE;
4601#if __FreeBSD_version >= 800000
4602			if (adapter->hw.mac.type == e1000_82576)
4603				rxcsum |= E1000_RXCSUM_CRCOFL;
4604#endif
4605		} else
4606			rxcsum &= ~E1000_RXCSUM_TUOFL;
4607	}
4608	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4609
4610	/* Setup the Receive Control Register */
4611	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4612	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4613		   E1000_RCTL_RDMTS_HALF |
4614		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4615	/* Strip CRC bytes. */
4616	rctl |= E1000_RCTL_SECRC;
4617	/* Make sure VLAN Filters are off */
4618	rctl &= ~E1000_RCTL_VFE;
4619	/* Don't store bad packets */
4620	rctl &= ~E1000_RCTL_SBP;
4621
4622	/* Enable Receives */
4623	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4624
4625	/*
4626	 * Setup the HW Rx Head and Tail Descriptor Pointers
4627	 *   - needs to be after enable
4628	 */
4629	for (int i = 0; i < adapter->num_queues; i++) {
4630		rxr = &adapter->rx_rings[i];
4631		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4632#ifdef DEV_NETMAP
4633		/*
4634		 * An init() while a netmap client is active must
4635		 * preserve the rx buffers passed to userspace.
4636		 * In this driver it means we adjust RDT to
4637		 * something different from next_to_refresh
4638		 * (which is not used in netmap mode).
4639		 */
4640		if (ifp->if_capenable & IFCAP_NETMAP) {
4641			struct netmap_adapter *na = NA(adapter->ifp);
4642			struct netmap_kring *kring = &na->rx_rings[i];
4643			int t = rxr->next_to_refresh - kring->nr_hwavail;
4644
4645			if (t >= adapter->num_rx_desc)
4646				t -= adapter->num_rx_desc;
4647			else if (t < 0)
4648				t += adapter->num_rx_desc;
4649			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4650		} else
4651#endif /* DEV_NETMAP */
4652		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4653	}
4654	return;
4655}
4656
4657/*********************************************************************
4658 *
4659 *  Free receive rings.
4660 *
4661 **********************************************************************/
4662static void
4663igb_free_receive_structures(struct adapter *adapter)
4664{
4665	struct rx_ring *rxr = adapter->rx_rings;
4666
4667	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4668		struct lro_ctrl	*lro = &rxr->lro;
4669		igb_free_receive_buffers(rxr);
4670		tcp_lro_free(lro);
4671		igb_dma_free(adapter, &rxr->rxdma);
4672	}
4673
4674	free(adapter->rx_rings, M_DEVBUF);
4675}
4676
4677/*********************************************************************
4678 *
4679 *  Free receive ring data structures.
4680 *
4681 **********************************************************************/
4682static void
4683igb_free_receive_buffers(struct rx_ring *rxr)
4684{
4685	struct adapter		*adapter = rxr->adapter;
4686	struct igb_rx_buf	*rxbuf;
4687	int i;
4688
4689	INIT_DEBUGOUT("free_receive_structures: begin");
4690
4691	/* Cleanup any existing buffers */
4692	if (rxr->rx_buffers != NULL) {
4693		for (i = 0; i < adapter->num_rx_desc; i++) {
4694			rxbuf = &rxr->rx_buffers[i];
4695			if (rxbuf->m_head != NULL) {
4696				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4697				    BUS_DMASYNC_POSTREAD);
4698				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4699				rxbuf->m_head->m_flags |= M_PKTHDR;
4700				m_freem(rxbuf->m_head);
4701			}
4702			if (rxbuf->m_pack != NULL) {
4703				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4704				    BUS_DMASYNC_POSTREAD);
4705				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4706				rxbuf->m_pack->m_flags |= M_PKTHDR;
4707				m_freem(rxbuf->m_pack);
4708			}
4709			rxbuf->m_head = NULL;
4710			rxbuf->m_pack = NULL;
4711			if (rxbuf->hmap != NULL) {
4712				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4713				rxbuf->hmap = NULL;
4714			}
4715			if (rxbuf->pmap != NULL) {
4716				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4717				rxbuf->pmap = NULL;
4718			}
4719		}
4720		if (rxr->rx_buffers != NULL) {
4721			free(rxr->rx_buffers, M_DEVBUF);
4722			rxr->rx_buffers = NULL;
4723		}
4724	}
4725
4726	if (rxr->htag != NULL) {
4727		bus_dma_tag_destroy(rxr->htag);
4728		rxr->htag = NULL;
4729	}
4730	if (rxr->ptag != NULL) {
4731		bus_dma_tag_destroy(rxr->ptag);
4732		rxr->ptag = NULL;
4733	}
4734}
4735
4736static __inline void
4737igb_rx_discard(struct rx_ring *rxr, int i)
4738{
4739	struct igb_rx_buf	*rbuf;
4740
4741	rbuf = &rxr->rx_buffers[i];
4742
4743	/* Partially received? Free the chain */
4744	if (rxr->fmp != NULL) {
4745		rxr->fmp->m_flags |= M_PKTHDR;
4746		m_freem(rxr->fmp);
4747		rxr->fmp = NULL;
4748		rxr->lmp = NULL;
4749	}
4750
4751	/*
4752	** With advanced descriptors the writeback
4753	** clobbers the buffer addrs, so it's easier
4754	** to just free the existing mbufs and take
4755	** the normal refresh path to get new buffers
4756	** and mapping.
4757	*/
4758	if (rbuf->m_head) {
4759		m_free(rbuf->m_head);
4760		rbuf->m_head = NULL;
4761	}
4762
4763	if (rbuf->m_pack) {
4764		m_free(rbuf->m_pack);
4765		rbuf->m_pack = NULL;
4766	}
4767
4768	return;
4769}
4770
4771static __inline void
4772igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4773{
4774
4775	/*
4776	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4777	 * checksum has been verified by hardware. The packet also must not
4778	 * have a VLAN tag in the ethernet header.
4779	 */
4780	if (rxr->lro_enabled &&
4781	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4782	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4783	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4784	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4785	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4786	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4787		/*
4788		 * Send to the stack if:
4789		 **  - LRO not enabled, or
4790		 **  - no LRO resources, or
4791		 **  - lro enqueue fails
4792		 */
4793		if (rxr->lro.lro_cnt != 0)
4794			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4795				return;
4796	}
4797	IGB_RX_UNLOCK(rxr);
4798	(*ifp->if_input)(ifp, m);
4799	IGB_RX_LOCK(rxr);
4800}
4801
4802/*********************************************************************
4803 *
4804 *  This routine executes in interrupt context. It replenishes
4805 *  the mbufs in the descriptor and sends data which has been
4806 *  dma'ed into host memory to upper layer.
4807 *
4808 *  We loop at most count times if count is > 0, or until done if
4809 *  count < 0.
4810 *
4811 *  Return TRUE if more to clean, FALSE otherwise
4812 *********************************************************************/
4813static bool
4814igb_rxeof(struct igb_queue *que, int count, int *done)
4815{
4816	struct adapter		*adapter = que->adapter;
4817	struct rx_ring		*rxr = que->rxr;
4818	struct ifnet		*ifp = adapter->ifp;
4819	struct lro_ctrl		*lro = &rxr->lro;
4820	struct lro_entry	*queued;
4821	int			i, processed = 0, rxdone = 0;
4822	u32			ptype, staterr = 0;
4823	union e1000_adv_rx_desc	*cur;
4824
4825	IGB_RX_LOCK(rxr);
4826	/* Sync the ring. */
4827	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4828	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4829
4830#ifdef DEV_NETMAP
4831	if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed))
4832		return (FALSE);
4833#endif /* DEV_NETMAP */
4834
4835	/* Main clean loop */
4836	for (i = rxr->next_to_check; count != 0;) {
4837		struct mbuf		*sendmp, *mh, *mp;
4838		struct igb_rx_buf	*rxbuf;
4839		u16			hlen, plen, hdr, vtag;
4840		bool			eop = FALSE;
4841
4842		cur = &rxr->rx_base[i];
4843		staterr = le32toh(cur->wb.upper.status_error);
4844		if ((staterr & E1000_RXD_STAT_DD) == 0)
4845			break;
4846		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4847			break;
4848		count--;
4849		sendmp = mh = mp = NULL;
4850		cur->wb.upper.status_error = 0;
4851		rxbuf = &rxr->rx_buffers[i];
4852		plen = le16toh(cur->wb.upper.length);
4853		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
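		/*
		** The i350/i354 deliver the VLAN tag of loopback
		** packets byte-swapped, so be16toh is used to
		** compensate in that case.
		*/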
4854		if (((adapter->hw.mac.type == e1000_i350) ||
4855		    (adapter->hw.mac.type == e1000_i354)) &&
4856		    (staterr & E1000_RXDEXT_STATERR_LB))
4857			vtag = be16toh(cur->wb.upper.vlan);
4858		else
4859			vtag = le16toh(cur->wb.upper.vlan);
4860		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4861		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4862
4863		/* Make sure all segments of a bad packet are discarded */
4864		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4865		    (rxr->discard)) {
4866			adapter->dropped_pkts++;
4867			++rxr->rx_discarded;
4868			if (!eop) /* Catch subsequent segs */
4869				rxr->discard = TRUE;
4870			else
4871				rxr->discard = FALSE;
4872			igb_rx_discard(rxr, i);
4873			goto next_desc;
4874		}
4875
4876		/*
4877		** The way the hardware is configured to
4878		** split, it will ONLY use the header buffer
4879		** when header split is enabled, otherwise we
4880		** get normal behavior, ie, both header and
4881		** get normal behavior, i.e., both header and
4882		**
4883		** The fmp test is to catch the case where a
4884		** packet spans multiple descriptors, in that
4885		** packet spans multiple descriptors; in that
4886		*/
4887		if (rxr->hdr_split && rxr->fmp == NULL) {
4888			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4889			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4890			if (hlen > IGB_HDR_BUF)
4891				hlen = IGB_HDR_BUF;
4892			mh = rxr->rx_buffers[i].m_head;
4893			mh->m_len = hlen;
4894			/* clear buf pointer for refresh */
4895			rxbuf->m_head = NULL;
4896			/*
4897			** Get the payload length; this
4898			** could be zero if it's a small
4899			** packet.
4900			*/
4901			if (plen > 0) {
4902				mp = rxr->rx_buffers[i].m_pack;
4903				mp->m_len = plen;
4904				mh->m_next = mp;
4905				/* clear buf pointer */
4906				rxbuf->m_pack = NULL;
4907				rxr->rx_split_packets++;
4908			}
4909		} else {
4910			/*
4911			** Either no header split, or a
4912			** secondary piece of a fragmented
4913			** split packet.
4914			*/
4915			mh = rxr->rx_buffers[i].m_pack;
4916			mh->m_len = plen;
4917			/* clear buf info for refresh */
4918			rxbuf->m_pack = NULL;
4919		}
4920
4921		++processed; /* So we know when to refresh */
4922
4923		/* Initial frame - setup */
4924		if (rxr->fmp == NULL) {
4925			mh->m_pkthdr.len = mh->m_len;
4926			/* Save the head of the chain */
4927			rxr->fmp = mh;
4928			rxr->lmp = mh;
4929			if (mp != NULL) {
4930				/* Add payload if split */
4931				mh->m_pkthdr.len += mp->m_len;
4932				rxr->lmp = mh->m_next;
4933			}
4934		} else {
4935			/* Chain mbuf's together */
4936			rxr->lmp->m_next = mh;
4937			rxr->lmp = rxr->lmp->m_next;
4938			rxr->fmp->m_pkthdr.len += mh->m_len;
4939		}
4940
4941		if (eop) {
4942			rxr->fmp->m_pkthdr.rcvif = ifp;
4943			ifp->if_ipackets++;
4944			rxr->rx_packets++;
4945			/* capture data for AIM */
4946			rxr->packets++;
4947			rxr->bytes += rxr->fmp->m_pkthdr.len;
4948			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4949
4950			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4951				igb_rx_checksum(staterr, rxr->fmp, ptype);
4952
4953			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4954			    (staterr & E1000_RXD_STAT_VP) != 0) {
4955				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4956				rxr->fmp->m_flags |= M_VLANTAG;
4957			}
4958#ifndef IGB_LEGACY_TX
4959			rxr->fmp->m_pkthdr.flowid = que->msix;
4960			rxr->fmp->m_flags |= M_FLOWID;
4961#endif
4962			sendmp = rxr->fmp;
4963			/* Make sure to set M_PKTHDR. */
4964			sendmp->m_flags |= M_PKTHDR;
4965			rxr->fmp = NULL;
4966			rxr->lmp = NULL;
4967		}
4968
4969next_desc:
4970		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4971		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4972
4973		/* Advance our pointers to the next descriptor. */
4974		if (++i == adapter->num_rx_desc)
4975			i = 0;
4976		/*
4977		** Send to the stack or LRO
4978		*/
4979		if (sendmp != NULL) {
4980			rxr->next_to_check = i;
4981			igb_rx_input(rxr, ifp, sendmp, ptype);
4982			i = rxr->next_to_check;
4983			rxdone++;
4984		}
4985
4986		/* Every 8 descriptors we go to refresh mbufs */
4987		if (processed == 8) {
4988                        igb_refresh_mbufs(rxr, i);
4989                        processed = 0;
4990		}
4991	}
4992
4993	/* Catch any remainders */
4994	if (igb_rx_unrefreshed(rxr))
4995		igb_refresh_mbufs(rxr, i);
4996
4997	rxr->next_to_check = i;
4998
4999	/*
5000	 * Flush any outstanding LRO work
5001	 */
5002	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5003		SLIST_REMOVE_HEAD(&lro->lro_active, next);
5004		tcp_lro_flush(lro, queued);
5005	}
5006
5007	if (done != NULL)
5008		*done += rxdone;
5009
5010	IGB_RX_UNLOCK(rxr);
5011	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5012}
5013
5014/*********************************************************************
5015 *
5016 *  Verify that the hardware indicated that the checksum is valid.
5017 *  Inform the stack about the status of checksum so that stack
5018 *  doesn't spend time verifying the checksum.
5019 *
5020 *********************************************************************/
5021static void
5022igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5023{
5024	u16 status = (u16)staterr;
5025	u8  errors = (u8) (staterr >> 24);
5026	int sctp;
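	/*
	** staterr packs the descriptor status bits in the low word
	** and the error bits in the top byte; they are split apart
	** above before being checked.
	*/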
5027
5028	/* Ignore Checksum bit is set */
5029	if (status & E1000_RXD_STAT_IXSM) {
5030		mp->m_pkthdr.csum_flags = 0;
5031		return;
5032	}
5033
5034	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5035	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5036		sctp = 1;
5037	else
5038		sctp = 0;
5039	if (status & E1000_RXD_STAT_IPCS) {
5040		/* Did it pass? */
5041		if (!(errors & E1000_RXD_ERR_IPE)) {
5042			/* IP Checksum Good */
5043			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5044			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5045		} else
5046			mp->m_pkthdr.csum_flags = 0;
5047	}
5048
5049	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5050		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5051#if __FreeBSD_version >= 800000
5052		if (sctp) /* reassign */
5053			type = CSUM_SCTP_VALID;
5054#endif
5055		/* Did it pass? */
5056		if (!(errors & E1000_RXD_ERR_TCPE)) {
5057			mp->m_pkthdr.csum_flags |= type;
5058			if (sctp == 0)
5059				mp->m_pkthdr.csum_data = htons(0xffff);
5060		}
5061	}
5062	return;
5063}
5064
5065/*
5066 * This routine is run via an vlan
5067 * config EVENT
5068 */
5069static void
5070igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5071{
5072	struct adapter	*adapter = ifp->if_softc;
5073	u32		index, bit;
5074
5075	if (ifp->if_softc !=  arg)   /* Not our event */
5076		return;
5077
5078	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5079                return;
5080
5081	IGB_CORE_LOCK(adapter);
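	/*
	** The shadow VFTA is an array of 32-bit words: bits 11:5 of
	** the VLAN id select the word and bits 4:0 the bit within it.
	*/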
5082	index = (vtag >> 5) & 0x7F;
5083	bit = vtag & 0x1F;
5084	adapter->shadow_vfta[index] |= (1 << bit);
5085	++adapter->num_vlans;
5086	/* Change hw filter setting */
5087	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5088		igb_setup_vlan_hw_support(adapter);
5089	IGB_CORE_UNLOCK(adapter);
5090}
5091
5092/*
5093 * This routine is run via an vlan
5094 * unconfig EVENT
5095 */
5096static void
5097igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5098{
5099	struct adapter	*adapter = ifp->if_softc;
5100	u32		index, bit;
5101
5102	if (ifp->if_softc !=  arg)
5103		return;
5104
5105	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5106                return;
5107
5108	IGB_CORE_LOCK(adapter);
5109	index = (vtag >> 5) & 0x7F;
5110	bit = vtag & 0x1F;
5111	adapter->shadow_vfta[index] &= ~(1 << bit);
5112	--adapter->num_vlans;
5113	/* Change hw filter setting */
5114	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5115		igb_setup_vlan_hw_support(adapter);
5116	IGB_CORE_UNLOCK(adapter);
5117}
5118
5119static void
5120igb_setup_vlan_hw_support(struct adapter *adapter)
5121{
5122	struct e1000_hw *hw = &adapter->hw;
5123	struct ifnet	*ifp = adapter->ifp;
5124	u32             reg;
5125
5126	if (adapter->vf_ifp) {
5127		e1000_rlpml_set_vf(hw,
5128		    adapter->max_frame_size + VLAN_TAG_SIZE);
5129		return;
5130	}
5131
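	/* Enable VLAN tag handling (VME) in the device control register */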
5132	reg = E1000_READ_REG(hw, E1000_CTRL);
5133	reg |= E1000_CTRL_VME;
5134	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5135
5136	/* Enable the Filter Table */
5137	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5138		reg = E1000_READ_REG(hw, E1000_RCTL);
5139		reg &= ~E1000_RCTL_CFIEN;
5140		reg |= E1000_RCTL_VFE;
5141		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5142	}
5143
5144	/* Update the frame size */
5145	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5146	    adapter->max_frame_size + VLAN_TAG_SIZE);
5147
5148	/* Don't bother with table if no vlans */
5149	if ((adapter->num_vlans == 0) ||
5150	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5151                return;
5152	/*
5153	** A soft reset zero's out the VFTA, so
5154	** A soft reset zeroes out the VFTA, so
5155	*/
5156	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5157                if (adapter->shadow_vfta[i] != 0) {
5158			if (adapter->vf_ifp)
5159				e1000_vfta_set_vf(hw,
5160				    adapter->shadow_vfta[i], TRUE);
5161			else
5162				e1000_write_vfta(hw,
5163				    i, adapter->shadow_vfta[i]);
5164		}
5165}
5166
5167static void
5168igb_enable_intr(struct adapter *adapter)
5169{
5170	/* With RSS, set up what to auto clear */
5171	if (adapter->msix_mem) {
5172		u32 mask = (adapter->que_mask | adapter->link_mask);
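		/*
		** In MSIX mode the queue and link vectors are set to
		** auto-clear (EIAC) and auto-mask (EIAM) and enabled
		** in EIMS; link status change is the only cause left
		** enabled in the legacy IMS register.
		*/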
5173		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5174		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5175		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5176		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5177		    E1000_IMS_LSC);
5178	} else {
5179		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5180		    IMS_ENABLE_MASK);
5181	}
5182	E1000_WRITE_FLUSH(&adapter->hw);
5183
5184	return;
5185}
5186
5187static void
5188igb_disable_intr(struct adapter *adapter)
5189{
5190	if (adapter->msix_mem) {
5191		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5192		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5193	}
5194	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5195	E1000_WRITE_FLUSH(&adapter->hw);
5196	return;
5197}
5198
5199/*
5200 * Bit of a misnomer, what this really means is
5201 * to enable OS management of the system... aka
5202 * to disable special hardware management features
5203 */
5204static void
5205igb_init_manageability(struct adapter *adapter)
5206{
5207	if (adapter->has_manage) {
5208		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5209		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5210
5211		/* disable hardware interception of ARP */
5212		manc &= ~(E1000_MANC_ARP_EN);
5213
5214                /* enable receiving management packets to the host */
5215		manc |= E1000_MANC_EN_MNG2HOST;
5216		manc2h |= 1 << 5;  /* Mng Port 623 */
5217		manc2h |= 1 << 6;  /* Mng Port 664 */
5218		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5219		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5220	}
5221}
5222
5223/*
5224 * Give control back to hardware management
5225 * controller if there is one.
5226 */
5227static void
5228igb_release_manageability(struct adapter *adapter)
5229{
5230	if (adapter->has_manage) {
5231		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5232
5233		/* re-enable hardware interception of ARP */
5234		manc |= E1000_MANC_ARP_EN;
5235		manc &= ~E1000_MANC_EN_MNG2HOST;
5236
5237		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5238	}
5239}
5240
5241/*
5242 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5243 * For ASF and Pass Through versions of f/w this means that
5244 * the driver is loaded.
5245 *
5246 */
5247static void
5248igb_get_hw_control(struct adapter *adapter)
5249{
5250	u32 ctrl_ext;
5251
5252	if (adapter->vf_ifp)
5253		return;
5254
5255	/* Let firmware know the driver has taken over */
5256	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5257	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5258	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5259}
5260
5261/*
5262 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5263 * For ASF and Pass Through versions of f/w this means that the
5264 * driver is no longer loaded.
5265 *
5266 */
5267static void
5268igb_release_hw_control(struct adapter *adapter)
5269{
5270	u32 ctrl_ext;
5271
5272	if (adapter->vf_ifp)
5273		return;
5274
5275	/* Let firmware take over control of h/w */
5276	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5277	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5278	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5279}
5280
5281static int
5282igb_is_valid_ether_addr(uint8_t *addr)
5283{
5284	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5285
5286	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5287		return (FALSE);
5288	}
5289
5290	return (TRUE);
5291}
5292
5293
5294/*
5295 * Enable PCI Wake On Lan capability
5296 */
5297static void
5298igb_enable_wakeup(device_t dev)
5299{
5300	u16     cap, status;
5301	u8      id;
5302
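	/*
	** Note: only the first entry in the PCI capability list is
	** examined; it is assumed to be the power-management
	** capability.
	*/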
5303	/* First find the capabilities pointer */
5304	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5305	/* Read the PM Capabilities */
5306	id = pci_read_config(dev, cap, 1);
5307	if (id != PCIY_PMG)     /* Something wrong */
5308		return;
5309	/* OK, we have the power capabilities, so
5310	   now get the status register */
5311	cap += PCIR_POWER_STATUS;
5312	status = pci_read_config(dev, cap, 2);
5313	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5314	pci_write_config(dev, cap, status, 2);
5315	return;
5316}
5317
5318static void
5319igb_led_func(void *arg, int onoff)
5320{
5321	struct adapter	*adapter = arg;
5322
5323	IGB_CORE_LOCK(adapter);
5324	if (onoff) {
5325		e1000_setup_led(&adapter->hw);
5326		e1000_led_on(&adapter->hw);
5327	} else {
5328		e1000_led_off(&adapter->hw);
5329		e1000_cleanup_led(&adapter->hw);
5330	}
5331	IGB_CORE_UNLOCK(adapter);
5332}
5333
5334/**********************************************************************
5335 *
5336 *  Update the board statistics counters.
5337 *
5338 **********************************************************************/
5339static void
5340igb_update_stats_counters(struct adapter *adapter)
5341{
5342	struct ifnet		*ifp;
5343        struct e1000_hw		*hw = &adapter->hw;
5344	struct e1000_hw_stats	*stats;
5345
5346	/*
5347	** The virtual function adapter has only a
5348	** small controlled set of stats, do only
5349	** small controlled set of stats, so do only
5350	*/
5351	if (adapter->vf_ifp) {
5352		igb_update_vf_stats_counters(adapter);
5353		return;
5354	}
5355
5356	stats = (struct e1000_hw_stats	*)adapter->stats;
5357
5358	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5359	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5360		stats->symerrs +=
5361		    E1000_READ_REG(hw,E1000_SYMERRS);
5362		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5363	}
5364
5365	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5366	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5367	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5368	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5369
5370	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5371	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5372	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5373	stats->dc += E1000_READ_REG(hw, E1000_DC);
5374	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5375	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5376	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5377	/*
5378	** For watchdog management we need to know if we have been
5379	** paused during the last interval, so capture that here.
5380	*/
5381	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5382	stats->xoffrxc += adapter->pause_frames;
5383	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5384	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5385	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5386	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5387	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5388	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5389	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5390	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5391	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5392	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5393	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5394	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5395
5396	/* For the 64-bit byte counters the low dword must be read first. */
5397	/* Both registers clear on the read of the high dword */
5398
5399	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5400	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5401	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5402	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5403
5404	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5405	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5406	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5407	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5408	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5409
5410	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5411	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5412
5413	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5414	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5415	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5416	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5417	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5418	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5419	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5420	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5421	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5422	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5423
5424	/* Interrupt Counts */
5425
5426	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5427	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5428	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5429	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5430	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5431	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5432	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5433	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5434	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5435
5436	/* Host to Card Statistics */
5437
5438	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5439	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5440	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5441	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5442	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5443	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5444	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5445	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5446	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5447	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5448	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5449	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5450	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5451	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5452
5453	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5454	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5455	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5456	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5457	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5458	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5459
5460	ifp = adapter->ifp;
5461	ifp->if_collisions = stats->colc;
5462
5463	/* Rx Errors */
5464	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5465	    stats->crcerrs + stats->algnerrc +
5466	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5467
5468	/* Tx Errors */
5469	ifp->if_oerrors = stats->ecol +
5470	    stats->latecol + adapter->watchdog_events;
5471
5472	/* Driver specific counters */
5473	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5474	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5475	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5476	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5477	adapter->packet_buf_alloc_tx =
5478	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5479	adapter->packet_buf_alloc_rx =
5480	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5481}
5482
5483
5484/**********************************************************************
5485 *
5486 *  Initialize the VF board statistics counters.
5487 *
5488 **********************************************************************/
5489static void
5490igb_vf_init_stats(struct adapter *adapter)
5491{
5492	struct e1000_hw *hw = &adapter->hw;
5493	struct e1000_vf_stats	*stats;
5494
5495	stats = (struct e1000_vf_stats	*)adapter->stats;
5496	if (stats == NULL)
5497		return;
5498	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5499	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5500	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5501	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5502	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5503}
5504
5505/**********************************************************************
5506 *
5507 *  Update the VF board statistics counters.
5508 *
5509 **********************************************************************/
5510static void
5511igb_update_vf_stats_counters(struct adapter *adapter)
5512{
5513	struct e1000_hw *hw = &adapter->hw;
5514	struct e1000_vf_stats	*stats;
5515
5516	if (adapter->link_speed == 0)
5517		return;
5518
5519	stats = (struct e1000_vf_stats	*)adapter->stats;
5520
5521	UPDATE_VF_REG(E1000_VFGPRC,
5522	    stats->last_gprc, stats->gprc);
5523	UPDATE_VF_REG(E1000_VFGORC,
5524	    stats->last_gorc, stats->gorc);
5525	UPDATE_VF_REG(E1000_VFGPTC,
5526	    stats->last_gptc, stats->gptc);
5527	UPDATE_VF_REG(E1000_VFGOTC,
5528	    stats->last_gotc, stats->gotc);
5529	UPDATE_VF_REG(E1000_VFMPRC,
5530	    stats->last_mprc, stats->mprc);
5531}
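
/*
 * UPDATE_VF_REG is defined elsewhere in this driver; conceptually it reads
 * the current value of the VF register, folds the change since the saved
 * last_* snapshot into the running 64-bit total, and stores the new
 * snapshot, so the 32-bit hardware counters can wrap without losing
 * counts.  igb_vf_init_stats() above seeds those snapshots at attach time.
 */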
5532
5533/* Export a single 32-bit register via a read-only sysctl. */
5534static int
5535igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5536{
5537	struct adapter *adapter;
5538	u_int val;
5539
5540	adapter = oidp->oid_arg1;
5541	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5542	return (sysctl_handle_int(oidp, &val, 0, req));
5543}
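
/*
 * The handler above expects the adapter pointer in arg1 and the register
 * offset in arg2, so registration (as done in igb_add_hw_stats() below)
 * looks like:
 *
 *	SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 *	    CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 *	    igb_sysctl_reg_handler, "IU", "Transmit Descriptor Head");
 */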
5544
5545/*
5546**  Tuneable interrupt rate handler
5547*/
5548static int
5549igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5550{
5551	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5552	int			error;
5553	u32			reg, usec, rate;
5554
5555	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5556	usec = ((reg & 0x7FFC) >> 2);
5557	if (usec > 0)
5558		rate = 1000000 / usec;
5559	else
5560		rate = 0;
5561	error = sysctl_handle_int(oidp, &rate, 0, req);
5562	if (error || !req->newptr)
5563		return (error);
5564	return (0);
5565}
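
/*
 * The interval lives in bits 14:2 of EITR and is treated as microseconds
 * once shifted down.  For example, a raw EITR value of 0x1F4 gives
 * usec = (0x1F4 & 0x7FFC) >> 2 = 125, which this handler reports as
 * 1000000 / 125 = 8000 interrupts per second.  The handler itself only
 * reports the current rate; it does not push a new value back to the
 * hardware.
 */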
5566
5567/*
5568 * Add sysctl variables, one per statistic, to the system.
5569 */
5570static void
5571igb_add_hw_stats(struct adapter *adapter)
5572{
5573	device_t dev = adapter->dev;
5574
5575	struct tx_ring *txr = adapter->tx_rings;
5576	struct rx_ring *rxr = adapter->rx_rings;
5577
5578	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5579	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5580	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5581	struct e1000_hw_stats *stats = adapter->stats;
5582
5583	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5584	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5585
5586#define QUEUE_NAME_LEN 32
5587	char namebuf[QUEUE_NAME_LEN];
5588
5589	/* Driver Statistics */
5590	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5591			CTLFLAG_RD, &adapter->link_irq, 0,
5592			"Link MSIX IRQ Handled");
5593	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5594			CTLFLAG_RD, &adapter->dropped_pkts,
5595			"Driver dropped packets");
5596	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5597			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5598			"Driver tx dma failure in xmit");
5599	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5600			CTLFLAG_RD, &adapter->rx_overruns,
5601			"RX overruns");
5602	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5603			CTLFLAG_RD, &adapter->watchdog_events,
5604			"Watchdog timeouts");
5605
5606	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5607			CTLFLAG_RD, &adapter->device_control,
5608			"Device Control Register");
5609	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5610			CTLFLAG_RD, &adapter->rx_control,
5611			"Receiver Control Register");
5612	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5613			CTLFLAG_RD, &adapter->int_mask,
5614			"Interrupt Mask");
5615	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5616			CTLFLAG_RD, &adapter->eint_mask,
5617			"Extended Interrupt Mask");
5618	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5619			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5620			"Transmit Buffer Packet Allocation");
5621	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5622			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5623			"Receive Buffer Packet Allocation");
5624	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5625			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5626			"Flow Control High Watermark");
5627	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5628			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5629			"Flow Control Low Watermark");
5630
5631	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5632		struct lro_ctrl *lro = &rxr->lro;
5633
5634		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5635		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5636					    CTLFLAG_RD, NULL, "Queue Name");
5637		queue_list = SYSCTL_CHILDREN(queue_node);
5638
5639		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5640				CTLFLAG_RD, &adapter->queues[i],
5641				sizeof(&adapter->queues[i]),
5642				igb_sysctl_interrupt_rate_handler,
5643				"IU", "Interrupt Rate");
5644
5645		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5646				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5647				igb_sysctl_reg_handler, "IU",
5648 				"Transmit Descriptor Head");
5649		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5650				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5651				igb_sysctl_reg_handler, "IU",
5652 				"Transmit Descriptor Tail");
5653		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5654				CTLFLAG_RD, &txr->no_desc_avail,
5655				"Queue No Descriptor Available");
5656		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5657				CTLFLAG_RD, &txr->total_packets,
5658				"Queue Packets Transmitted");
5659
5660		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5661				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5662				igb_sysctl_reg_handler, "IU",
5663				"Receive Descriptor Head");
5664		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5665				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5666				igb_sysctl_reg_handler, "IU",
5667				"Receive Descriptor Tail");
5668		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5669				CTLFLAG_RD, &rxr->rx_packets,
5670				"Queue Packets Received");
5671		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5672				CTLFLAG_RD, &rxr->rx_bytes,
5673				"Queue Bytes Received");
5674		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5675				CTLFLAG_RD, &lro->lro_queued, 0,
5676				"LRO Queued");
5677		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5678				CTLFLAG_RD, &lro->lro_flushed, 0,
5679				"LRO Flushed");
5680	}
5681
5682	/* MAC stats get their own sub node */
5683
5684	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5685				    CTLFLAG_RD, NULL, "MAC Statistics");
5686	stat_list = SYSCTL_CHILDREN(stat_node);
5687
5688	/*
5689	** The VF adapter has a very limited set of stats
5690	** since it's not managing the bare metal, so to speak.
5691	*/
5692	if (adapter->vf_ifp) {
5693		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5694				CTLFLAG_RD, &stats->gprc,
5695				"Good Packets Received");
5696		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5697				CTLFLAG_RD, &stats->gptc,
5698				"Good Packets Transmitted");
5699		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5700				CTLFLAG_RD, &stats->gorc,
5701				"Good Octets Received");
5702		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5703				CTLFLAG_RD, &stats->gotc,
5704				"Good Octets Transmitted");
5705		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5706				CTLFLAG_RD, &stats->mprc,
5707				"Multicast Packets Received");
5708		return;
5709	}
5710
5711	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5712			CTLFLAG_RD, &stats->ecol,
5713			"Excessive collisions");
5714	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5715			CTLFLAG_RD, &stats->scc,
5716			"Single collisions");
5717	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5718			CTLFLAG_RD, &stats->mcc,
5719			"Multiple collisions");
5720	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5721			CTLFLAG_RD, &stats->latecol,
5722			"Late collisions");
5723	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5724			CTLFLAG_RD, &stats->colc,
5725			"Collision Count");
5726	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5727			CTLFLAG_RD, &stats->symerrs,
5728			"Symbol Errors");
5729	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5730			CTLFLAG_RD, &stats->sec,
5731			"Sequence Errors");
5732	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5733			CTLFLAG_RD, &stats->dc,
5734			"Defer Count");
5735	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5736			CTLFLAG_RD, &stats->mpc,
5737			"Missed Packets");
5738	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5739			CTLFLAG_RD, &stats->rnbc,
5740			"Receive No Buffers");
5741	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5742			CTLFLAG_RD, &stats->ruc,
5743			"Receive Undersize");
5744	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5745			CTLFLAG_RD, &stats->rfc,
5746			"Fragmented Packets Received ");
5747	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5748			CTLFLAG_RD, &stats->roc,
5749			"Oversized Packets Received");
5750	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5751			CTLFLAG_RD, &stats->rjc,
5752			"Received Jabber");
5753	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5754			CTLFLAG_RD, &stats->rxerrc,
5755			"Receive Errors");
5756	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5757			CTLFLAG_RD, &stats->crcerrs,
5758			"CRC errors");
5759	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5760			CTLFLAG_RD, &stats->algnerrc,
5761			"Alignment Errors");
5762	/* On 82575 these are collision counts */
5763	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5764			CTLFLAG_RD, &stats->cexterr,
5765			"Collision/Carrier extension errors");
5766	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5767			CTLFLAG_RD, &stats->xonrxc,
5768			"XON Received");
5769	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5770			CTLFLAG_RD, &stats->xontxc,
5771			"XON Transmitted");
5772	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5773			CTLFLAG_RD, &stats->xoffrxc,
5774			"XOFF Received");
5775	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5776			CTLFLAG_RD, &stats->xofftxc,
5777			"XOFF Transmitted");
5778	/* Packet Reception Stats */
5779	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5780			CTLFLAG_RD, &stats->tpr,
5781			"Total Packets Received ");
5782	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5783			CTLFLAG_RD, &stats->gprc,
5784			"Good Packets Received");
5785	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5786			CTLFLAG_RD, &stats->bprc,
5787			"Broadcast Packets Received");
5788	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5789			CTLFLAG_RD, &stats->mprc,
5790			"Multicast Packets Received");
5791	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5792			CTLFLAG_RD, &stats->prc64,
5793			"64 byte frames received ");
5794	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5795			CTLFLAG_RD, &stats->prc127,
5796			"65-127 byte frames received");
5797	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5798			CTLFLAG_RD, &stats->prc255,
5799			"128-255 byte frames received");
5800	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5801			CTLFLAG_RD, &stats->prc511,
5802			"256-511 byte frames received");
5803	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5804			CTLFLAG_RD, &stats->prc1023,
5805			"512-1023 byte frames received");
5806	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5807			CTLFLAG_RD, &stats->prc1522,
5808			"1024-1522 byte frames received");
5809 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5810 			CTLFLAG_RD, &stats->gorc,
5811 			"Good Octets Received");
5812
5813	/* Packet Transmission Stats */
5814 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5815 			CTLFLAG_RD, &stats->gotc,
5816 			"Good Octets Transmitted");
5817	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5818			CTLFLAG_RD, &stats->tpt,
5819			"Total Packets Transmitted");
5820	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5821			CTLFLAG_RD, &stats->gptc,
5822			"Good Packets Transmitted");
5823	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5824			CTLFLAG_RD, &stats->bptc,
5825			"Broadcast Packets Transmitted");
5826	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5827			CTLFLAG_RD, &stats->mptc,
5828			"Multicast Packets Transmitted");
5829	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5830			CTLFLAG_RD, &stats->ptc64,
5831			"64 byte frames transmitted ");
5832	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5833			CTLFLAG_RD, &stats->ptc127,
5834			"65-127 byte frames transmitted");
5835	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5836			CTLFLAG_RD, &stats->ptc255,
5837			"128-255 byte frames transmitted");
5838	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5839			CTLFLAG_RD, &stats->ptc511,
5840			"256-511 byte frames transmitted");
5841	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5842			CTLFLAG_RD, &stats->ptc1023,
5843			"512-1023 byte frames transmitted");
5844	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5845			CTLFLAG_RD, &stats->ptc1522,
5846			"1024-1522 byte frames transmitted");
5847	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5848			CTLFLAG_RD, &stats->tsctc,
5849			"TSO Contexts Transmitted");
5850	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5851			CTLFLAG_RD, &stats->tsctfc,
5852			"TSO Contexts Failed");
5853
5854
5855	/* Interrupt Stats */
5856
5857	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5858				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5859	int_list = SYSCTL_CHILDREN(int_node);
5860
5861	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5862			CTLFLAG_RD, &stats->iac,
5863			"Interrupt Assertion Count");
5864
5865	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5866			CTLFLAG_RD, &stats->icrxptc,
5867			"Interrupt Cause Rx Pkt Timer Expire Count");
5868
5869	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5870			CTLFLAG_RD, &stats->icrxatc,
5871			"Interrupt Cause Rx Abs Timer Expire Count");
5872
5873	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5874			CTLFLAG_RD, &stats->ictxptc,
5875			"Interrupt Cause Tx Pkt Timer Expire Count");
5876
5877	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5878			CTLFLAG_RD, &stats->ictxatc,
5879			"Interrupt Cause Tx Abs Timer Expire Count");
5880
5881	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5882			CTLFLAG_RD, &stats->ictxqec,
5883			"Interrupt Cause Tx Queue Empty Count");
5884
5885	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5886			CTLFLAG_RD, &stats->ictxqmtc,
5887			"Interrupt Cause Tx Queue Min Thresh Count");
5888
5889	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5890			CTLFLAG_RD, &stats->icrxdmtc,
5891			"Interrupt Cause Rx Desc Min Thresh Count");
5892
5893	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5894			CTLFLAG_RD, &stats->icrxoc,
5895			"Interrupt Cause Receiver Overrun Count");
5896
5897	/* Host to Card Stats */
5898
5899	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5900				    CTLFLAG_RD, NULL,
5901				    "Host to Card Statistics");
5902
5903	host_list = SYSCTL_CHILDREN(host_node);
5904
5905	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5906			CTLFLAG_RD, &stats->cbtmpc,
5907			"Circuit Breaker Tx Packet Count");
5908
5909	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5910			CTLFLAG_RD, &stats->htdpmc,
5911			"Host Transmit Discarded Packets");
5912
5913	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5914			CTLFLAG_RD, &stats->rpthc,
5915			"Rx Packets To Host");
5916
5917	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5918			CTLFLAG_RD, &stats->cbrmpc,
5919			"Circuit Breaker Rx Packet Count");
5920
5921	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5922			CTLFLAG_RD, &stats->cbrdpc,
5923			"Circuit Breaker Rx Dropped Count");
5924
5925	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5926			CTLFLAG_RD, &stats->hgptc,
5927			"Host Good Packets Tx Count");
5928
5929	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5930			CTLFLAG_RD, &stats->htcbdpc,
5931			"Host Tx Circuit Breaker Dropped Count");
5932
5933	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5934			CTLFLAG_RD, &stats->hgorc,
5935			"Host Good Octets Received Count");
5936
5937	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5938			CTLFLAG_RD, &stats->hgotc,
5939			"Host Good Octets Transmit Count");
5940
5941	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5942			CTLFLAG_RD, &stats->lenerrs,
5943			"Length Errors");
5944
5945	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5946			CTLFLAG_RD, &stats->scvpc,
5947			"SerDes/SGMII Code Violation Pkt Count");
5948
5949	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5950			CTLFLAG_RD, &stats->hrmpc,
5951			"Header Redirection Missed Packet Count");
5952}
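
/*
 * The nodes created above hang off the device's sysctl tree, so for the
 * first igb instance they can be read from userland with, for example:
 *
 *	sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	sysctl dev.igb.0.queue0.interrupt_rate
 *	sysctl dev.igb.0.interrupts.asserts
 */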
5953
5954
5955/**********************************************************************
5956 *
5957 *  This routine provides a way to dump out the adapter EEPROM,
5958 *  often a useful debug/service tool. It dumps only the first
5959 *  32 words; the data that matters lies within that range.
5960 *
5961 **********************************************************************/
5962static int
5963igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5964{
5965	struct adapter *adapter;
5966	int error;
5967	int result;
5968
5969	result = -1;
5970	error = sysctl_handle_int(oidp, &result, 0, req);
5971
5972	if (error || !req->newptr)
5973		return (error);
5974
5975	/*
5976	 * This value will cause a hex dump of the
5977	 * first 32 16-bit words of the EEPROM to
5978	 * the screen.
5979	 */
5980	if (result == 1) {
5981		adapter = (struct adapter *)arg1;
5982		igb_print_nvm_info(adapter);
5983	}
5984
5985	return (error);
5986}
5987
5988static void
5989igb_print_nvm_info(struct adapter *adapter)
5990{
5991	u16	eeprom_data;
5992	int	i, j, row = 0;
5993
5994	/* It's a bit crude, but it gets the job done */
5995	printf("\nInterface EEPROM Dump:\n");
5996	printf("Offset\n0x0000  ");
5997	for (i = 0, j = 0; i < 32; i++, j++) {
5998		if (j == 8) { /* Make the offset block */
5999			j = 0; ++row;
6000			printf("\n0x00%x0  ", row);
6001		}
6002		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6003		printf("%04x ", eeprom_data);
6004	}
6005	printf("\n");
6006}
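
/*
 * With the loop above the output is four rows of eight 16-bit words, e.g.
 * (values illustrative only):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  8086 10c9 ffff ffff ffff ffff ffff ffff
 *	0x0010  ...
 *	0x0020  ...
 *	0x0030  ...
 */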
6007
6008static void
6009igb_set_sysctl_value(struct adapter *adapter, const char *name,
6010	const char *description, int *limit, int value)
6011{
6012	*limit = value;
6013	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6014	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6015	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
6016}
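
/*
 * A typical use of this helper is to expose a driver tunable as a
 * read/write integer, for instance (names here purely illustrative):
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */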
6017
6018/*
6019** Set flow control using sysctl:
6020** Flow control values:
6021** 	0 - off
6022**	1 - rx pause
6023**	2 - tx pause
6024**	3 - full
6025*/
6026static int
6027igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6028{
6029	int		error;
6030	static int	input = 3; /* default is full */
6031	struct adapter	*adapter = (struct adapter *) arg1;
6032
6033	error = sysctl_handle_int(oidp, &input, 0, req);
6034
6035	if ((error) || (req->newptr == NULL))
6036		return (error);
6037
6038	switch (input) {
6039		case e1000_fc_rx_pause:
6040		case e1000_fc_tx_pause:
6041		case e1000_fc_full:
6042		case e1000_fc_none:
6043			adapter->hw.fc.requested_mode = input;
6044			adapter->fc = input;
6045			break;
6046		default:
6047			/* Do nothing */
6048			return (error);
6049	}
6050
6051	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6052	e1000_force_mac_fc(&adapter->hw);
6053	return (error);
6054}
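
/*
 * The values accepted above follow the e1000_fc_mode enumeration, where
 * none/rx_pause/tx_pause/full map onto 0/1/2/3.  Assuming the handler is
 * attached to the device's "fc" OID elsewhere in this file, full flow
 * control could be requested from userland with e.g.:
 *
 *	sysctl dev.igb.0.fc=3
 */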
6055
6056/*
6057** Manage DMA Coalesce:
6058** Control values:
6059** 	0/1 - off/on
6060**	Legal timer values are:
6061**	250, 500, or 1000-10000 (in steps of 1000)
6062*/
6063static int
6064igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6065{
6066	struct adapter *adapter = (struct adapter *) arg1;
6067	int		error;
6068
6069	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6070
6071	if ((error) || (req->newptr == NULL))
6072		return (error);
6073
6074	switch (adapter->dmac) {
6075		case 0:
6076			/* Disabling */
6077			break;
6078		case 1: /* Just enable and use default */
6079			adapter->dmac = 1000;
6080			break;
6081		case 250:
6082		case 500:
6083		case 1000:
6084		case 2000:
6085		case 3000:
6086		case 4000:
6087		case 5000:
6088		case 6000:
6089		case 7000:
6090		case 8000:
6091		case 9000:
6092		case 10000:
6093			/* Legal values - allow */
6094			break;
6095		default:
6096			/* Do nothing, illegal value */
6097			adapter->dmac = 0;
6098			return (EINVAL);
6099	}
6100	/* Reinit the interface */
6101	igb_init(adapter);
6102	return (error);
6103}
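
/*
 * Assuming this handler is wired to a "dmac" OID elsewhere in this file,
 * DMA coalescing could be enabled with its default timer value of 1000,
 * tuned, or turned off again from userland with e.g.:
 *
 *	sysctl dev.igb.0.dmac=1	   (enable, use default of 1000)
 *	sysctl dev.igb.0.dmac=250  (enable with a 250 timer value)
 *	sysctl dev.igb.0.dmac=0	   (disable)
 *
 * Any accepted value causes the interface to be reinitialized.
 */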
6104
6105/*
6106** Manage Energy Efficient Ethernet:
6107** Control values:
6108**     0/1 - enabled/disabled
6109*/
6110static int
6111igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6112{
6113	struct adapter	*adapter = (struct adapter *) arg1;
6114	int		error, value;
6115
6116	value = adapter->hw.dev_spec._82575.eee_disable;
6117	error = sysctl_handle_int(oidp, &value, 0, req);
6118	if (error || req->newptr == NULL)
6119		return (error);
6120	IGB_CORE_LOCK(adapter);
6121	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6122	igb_init_locked(adapter);
6123	IGB_CORE_UNLOCK(adapter);
6124	return (0);
6125}
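
/*
 * The value written simply mirrors hw.dev_spec._82575.eee_disable, so a
 * non-zero write disables Energy Efficient Ethernet and zero re-enables
 * it, with the interface reinitialized either way.  Assuming the handler
 * is attached to an "eee_disabled" OID elsewhere in this file, e.g.:
 *
 *	sysctl dev.igb.0.eee_disabled=1
 */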
6126