/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 228788 2011-12-21 20:10:11Z jhb $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.1";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
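
/*
 * (Editorial note, not from the original sources: supporting a new
 * adapter model is normally just a matter of adding a
 * { 0x8086, <device id>, PCI_ANY_ID, PCI_ANY_ID, 0 } entry above the
 * all-zero terminator, provided the shared e1000 code already knows
 * the part.)
 */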

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, struct tcphdr *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static void	igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");
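
/*
 * (Example, not from the original sources: being CTLFLAG_RDTUN, these
 * can only be set as boot-time tunables, e.g. in /boot/loader.conf:
 *
 *	hw.igb.rxd="2048"
 *	hw.igb.txd="2048"
 *
 * Values must pass the IGB_MIN/IGB_MAX and alignment checks done in
 * igb_attach().)
 */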

/*
** AIM: Adaptive Interrupt Moderation
** The interrupt rate is varied over time
** based on the traffic seen on that
** interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and thus use no cluster. It is a very
** workload-dependent feature.
*/
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	igb_set_sysctl_value(adapter, "enable_aim",
	    "Interrupt Moderation", &adapter->enable_aim,
	    igb_enable_aim);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
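	/*
	 * (Arithmetic note, an assumption not spelled out in the original
	 * comment: legacy TX and RX descriptors are both 16 bytes, so with
	 * the usual IGB_DBA_ALIGN of 128 the counts below must be
	 * multiples of 8.)
	 */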
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats =
		    (struct e1000_vf_stats *)malloc(sizeof \
		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats =
		    (struct e1000_hw_stats *)malloc(sizeof \
		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    TRUE);
		e1000_set_eee_i350(&adapter->hw);
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_detach(dev);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			if (txr->tx_avail <= IGB_MAX_SCATTER)
				txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status |= IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0], and spins for it.
 * Should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int			i, err = 0;
	bool			moveable = TRUE;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0) {
		i = m->m_pkthdr.flowid % adapter->num_queues;
		moveable = FALSE;
	} else
		i = curcpu % adapter->num_queues;
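	/*
	 * (Added note: pinning a flowid to one ring keeps the packets
	 * of a flow from being reordered; traffic without a flowid is
	 * simply spread by the sending CPU.)
	 */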

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];
	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
	    IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &que->que_task);
	}

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
	    adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status |= IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);
	if (txr->tx_avail <= IGB_MAX_SCATTER)
		txr->queue_status |= IGB_QUEUE_DEPLETED;
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packet split.
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
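
	/*
	 * (Added note: these are the standard FreeBSD mbuf cluster
	 * sizes, i.e. 2K MCLBYTES clusters, page-sized MJUMPAGESIZE
	 * clusters, and 9K MJUM9BYTES clusters for full-sized jumbo
	 * frames.)
	 */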

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	e1000_set_eee_i350(&adapter->hw);
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, adapter->rx_process_limit, NULL);

		IGB_TX_LOCK(txr);
		if (igb_txeof(txr))
			more = TRUE;
#if __FreeBSD_version >= 800000
		/* Process the stack queue only if not depleted */
		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
		    !drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		/* Do we need another? */
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: if using this code you MUST be sure that
 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
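/*
 * (Added note: polling support is only compiled in with
 * "options DEVICE_POLLING" in the kernel configuration, and is then
 * toggled per interface, e.g. "ifconfig igb0 polling"; see polling(4).)
 */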
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link(adapter, 0);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	igb_rxeof(que, count, &rx_done);

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (adapter->enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out the last calculated setting
	**  - Calculate based on the average packet size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
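
	/*
	 * (Illustrative worked example, not from the hardware docs: with
	 * an average frame of 1500 bytes, newitr = 1500 + 24 = 1524; that
	 * is outside the 300-1200 mid range, so it is halved to 762 and
	 * masked to 760 before being written to EITR on the next
	 * interrupt.)
	 */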

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}
1574
1575
1576/*********************************************************************
1577 *
1578 *  Media Ioctl callback
1579 *
1580 *  This routine is called whenever the user queries the status of
1581 *  the interface using ifconfig.
1582 *
1583 **********************************************************************/
1584static void
1585igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1586{
1587	struct adapter *adapter = ifp->if_softc;
1588	u_char fiber_type = IFM_1000_SX;
1589
1590	INIT_DEBUGOUT("igb_media_status: begin");
1591
1592	IGB_CORE_LOCK(adapter);
1593	igb_update_link_status(adapter);
1594
1595	ifmr->ifm_status = IFM_AVALID;
1596	ifmr->ifm_active = IFM_ETHER;
1597
1598	if (!adapter->link_active) {
1599		IGB_CORE_UNLOCK(adapter);
1600		return;
1601	}
1602
1603	ifmr->ifm_status |= IFM_ACTIVE;
1604
1605	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1606	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1607		ifmr->ifm_active |= fiber_type | IFM_FDX;
1608	else {
1609		switch (adapter->link_speed) {
1610		case 10:
1611			ifmr->ifm_active |= IFM_10_T;
1612			break;
1613		case 100:
1614			ifmr->ifm_active |= IFM_100_TX;
1615			break;
1616		case 1000:
1617			ifmr->ifm_active |= IFM_1000_T;
1618			break;
1619		}
1620		if (adapter->link_duplex == FULL_DUPLEX)
1621			ifmr->ifm_active |= IFM_FDX;
1622		else
1623			ifmr->ifm_active |= IFM_HDX;
1624	}
1625	IGB_CORE_UNLOCK(adapter);
1626}
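/*
 * Illustrative only: with an active copper gigabit link, the status
 * reported above surfaces in userland as something like:
 *	media: Ethernet autoselect (1000baseT <full-duplex>)
 *	status: active
 */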
1627
1628/*********************************************************************
1629 *
1630 *  Media Ioctl callback
1631 *
1632 *  This routine is called when the user changes speed/duplex using
1633 *  media/mediaopt options with ifconfig.
1634 *
1635 **********************************************************************/
1636static int
1637igb_media_change(struct ifnet *ifp)
1638{
1639	struct adapter *adapter = ifp->if_softc;
1640	struct ifmedia  *ifm = &adapter->media;
1641
1642	INIT_DEBUGOUT("igb_media_change: begin");
1643
1644	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1645		return (EINVAL);
1646
1647	IGB_CORE_LOCK(adapter);
1648	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1649	case IFM_AUTO:
1650		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1651		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1652		break;
1653	case IFM_1000_LX:
1654	case IFM_1000_SX:
1655	case IFM_1000_T:
1656		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1657		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1658		break;
1659	case IFM_100_TX:
1660		adapter->hw.mac.autoneg = FALSE;
1661		adapter->hw.phy.autoneg_advertised = 0;
1662		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1663			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1664		else
1665			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1666		break;
1667	case IFM_10_T:
1668		adapter->hw.mac.autoneg = FALSE;
1669		adapter->hw.phy.autoneg_advertised = 0;
1670		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1671			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1672		else
1673			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1674		break;
1675	default:
1676		device_printf(adapter->dev, "Unsupported media type\n");
1677	}
1678
1679	igb_init_locked(adapter);
1680	IGB_CORE_UNLOCK(adapter);
1681
1682	return (0);
1683}
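/*
 * Illustrative usage (hypothetical unit name igb0): forcing 100 Mbit
 * full duplex through this callback looks like
 *	ifconfig igb0 media 100baseTX mediaopt full-duplex
 * while "ifconfig igb0 media autoselect" restores autonegotiation.
 */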
1684
1685
1686/*********************************************************************
1687 *
1688 *  This routine maps the mbufs to Advanced TX descriptors.
1689 *
1690 **********************************************************************/
1691static int
1692igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1693{
1694	struct adapter		*adapter = txr->adapter;
1695	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1696	bus_dmamap_t		map;
1697	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1698	union e1000_adv_tx_desc	*txd = NULL;
1699	struct mbuf		*m_head = *m_headp;
1700	struct ether_vlan_header *eh = NULL;
1701	struct ip		*ip = NULL;
1702	struct tcphdr		*th = NULL;
1703	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1704	int			ehdrlen, poff;
1705	int			nsegs, i, first, last = 0;
1706	int			error, do_tso, remap = 1;
1707
1708	/* Set basic descriptor constants */
1709	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1710	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1711	if (m_head->m_flags & M_VLANTAG)
1712		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1713
1714retry:
1715	m_head = *m_headp;
1716	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1717	hdrlen = ehdrlen = poff = 0;
1718
1719	/*
1720	 * Intel recommends entire IP/TCP header length reside in a single
1721	 * buffer. If multiple descriptors are used to describe the IP and
1722	 * TCP header, each descriptor should describe one or more
1723	 * complete headers; descriptors referencing only parts of headers
1724	 * are not supported. If all layer headers are not coalesced into
1725	 * a single buffer, each buffer should not cross a 4KB boundary,
1726	 * or be larger than the maximum read request size.
1727	 * The controller also requires the IP/TCP header to be modified to
1728	 * make TSO work, so we first get a writable mbuf chain and then
1729	 * coalesce the ethernet/IP/TCP headers into a single buffer to meet
1730	 * the controller's requirement. This also simplifies IP/TCP/UDP
1731	 * checksum offloading, which has similar restrictions.
1732	 */
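	/*
	 * Sketch of the offsets computed below for the common case of
	 * an untagged TCP/IPv4 frame with minimal headers: ehdrlen is
	 * 14, poff = 14 + 20 = 34, and for TSO hdrlen = 34 + 20 = 54.
	 * A VLAN tag grows ehdrlen to 18 and shifts the rest.
	 */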
1733	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1734		if (do_tso || (m_head->m_next != NULL &&
1735		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1736			if (M_WRITABLE(*m_headp) == 0) {
1737				m_head = m_dup(*m_headp, M_DONTWAIT);
1738				m_freem(*m_headp);
1739				if (m_head == NULL) {
1740					*m_headp = NULL;
1741					return (ENOBUFS);
1742				}
1743				*m_headp = m_head;
1744			}
1745		}
1746		/*
1747		 * Assume IPv4, we don't have TSO/checksum offload support
1748		 * for IPv6 yet.
1749		 */
1750		ehdrlen = sizeof(struct ether_header);
1751		m_head = m_pullup(m_head, ehdrlen);
1752		if (m_head == NULL) {
1753			*m_headp = NULL;
1754			return (ENOBUFS);
1755		}
1756		eh = mtod(m_head, struct ether_vlan_header *);
1757		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1758			ehdrlen = sizeof(struct ether_vlan_header);
1759			m_head = m_pullup(m_head, ehdrlen);
1760			if (m_head == NULL) {
1761				*m_headp = NULL;
1762				return (ENOBUFS);
1763			}
1764		}
1765		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1766		if (m_head == NULL) {
1767			*m_headp = NULL;
1768			return (ENOBUFS);
1769		}
1770		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1771		poff = ehdrlen + (ip->ip_hl << 2);
1772		if (do_tso) {
1773			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1774			if (m_head == NULL) {
1775				*m_headp = NULL;
1776				return (ENOBUFS);
1777			}
1778			/*
1779			 * The pseudo TCP checksum does not include the TCP
1780			 * payload length, so the driver must recompute the
1781			 * checksum here to match what the hardware expects.
1782			 * This follows Microsoft's Large Send specification.
1783			 */
1784			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1785			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1786			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1787			/* Keep track of the full header length */
1788			hdrlen = poff + (th->th_off << 2);
1789		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1790			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1791			if (m_head == NULL) {
1792				*m_headp = NULL;
1793				return (ENOBUFS);
1794			}
1795			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1796			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1797			if (m_head == NULL) {
1798				*m_headp = NULL;
1799				return (ENOBUFS);
1800			}
1801			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1802			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1803		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1804			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1805			if (m_head == NULL) {
1806				*m_headp = NULL;
1807				return (ENOBUFS);
1808			}
1809			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1810		}
1811		*m_headp = m_head;
1812	}
1813
1814	/*
1815	 * Map the packet for DMA
1816	 *
1817	 * Capture the first descriptor index,
1818	 * this descriptor will have the index
1819	 * of the EOP which is the only one that
1820	 * now gets a DONE bit writeback.
1821	 */
1822	first = txr->next_avail_desc;
1823	tx_buffer = &txr->tx_buffers[first];
1824	tx_buffer_mapped = tx_buffer;
1825	map = tx_buffer->map;
1826
1827	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1828	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1829
1830	/*
1831	 * There are two types of errors we can (try) to handle:
1832	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1833	 *   out of segments.  Defragment the mbuf chain and try again.
1834	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1835	 *   at this point in time.  Defer sending and try again later.
1836	 * All other errors, in particular EINVAL, are fatal and prevent the
1837	 * mbuf chain from ever going through.  Drop it and report error.
1838	 */
1839	if (error == EFBIG && remap) {
1840		struct mbuf *m;
1841
1842		m = m_defrag(*m_headp, M_DONTWAIT);
1843		if (m == NULL) {
1844			adapter->mbuf_defrag_failed++;
1845			m_freem(*m_headp);
1846			*m_headp = NULL;
1847			return (ENOBUFS);
1848		}
1849		*m_headp = m;
1850
1851		/* Try it again, but only once */
1852		remap = 0;
1853		goto retry;
1854	} else if (error == ENOMEM) {
1855		adapter->no_tx_dma_setup++;
1856		return (error);
1857	} else if (error != 0) {
1858		adapter->no_tx_dma_setup++;
1859		m_freem(*m_headp);
1860		*m_headp = NULL;
1861		return (error);
1862	}
1863
1864	/*
1865	** Make sure we don't overrun the ring;
1866	** we need nsegs descriptors plus one for
1867	** the context descriptor used for the
1868	** offloads.
1869	*/
1870        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1871                txr->no_desc_avail++;
1872		bus_dmamap_unload(txr->txtag, map);
1873		return (ENOBUFS);
1874        }
1875	m_head = *m_headp;
1876
1877	/* Do hardware assists:
1878	 * Set up the context descriptor, used
1879	 * when any hardware offload is done.
1880	 * This includes CSUM, VLAN, and TSO.
1881	 * It will use the first descriptor.
1882	 */
1883
1884	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1885		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1886			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1887			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1888			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1889		} else
1890			return (ENXIO);
1891	} else if (igb_tx_ctx_setup(txr, m_head))
1892			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1893
1894	/* Calculate payload length */
1895	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1896	    << E1000_ADVTXD_PAYLEN_SHIFT);
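	/*
	 * Example (illustrative): a TSO chain of 7254 bytes with the
	 * 54-byte header above gives PAYLEN = 7200; for plain checksum
	 * offload hdrlen is 0 and PAYLEN is the whole frame.
	 */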
1897
1898	/* 82575 needs the queue index added */
1899	if (adapter->hw.mac.type == e1000_82575)
1900		olinfo_status |= txr->me << 4;
1901
1902	/* Set up our transmit descriptors */
1903	i = txr->next_avail_desc;
1904	for (int j = 0; j < nsegs; j++) {
1905		bus_size_t seg_len;
1906		bus_addr_t seg_addr;
1907
1908		tx_buffer = &txr->tx_buffers[i];
1909		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1910		seg_addr = segs[j].ds_addr;
1911		seg_len  = segs[j].ds_len;
1912
1913		txd->read.buffer_addr = htole64(seg_addr);
1914		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1915		txd->read.olinfo_status = htole32(olinfo_status);
1916		last = i;
1917		if (++i == adapter->num_tx_desc)
1918			i = 0;
1919		tx_buffer->m_head = NULL;
1920		tx_buffer->next_eop = -1;
1921	}
1922
1923	txr->next_avail_desc = i;
1924	txr->tx_avail -= nsegs;
1925        tx_buffer->m_head = m_head;
1926
1927	/*
1928	** Here we swap the map so the last descriptor,
1929	** which gets the completion interrupt has the
1930	** real map, and the first descriptor gets the
1931	** unused map from this descriptor.
1932	*/
1933	tx_buffer_mapped->map = tx_buffer->map;
1934	tx_buffer->map = map;
1935        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1936
1937        /*
1938         * Last Descriptor of Packet
1939	 * needs End Of Packet (EOP)
1940	 * and Report Status (RS)
1941         */
1942        txd->read.cmd_type_len |=
1943	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1944	/*
1945	 * Keep track in the first buffer which
1946	 * descriptor will be written back
1947	 */
1948	tx_buffer = &txr->tx_buffers[first];
1949	tx_buffer->next_eop = last;
1950	/* Update the watchdog time early and often */
1951	txr->watchdog_time = ticks;
1952
1953	/*
1954	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1955	 * that this frame is available to transmit.
1956	 */
1957	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1958	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1959	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1960	++txr->tx_packets;
1961
1962	return (0);
1963}
1964static void
1965igb_set_promisc(struct adapter *adapter)
1966{
1967	struct ifnet	*ifp = adapter->ifp;
1968	struct e1000_hw *hw = &adapter->hw;
1969	u32		reg;
1970
1971	if (adapter->vf_ifp) {
1972		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1973		return;
1974	}
1975
1976	reg = E1000_READ_REG(hw, E1000_RCTL);
1977	if (ifp->if_flags & IFF_PROMISC) {
1978		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1979		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1980	} else if (ifp->if_flags & IFF_ALLMULTI) {
1981		reg |= E1000_RCTL_MPE;
1982		reg &= ~E1000_RCTL_UPE;
1983		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1984	}
1985}
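/*
 * Illustrative: "ifconfig igb0 promisc" (hypothetical unit name) sets
 * IFF_PROMISC and takes the first branch above, enabling both unicast
 * (UPE) and multicast (MPE) promiscuous receive; IFF_ALLMULTI alone
 * only opens the multicast filter.
 */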
1986
1987static void
1988igb_disable_promisc(struct adapter *adapter)
1989{
1990	struct e1000_hw *hw = &adapter->hw;
1991	u32		reg;
1992
1993	if (adapter->vf_ifp) {
1994		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1995		return;
1996	}
1997	reg = E1000_READ_REG(hw, E1000_RCTL);
1998	reg &=  (~E1000_RCTL_UPE);
1999	reg &=  (~E1000_RCTL_MPE);
2000	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2001}
2002
2003
2004/*********************************************************************
2005 *  Multicast Update
2006 *
2007 *  This routine is called whenever multicast address list is updated.
2008 *
2009 **********************************************************************/
2010
2011static void
2012igb_set_multi(struct adapter *adapter)
2013{
2014	struct ifnet	*ifp = adapter->ifp;
2015	struct ifmultiaddr *ifma;
2016	u32 reg_rctl = 0;
2017	u8  *mta;
2018
2019	int mcnt = 0;
2020
2021	IOCTL_DEBUGOUT("igb_set_multi: begin");
2022
2023	mta = adapter->mta;
2024	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2025	    MAX_NUM_MULTICAST_ADDRESSES);
2026
2027#if __FreeBSD_version < 800000
2028	IF_ADDR_LOCK(ifp);
2029#else
2030	if_maddr_rlock(ifp);
2031#endif
2032	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2033		if (ifma->ifma_addr->sa_family != AF_LINK)
2034			continue;
2035
2036		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2037			break;
2038
2039		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2040		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2041		mcnt++;
2042	}
2043#if __FreeBSD_version < 800000
2044	IF_ADDR_UNLOCK(ifp);
2045#else
2046	if_maddr_runlock(ifp);
2047#endif
2048
2049	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2050		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2051		reg_rctl |= E1000_RCTL_MPE;
2052		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2053	} else
2054		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2055}
2056
2057
2058/*********************************************************************
2059 *  Timer routine:
2060 *  	This routine checks for link status,
2061 *	updates statistics, and does the watchdog.
2062 *
2063 **********************************************************************/
2064
2065static void
2066igb_local_timer(void *arg)
2067{
2068	struct adapter		*adapter = arg;
2069	device_t		dev = adapter->dev;
2070	struct ifnet		*ifp = adapter->ifp;
2071	struct tx_ring		*txr = adapter->tx_rings;
2072	struct igb_queue	*que = adapter->queues;
2073	int			hung = 0, busy = 0;
2074
2075
2076	IGB_CORE_LOCK_ASSERT(adapter);
2077
2078	igb_update_link_status(adapter);
2079	igb_update_stats_counters(adapter);
2080
2081        /*
2082        ** Check the TX queues status
2083	**	- central locked handling of OACTIVE
2084	**	- watchdog only if all queues show hung
2085        */
2086	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2087		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2088		    (adapter->pause_frames == 0))
2089			++hung;
2090		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2091			++busy;
2092		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2093			taskqueue_enqueue(que->tq, &que->que_task);
2094	}
2095	if (hung == adapter->num_queues)
2096		goto timeout;
2097	if (busy == adapter->num_queues)
2098		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2099	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2100	    (busy < adapter->num_queues))
2101		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2102
2103	adapter->pause_frames = 0;
2104	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2105#ifndef DEVICE_POLLING
2106	/* Schedule all queue interrupts - deadlock protection */
2107	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2108#endif
2109	return;
2110
2111timeout:
2112	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2113	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2114            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2115            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2116	device_printf(dev, "TX(%d) desc avail = %d, "
2117	    "Next TX to Clean = %d\n",
2118            txr->me, txr->tx_avail, txr->next_to_clean);
2119	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2120	adapter->watchdog_events++;
2121	igb_init_locked(adapter);
2122}
2123
2124static void
2125igb_update_link_status(struct adapter *adapter)
2126{
2127	struct e1000_hw *hw = &adapter->hw;
2128	struct ifnet *ifp = adapter->ifp;
2129	device_t dev = adapter->dev;
2130	struct tx_ring *txr = adapter->tx_rings;
2131	u32 link_check, thstat, ctrl;
2132
2133	link_check = thstat = ctrl = 0;
2134
2135	/* Get the cached link value or read for real */
2136        switch (hw->phy.media_type) {
2137        case e1000_media_type_copper:
2138                if (hw->mac.get_link_status) {
2139			/* Do the work to read phy */
2140                        e1000_check_for_link(hw);
2141                        link_check = !hw->mac.get_link_status;
2142                } else
2143                        link_check = TRUE;
2144                break;
2145        case e1000_media_type_fiber:
2146                e1000_check_for_link(hw);
2147                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2148                                 E1000_STATUS_LU);
2149                break;
2150        case e1000_media_type_internal_serdes:
2151                e1000_check_for_link(hw);
2152                link_check = adapter->hw.mac.serdes_has_link;
2153                break;
2154	/* VF device is type_unknown */
2155        case e1000_media_type_unknown:
2156                e1000_check_for_link(hw);
2157		link_check = !hw->mac.get_link_status;
2158		/* Fall thru */
2159        default:
2160                break;
2161        }
2162
2163	/* Check for thermal downshift or shutdown */
2164	if (hw->mac.type == e1000_i350) {
2165		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2166		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2167	}
2168
2169	/* Now we check if a transition has happened */
2170	if (link_check && (adapter->link_active == 0)) {
2171		e1000_get_speed_and_duplex(&adapter->hw,
2172		    &adapter->link_speed, &adapter->link_duplex);
2173		if (bootverbose)
2174			device_printf(dev, "Link is up %d Mbps %s\n",
2175			    adapter->link_speed,
2176			    ((adapter->link_duplex == FULL_DUPLEX) ?
2177			    "Full Duplex" : "Half Duplex"));
2178		adapter->link_active = 1;
2179		ifp->if_baudrate = adapter->link_speed * 1000000;
2180		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2181		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2182			device_printf(dev, "Link: thermal downshift\n");
2183		/* This can sleep */
2184		if_link_state_change(ifp, LINK_STATE_UP);
2185	} else if (!link_check && (adapter->link_active == 1)) {
2186		ifp->if_baudrate = adapter->link_speed = 0;
2187		adapter->link_duplex = 0;
2188		if (bootverbose)
2189			device_printf(dev, "Link is Down\n");
2190		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2191		    (thstat & E1000_THSTAT_PWR_DOWN))
2192			device_printf(dev, "Link: thermal shutdown\n");
2193		adapter->link_active = 0;
2194		/* This can sleep */
2195		if_link_state_change(ifp, LINK_STATE_DOWN);
2196		/* Reset queue state */
2197		for (int i = 0; i < adapter->num_queues; i++, txr++)
2198			txr->queue_status = IGB_QUEUE_IDLE;
2199	}
2200}
2201
2202/*********************************************************************
2203 *
2204 *  This routine disables all traffic on the adapter by issuing a
2205 *  global reset on the MAC and deallocates TX/RX buffers.
2206 *
2207 **********************************************************************/
2208
2209static void
2210igb_stop(void *arg)
2211{
2212	struct adapter	*adapter = arg;
2213	struct ifnet	*ifp = adapter->ifp;
2214	struct tx_ring *txr = adapter->tx_rings;
2215
2216	IGB_CORE_LOCK_ASSERT(adapter);
2217
2218	INIT_DEBUGOUT("igb_stop: begin");
2219
2220	igb_disable_intr(adapter);
2221
2222	callout_stop(&adapter->timer);
2223
2224	/* Tell the stack that the interface is no longer active */
2225	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2226	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2227
2228	/* Disarm watchdog timer. */
2229	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2230		IGB_TX_LOCK(txr);
2231		txr->queue_status = IGB_QUEUE_IDLE;
2232		IGB_TX_UNLOCK(txr);
2233	}
2234
2235	e1000_reset_hw(&adapter->hw);
2236	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2237
2238	e1000_led_off(&adapter->hw);
2239	e1000_cleanup_led(&adapter->hw);
2240}
2241
2242
2243/*********************************************************************
2244 *
2245 *  Determine hardware revision.
2246 *
2247 **********************************************************************/
2248static void
2249igb_identify_hardware(struct adapter *adapter)
2250{
2251	device_t dev = adapter->dev;
2252
2253	/* Make sure our PCI config space has the necessary stuff set */
2254	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2255	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2256	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2257		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2258		    "bits were not set!\n");
2259		adapter->hw.bus.pci_cmd_word |=
2260		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2261		pci_write_config(dev, PCIR_COMMAND,
2262		    adapter->hw.bus.pci_cmd_word, 2);
2263	}
2264
2265	/* Save off the information about this board */
2266	adapter->hw.vendor_id = pci_get_vendor(dev);
2267	adapter->hw.device_id = pci_get_device(dev);
2268	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2269	adapter->hw.subsystem_vendor_id =
2270	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2271	adapter->hw.subsystem_device_id =
2272	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2273
2274	/* Set MAC type early for PCI setup */
2275	e1000_set_mac_type(&adapter->hw);
2276
2277	/* Are we a VF device? */
2278	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2279	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2280		adapter->vf_ifp = 1;
2281	else
2282		adapter->vf_ifp = 0;
2283}
2284
2285static int
2286igb_allocate_pci_resources(struct adapter *adapter)
2287{
2288	device_t	dev = adapter->dev;
2289	int		rid;
2290
2291	rid = PCIR_BAR(0);
2292	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2293	    &rid, RF_ACTIVE);
2294	if (adapter->pci_mem == NULL) {
2295		device_printf(dev, "Unable to allocate bus resource: memory\n");
2296		return (ENXIO);
2297	}
2298	adapter->osdep.mem_bus_space_tag =
2299	    rman_get_bustag(adapter->pci_mem);
2300	adapter->osdep.mem_bus_space_handle =
2301	    rman_get_bushandle(adapter->pci_mem);
2302	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2303
2304	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2305
2306	/* This will set up either MSI-X or MSI */
2307	adapter->msix = igb_setup_msix(adapter);
2308	adapter->hw.back = &adapter->osdep;
2309
2310	return (0);
2311}
2312
2313/*********************************************************************
2314 *
2315 *  Setup the Legacy or MSI Interrupt handler
2316 *
2317 **********************************************************************/
2318static int
2319igb_allocate_legacy(struct adapter *adapter)
2320{
2321	device_t		dev = adapter->dev;
2322	struct igb_queue	*que = adapter->queues;
2323	int			error, rid = 0;
2324
2325	/* Turn off all interrupts */
2326	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2327
2328	/* MSI RID is 1 */
2329	if (adapter->msix == 1)
2330		rid = 1;
2331
2332	/* We allocate a single interrupt resource */
2333	adapter->res = bus_alloc_resource_any(dev,
2334	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2335	if (adapter->res == NULL) {
2336		device_printf(dev, "Unable to allocate bus resource: "
2337		    "interrupt\n");
2338		return (ENXIO);
2339	}
2340
2341	/*
2342	 * Try allocating a fast interrupt and the associated deferred
2343	 * processing contexts.
2344	 */
2345	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2346	/* Make tasklet for deferred link handling */
2347	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2348	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2349	    taskqueue_thread_enqueue, &que->tq);
2350	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2351	    device_get_nameunit(adapter->dev));
2352	if ((error = bus_setup_intr(dev, adapter->res,
2353	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2354	    adapter, &adapter->tag)) != 0) {
2355		device_printf(dev, "Failed to register fast interrupt "
2356			    "handler: %d\n", error);
2357		taskqueue_free(que->tq);
2358		que->tq = NULL;
2359		return (error);
2360	}
2361
2362	return (0);
2363}
2364
2365
2366/*********************************************************************
2367 *
2368 *  Setup the MSIX Queue Interrupt handlers:
2369 *
2370 **********************************************************************/
2371static int
2372igb_allocate_msix(struct adapter *adapter)
2373{
2374	device_t		dev = adapter->dev;
2375	struct igb_queue	*que = adapter->queues;
2376	int			error, rid, vector = 0;
2377
2378	/* Be sure to start with all interrupts disabled */
2379	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2380	E1000_WRITE_FLUSH(&adapter->hw);
2381
2382	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2383		rid = vector + 1;
2384		que->res = bus_alloc_resource_any(dev,
2385		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2386		if (que->res == NULL) {
2387			device_printf(dev,
2388			    "Unable to allocate bus resource: "
2389			    "MSIX Queue Interrupt\n");
2390			return (ENXIO);
2391		}
2392		error = bus_setup_intr(dev, que->res,
2393	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2394		    igb_msix_que, que, &que->tag);
2395		if (error) {
2396			que->res = NULL;
2397			device_printf(dev, "Failed to register Queue handler");
2398			return (error);
2399		}
2400#if __FreeBSD_version >= 800504
2401		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2402#endif
2403		que->msix = vector;
2404		if (adapter->hw.mac.type == e1000_82575)
2405			que->eims = E1000_EICR_TX_QUEUE0 << i;
2406		else
2407			que->eims = 1 << vector;
2408		/*
2409		** Bind the msix vector, and thus the
2410		** rings to the corresponding cpu.
2411		*/
2412		if (adapter->num_queues > 1)
2413			bus_bind_intr(dev, que->res, i);
2414		/* Make tasklet for deferred handling */
2415		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2416		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2417		    taskqueue_thread_enqueue, &que->tq);
2418		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2419		    device_get_nameunit(adapter->dev));
2420	}
2421
2422	/* And Link */
2423	rid = vector + 1;
2424	adapter->res = bus_alloc_resource_any(dev,
2425	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2426	if (adapter->res == NULL) {
2427		device_printf(dev,
2428		    "Unable to allocate bus resource: "
2429		    "MSIX Link Interrupt\n");
2430		return (ENXIO);
2431	}
2432	if ((error = bus_setup_intr(dev, adapter->res,
2433	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2434	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2435		device_printf(dev, "Failed to register Link handler");
2436		return (error);
2437	}
2438#if __FreeBSD_version >= 800504
2439	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2440#endif
2441	adapter->linkvec = vector;
2442
2443	return (0);
2444}
2445
2446
2447static void
2448igb_configure_queues(struct adapter *adapter)
2449{
2450	struct	e1000_hw	*hw = &adapter->hw;
2451	struct	igb_queue	*que;
2452	u32			tmp, ivar = 0, newitr = 0;
2453
2454	/* First turn on RSS capability */
2455	if (adapter->hw.mac.type != e1000_82575)
2456		E1000_WRITE_REG(hw, E1000_GPIE,
2457		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2458		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2459
2460	/* Turn on MSIX */
2461	switch (adapter->hw.mac.type) {
2462	case e1000_82580:
2463	case e1000_i350:
2464	case e1000_vfadapt:
2465	case e1000_vfadapt_i350:
2466		/* RX entries */
2467		for (int i = 0; i < adapter->num_queues; i++) {
2468			u32 index = i >> 1;
2469			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2470			que = &adapter->queues[i];
2471			if (i & 1) {
2472				ivar &= 0xFF00FFFF;
2473				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2474			} else {
2475				ivar &= 0xFFFFFF00;
2476				ivar |= que->msix | E1000_IVAR_VALID;
2477			}
2478			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2479		}
2480		/* TX entries */
2481		for (int i = 0; i < adapter->num_queues; i++) {
2482			u32 index = i >> 1;
2483			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2484			que = &adapter->queues[i];
2485			if (i & 1) {
2486				ivar &= 0x00FFFFFF;
2487				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2488			} else {
2489				ivar &= 0xFFFF00FF;
2490				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2491			}
2492			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2493			adapter->que_mask |= que->eims;
2494		}
2495
2496		/* And for the link interrupt */
2497		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2498		adapter->link_mask = 1 << adapter->linkvec;
2499		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2500		break;
2501	case e1000_82576:
2502		/* RX entries */
2503		for (int i = 0; i < adapter->num_queues; i++) {
2504			u32 index = i & 0x7; /* Each IVAR has two entries */
2505			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2506			que = &adapter->queues[i];
2507			if (i < 8) {
2508				ivar &= 0xFFFFFF00;
2509				ivar |= que->msix | E1000_IVAR_VALID;
2510			} else {
2511				ivar &= 0xFF00FFFF;
2512				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2513			}
2514			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2515			adapter->que_mask |= que->eims;
2516		}
2517		/* TX entries */
2518		for (int i = 0; i < adapter->num_queues; i++) {
2519			u32 index = i & 0x7; /* Each IVAR has two entries */
2520			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2521			que = &adapter->queues[i];
2522			if (i < 8) {
2523				ivar &= 0xFFFF00FF;
2524				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2525			} else {
2526				ivar &= 0x00FFFFFF;
2527				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2528			}
2529			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2530			adapter->que_mask |= que->eims;
2531		}
2532
2533		/* And for the link interrupt */
2534		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2535		adapter->link_mask = 1 << adapter->linkvec;
2536		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2537		break;
2538
2539	case e1000_82575:
2540                /* Enable MSI-X support */
2541		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2542                tmp |= E1000_CTRL_EXT_PBA_CLR;
2543                /* Auto-Mask interrupts upon ICR read. */
2544                tmp |= E1000_CTRL_EXT_EIAME;
2545                tmp |= E1000_CTRL_EXT_IRCA;
2546                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2547
2548		/* Queues */
2549		for (int i = 0; i < adapter->num_queues; i++) {
2550			que = &adapter->queues[i];
2551			tmp = E1000_EICR_RX_QUEUE0 << i;
2552			tmp |= E1000_EICR_TX_QUEUE0 << i;
2553			que->eims = tmp;
2554			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2555			    i, que->eims);
2556			adapter->que_mask |= que->eims;
2557		}
2558
2559		/* Link */
2560		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2561		    E1000_EIMS_OTHER);
2562		adapter->link_mask |= E1000_EIMS_OTHER;
2563	default:
2564		break;
2565	}
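	/*
	** IVAR layout note (as programmed above): each 32-bit IVAR
	** register holds four 8-bit vector entries.  On 82580/i350
	** queue i uses IVAR(i >> 1), RX in byte 0 or 2 and TX in
	** byte 1 or 3, so queue 2 lands in bytes 0/1 of IVAR(1).
	** The 82576 instead pairs queue i with queue i + 8 in
	** IVAR(i & 7).
	*/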
2566
2567	/* Set the starting interrupt rate */
2568	if (igb_max_interrupt_rate > 0)
2569		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2570
2571        if (hw->mac.type == e1000_82575)
2572                newitr |= newitr << 16;
2573        else
2574                newitr |= E1000_EITR_CNT_IGNR;
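	/*
	** Example, assuming the usual igb_max_interrupt_rate default
	** of 8000: newitr = 4000000 / 8000 = 500, which is already
	** aligned to the 0x7FFC mask and caps each queue at roughly
	** 8000 interrupts per second.
	*/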
2575
2576	for (int i = 0; i < adapter->num_queues; i++) {
2577		que = &adapter->queues[i];
2578		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2579	}
2580
2581	return;
2582}
2583
2584
2585static void
2586igb_free_pci_resources(struct adapter *adapter)
2587{
2588	struct		igb_queue *que = adapter->queues;
2589	device_t	dev = adapter->dev;
2590	int		rid;
2591
2592	/*
2593	** There is a slight possibility of a failure mode
2594	** in attach that will result in entering this function
2595	** before interrupt resources have been initialized, and
2596	** in that case we do not want to execute the loops below.
2597	** We can detect this reliably by the state of the adapter's
2598	** res pointer.
2599	*/
2600	if (adapter->res == NULL)
2601		goto mem;
2602
2603	/*
2604	 * First release all the interrupt resources:
2605	 */
2606	for (int i = 0; i < adapter->num_queues; i++, que++) {
2607		rid = que->msix + 1;
2608		if (que->tag != NULL) {
2609			bus_teardown_intr(dev, que->res, que->tag);
2610			que->tag = NULL;
2611		}
2612		if (que->res != NULL)
2613			bus_release_resource(dev,
2614			    SYS_RES_IRQ, rid, que->res);
2615	}
2616
2617	/* Clean the Legacy or Link interrupt last */
2618	if (adapter->linkvec) /* we are doing MSIX */
2619		rid = adapter->linkvec + 1;
2620	else
2621		rid = (adapter->msix != 0) ? 1 : 0;
2622
2623	if (adapter->tag != NULL) {
2624		bus_teardown_intr(dev, adapter->res, adapter->tag);
2625		adapter->tag = NULL;
2626	}
2627	if (adapter->res != NULL)
2628		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2629
2630mem:
2631	if (adapter->msix)
2632		pci_release_msi(dev);
2633
2634	if (adapter->msix_mem != NULL)
2635		bus_release_resource(dev, SYS_RES_MEMORY,
2636		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2637
2638	if (adapter->pci_mem != NULL)
2639		bus_release_resource(dev, SYS_RES_MEMORY,
2640		    PCIR_BAR(0), adapter->pci_mem);
2641
2642}
2643
2644/*
2645 * Set up either MSI-X or MSI
2646 */
2647static int
2648igb_setup_msix(struct adapter *adapter)
2649{
2650	device_t dev = adapter->dev;
2651	int rid, want, queues, msgs;
2652
2653	/* tuneable override */
2654	if (igb_enable_msix == 0)
2655		goto msi;
2656
2657	/* First try MSI/X */
2658	rid = PCIR_BAR(IGB_MSIX_BAR);
2659	adapter->msix_mem = bus_alloc_resource_any(dev,
2660	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2661       	if (!adapter->msix_mem) {
2662		/* May not be enabled */
2663		device_printf(adapter->dev,
2664		    "Unable to map MSIX table\n");
2665		goto msi;
2666	}
2667
2668	msgs = pci_msix_count(dev);
2669	if (msgs == 0) { /* system has msix disabled */
2670		bus_release_resource(dev, SYS_RES_MEMORY,
2671		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2672		adapter->msix_mem = NULL;
2673		goto msi;
2674	}
2675
2676	/* Figure out a reasonable auto config value */
2677	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2678
2679	/* Manual override */
2680	if (igb_num_queues != 0)
2681		queues = igb_num_queues;
2682	if (queues > 8)  /* max queues */
2683		queues = 8;
2684
2685	/* Can have max of 4 queues on 82575 */
2686	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2687		queues = 4;
2688
2689	/* Limit the VF devices to one queue */
2690	if (adapter->vf_ifp)
2691		queues = 1;
2692
2693	/*
2694	** One vector (RX/TX pair) per queue
2695	** plus an additional for Link interrupt
2696	*/
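	/*
	** Example (illustrative): a quad-core machine on an 82576
	** reporting 10 MSIX messages yields queues = 4 and want = 5,
	** i.e. four queue vectors plus the link vector.
	*/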
2697	want = queues + 1;
2698	if (msgs >= want)
2699		msgs = want;
2700	else {
2701               	device_printf(adapter->dev,
2702		    "MSIX Configuration Problem, "
2703		    "%d vectors configured, but %d wanted!\n",
2704		    msgs, want);
2705		return (ENXIO);
2706	}
2707	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2708               	device_printf(adapter->dev,
2709		    "Using MSIX interrupts with %d vectors\n", msgs);
2710		adapter->num_queues = queues;
2711		return (msgs);
2712	}
2713msi:
2714       	msgs = pci_msi_count(dev);
2715       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2716               	device_printf(adapter->dev,"Using MSI interrupt\n");
2717	return (msgs);
2718}
2719
2720/*********************************************************************
2721 *
2722 *  Set up a fresh starting state
2723 *
2724 **********************************************************************/
2725static void
2726igb_reset(struct adapter *adapter)
2727{
2728	device_t	dev = adapter->dev;
2729	struct e1000_hw *hw = &adapter->hw;
2730	struct e1000_fc_info *fc = &hw->fc;
2731	struct ifnet	*ifp = adapter->ifp;
2732	u32		pba = 0;
2733	u16		hwm;
2734
2735	INIT_DEBUGOUT("igb_reset: begin");
2736
2737	/* Let the firmware know the OS is in control */
2738	igb_get_hw_control(adapter);
2739
2740	/*
2741	 * Packet Buffer Allocation (PBA)
2742	 * Writing PBA sets the receive portion of the buffer
2743	 * the remainder is used for the transmit buffer.
2744	 */
2745	switch (hw->mac.type) {
2746	case e1000_82575:
2747		pba = E1000_PBA_32K;
2748		break;
2749	case e1000_82576:
2750	case e1000_vfadapt:
2751		pba = E1000_READ_REG(hw, E1000_RXPBS);
2752		pba &= E1000_RXPBS_SIZE_MASK_82576;
2753		break;
2754	case e1000_82580:
2755	case e1000_i350:
2756	case e1000_vfadapt_i350:
2757		pba = E1000_READ_REG(hw, E1000_RXPBS);
2758		pba = e1000_rxpbs_adjust_82580(pba);
2759		break;
2760	default:
2761		break;
2762	}
2763
2764	/* Special needs in case of Jumbo frames */
2765	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2766		u32 tx_space, min_tx, min_rx;
2767		pba = E1000_READ_REG(hw, E1000_PBA);
2768		tx_space = pba >> 16;
2769		pba &= 0xffff;
2770		min_tx = (adapter->max_frame_size +
2771		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2772		min_tx = roundup2(min_tx, 1024);
2773		min_tx >>= 10;
2774                min_rx = adapter->max_frame_size;
2775                min_rx = roundup2(min_rx, 1024);
2776                min_rx >>= 10;
2777		if (tx_space < min_tx &&
2778		    ((min_tx - tx_space) < pba)) {
2779			pba = pba - (min_tx - tx_space);
2780			/*
2781                         * if short on rx space, rx wins
2782                         * and must trump tx adjustment
2783			 */
2784                        if (pba < min_rx)
2785                                pba = min_rx;
2786		}
2787		E1000_WRITE_REG(hw, E1000_PBA, pba);
2788	}
2789
2790	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2791
2792	/*
2793	 * These parameters control the automatic generation (Tx) and
2794	 * response (Rx) to Ethernet PAUSE frames.
2795	 * - High water mark should allow for at least two frames to be
2796	 *   received after sending an XOFF.
2797	 * - Low water mark works best when it is very near the high water mark.
2798	 *   This allows the receiver to restart by sending XON when it has
2799	 *   drained a bit.
2800	 */
2801	hwm = min(((pba << 10) * 9 / 10),
2802	    ((pba << 10) - 2 * adapter->max_frame_size));
2803
2804	if (hw->mac.type < e1000_82576) {
2805		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2806		fc->low_water = fc->high_water - 8;
2807	} else {
2808		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2809		fc->low_water = fc->high_water - 16;
2810	}
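	/*
	 * Worked example (assuming an 82575 and a standard 1518-byte
	 * max frame): pba = 32K, so hwm = min(32768 * 9 / 10,
	 * 32768 - 2 * 1518) = 29491; with 8-byte granularity that
	 * yields high_water = 29488 and low_water = 29480.
	 */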
2811
2812	fc->pause_time = IGB_FC_PAUSE_TIME;
2813	fc->send_xon = TRUE;
2814	if (adapter->fc)
2815		fc->requested_mode = adapter->fc;
2816	else
2817		fc->requested_mode = e1000_fc_default;
2818
2819	/* Issue a global reset */
2820	e1000_reset_hw(hw);
2821	E1000_WRITE_REG(hw, E1000_WUC, 0);
2822
2823	if (e1000_init_hw(hw) < 0)
2824		device_printf(dev, "Hardware Initialization Failed\n");
2825
2826	/* Setup DMA Coalescing */
2827	if (hw->mac.type == e1000_i350) {
2828		u32 reg = ~E1000_DMACR_DMAC_EN;
2829
2830		if (adapter->dmac == 0) { /* Disabling it */
2831			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2832			goto reset_out;
2833		}
2834
2835		hwm = (pba - 4) << 10;
2836		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2837		    & E1000_DMACR_DMACTHR_MASK);
2838
2839		/* transition to L0s or L1 if available */
2840		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2841
2842		/* timer = value in adapter->dmac in 32usec intervals */
2843		reg |= (adapter->dmac >> 5);
2844		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2845
2846		/* No lower threshold */
2847		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2848
2849		/* set hwm to PBA -  2 * max frame size */
2850		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2851
2852		/* Set the interval before transition */
2853		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2854		reg |= 0x800000FF; /* 255 usec */
2855		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2856
2857		/* free space in tx packet buffer to wake from DMA coal */
2858		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2859		    (20480 - (2 * adapter->max_frame_size)) >> 6);
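		/*
		 * Example (assuming a 1518-byte max frame): the wake
		 * threshold written above is (20480 - 3036) >> 6 = 272,
		 * counted in 64-byte units as the shift implies.
		 */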
2860
2861		/* make low power state decision controlled by DMA coal */
2862		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2863		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2864		    reg | E1000_PCIEMISC_LX_DECISION);
2865		device_printf(dev, "DMA Coalescing enabled\n");
2866	}
2867
2868reset_out:
2869	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2870	e1000_get_phy_info(hw);
2871	e1000_check_for_link(hw);
2872	return;
2873}
2874
2875/*********************************************************************
2876 *
2877 *  Setup networking device structure and register an interface.
2878 *
2879 **********************************************************************/
2880static int
2881igb_setup_interface(device_t dev, struct adapter *adapter)
2882{
2883	struct ifnet   *ifp;
2884
2885	INIT_DEBUGOUT("igb_setup_interface: begin");
2886
2887	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2888	if (ifp == NULL) {
2889		device_printf(dev, "can not allocate ifnet structure\n");
2890		return (-1);
2891	}
2892	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2893	ifp->if_mtu = ETHERMTU;
2894	ifp->if_init = igb_init;
2895	ifp->if_softc = adapter;
2896	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2897	ifp->if_ioctl = igb_ioctl;
2898	ifp->if_start = igb_start;
2899#if __FreeBSD_version >= 800000
2900	ifp->if_transmit = igb_mq_start;
2901	ifp->if_qflush = igb_qflush;
2902#endif
2903	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2904	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2905	IFQ_SET_READY(&ifp->if_snd);
2906
2907	ether_ifattach(ifp, adapter->hw.mac.addr);
2908
2909	ifp->if_capabilities = ifp->if_capenable = 0;
2910
2911	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2912	ifp->if_capabilities |= IFCAP_TSO4;
2913	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2914	ifp->if_capenable = ifp->if_capabilities;
2915
2916	/* Don't enable LRO by default */
2917	ifp->if_capabilities |= IFCAP_LRO;
2918
2919#ifdef DEVICE_POLLING
2920	ifp->if_capabilities |= IFCAP_POLLING;
2921#endif
2922
2923	/*
2924	 * Tell the upper layer(s) we
2925	 * support full VLAN capability.
2926	 */
2927	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2928	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2929			     |  IFCAP_VLAN_HWTSO
2930			     |  IFCAP_VLAN_MTU;
2931	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2932			  |  IFCAP_VLAN_HWTSO
2933			  |  IFCAP_VLAN_MTU;
2934
2935	/*
2936	** Don't turn this on by default: if vlans are
2937	** created on another pseudo device (e.g. lagg)
2938	** then vlan events are not passed through, breaking
2939	** operation, but with HW FILTER off it works. If
2940	** you use vlans directly on the igb driver you can
2941	** enable this and get full hardware tag filtering.
2942	*/
2943	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2944
2945	/*
2946	 * Specify the media types supported by this adapter and register
2947	 * callbacks to update media and link information
2948	 */
2949	ifmedia_init(&adapter->media, IFM_IMASK,
2950	    igb_media_change, igb_media_status);
2951	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2952	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2953		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2954			    0, NULL);
2955		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2956	} else {
2957		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2958		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2959			    0, NULL);
2960		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2961			    0, NULL);
2962		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2963			    0, NULL);
2964		if (adapter->hw.phy.type != e1000_phy_ife) {
2965			ifmedia_add(&adapter->media,
2966				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2967			ifmedia_add(&adapter->media,
2968				IFM_ETHER | IFM_1000_T, 0, NULL);
2969		}
2970	}
2971	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2972	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2973	return (0);
2974}
2975
2976
2977/*
2978 * Manage DMA'able memory.
2979 */
2980static void
2981igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2982{
2983	if (error)
2984		return;
2985	*(bus_addr_t *) arg = segs[0].ds_addr;
2986}
2987
2988static int
2989igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2990        struct igb_dma_alloc *dma, int mapflags)
2991{
2992	int error;
2993
2994	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2995				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2996				BUS_SPACE_MAXADDR,	/* lowaddr */
2997				BUS_SPACE_MAXADDR,	/* highaddr */
2998				NULL, NULL,		/* filter, filterarg */
2999				size,			/* maxsize */
3000				1,			/* nsegments */
3001				size,			/* maxsegsize */
3002				0,			/* flags */
3003				NULL,			/* lockfunc */
3004				NULL,			/* lockarg */
3005				&dma->dma_tag);
3006	if (error) {
3007		device_printf(adapter->dev,
3008		    "%s: bus_dma_tag_create failed: %d\n",
3009		    __func__, error);
3010		goto fail_0;
3011	}
3012
3013	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3014	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3015	if (error) {
3016		device_printf(adapter->dev,
3017		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3018		    __func__, (uintmax_t)size, error);
3019		goto fail_2;
3020	}
3021
3022	dma->dma_paddr = 0;
3023	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3024	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3025	if (error || dma->dma_paddr == 0) {
3026		device_printf(adapter->dev,
3027		    "%s: bus_dmamap_load failed: %d\n",
3028		    __func__, error);
3029		goto fail_3;
3030	}
3031
3032	return (0);
3033
3034fail_3:
3035	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3036fail_2:
3037	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3038	bus_dma_tag_destroy(dma->dma_tag);
3039fail_0:
3040	dma->dma_map = NULL;
3041	dma->dma_tag = NULL;
3042
3043	return (error);
3044}
3045
3046static void
3047igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3048{
3049	if (dma->dma_tag == NULL)
3050		return;
3051	if (dma->dma_map != NULL) {
3052		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3053		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3054		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3055		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3056		dma->dma_map = NULL;
3057	}
3058	bus_dma_tag_destroy(dma->dma_tag);
3059	dma->dma_tag = NULL;
3060}
3061
3062
3063/*********************************************************************
3064 *
3065 *  Allocate memory for the transmit and receive rings, and then
3066 *  the descriptors associated with each, called only once at attach.
3067 *
3068 **********************************************************************/
3069static int
3070igb_allocate_queues(struct adapter *adapter)
3071{
3072	device_t dev = adapter->dev;
3073	struct igb_queue	*que = NULL;
3074	struct tx_ring		*txr = NULL;
3075	struct rx_ring		*rxr = NULL;
3076	int rsize, tsize, error = E1000_SUCCESS;
3077	int txconf = 0, rxconf = 0;
3078
3079	/* First allocate the top level queue structs */
3080	if (!(adapter->queues =
3081	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3082	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3083		device_printf(dev, "Unable to allocate queue memory\n");
3084		error = ENOMEM;
3085		goto fail;
3086	}
3087
3088	/* Next allocate the TX ring struct memory */
3089	if (!(adapter->tx_rings =
3090	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3091	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3092		device_printf(dev, "Unable to allocate TX ring memory\n");
3093		error = ENOMEM;
3094		goto tx_fail;
3095	}
3096
3097	/* Now allocate the RX */
3098	if (!(adapter->rx_rings =
3099	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3100	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3101		device_printf(dev, "Unable to allocate RX ring memory\n");
3102		error = ENOMEM;
3103		goto rx_fail;
3104	}
3105
3106	tsize = roundup2(adapter->num_tx_desc *
3107	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
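	/*
	 * E.g. with a typical default of 1024 tx descriptors this is
	 * 1024 * 16 = 16384 bytes, already a multiple of IGB_DBA_ALIGN,
	 * so the roundup is a no-op.
	 */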
3108	/*
3109	 * Now set up the TX queues, txconf is needed to handle the
3110	 * possibility that things fail midcourse and we need to
3111	 * undo memory gracefully
3112	 */
3113	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3114		/* Set up some basics */
3115		txr = &adapter->tx_rings[i];
3116		txr->adapter = adapter;
3117		txr->me = i;
3118
3119		/* Initialize the TX lock */
3120		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3121		    device_get_nameunit(dev), txr->me);
3122		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3123
3124		if (igb_dma_malloc(adapter, tsize,
3125			&txr->txdma, BUS_DMA_NOWAIT)) {
3126			device_printf(dev,
3127			    "Unable to allocate TX Descriptor memory\n");
3128			error = ENOMEM;
3129			goto err_tx_desc;
3130		}
3131		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3132		bzero((void *)txr->tx_base, tsize);
3133
3134        	/* Now allocate transmit buffers for the ring */
3135        	if (igb_allocate_transmit_buffers(txr)) {
3136			device_printf(dev,
3137			    "Critical Failure setting up transmit buffers\n");
3138			error = ENOMEM;
3139			goto err_tx_desc;
3140        	}
3141#if __FreeBSD_version >= 800000
3142		/* Allocate a buf ring */
3143		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3144		    M_WAITOK, &txr->tx_mtx);
3145#endif
3146	}
3147
3148	/*
3149	 * Next the RX queues...
3150	 */
3151	rsize = roundup2(adapter->num_rx_desc *
3152	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3153	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3154		rxr = &adapter->rx_rings[i];
3155		rxr->adapter = adapter;
3156		rxr->me = i;
3157
3158		/* Initialize the RX lock */
3159		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3160		    device_get_nameunit(dev), rxr->me);
3161		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3162
3163		if (igb_dma_malloc(adapter, rsize,
3164			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3165			device_printf(dev,
3166			    "Unable to allocate RxDescriptor memory\n");
3167			error = ENOMEM;
3168			goto err_rx_desc;
3169		}
3170		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3171		bzero((void *)rxr->rx_base, rsize);
3172
3173        	/* Allocate receive buffers for the ring*/
3174		if (igb_allocate_receive_buffers(rxr)) {
3175			device_printf(dev,
3176			    "Critical Failure setting up receive buffers\n");
3177			error = ENOMEM;
3178			goto err_rx_desc;
3179		}
3180	}
3181
3182	/*
3183	** Finally set up the queue holding structs
3184	*/
3185	for (int i = 0; i < adapter->num_queues; i++) {
3186		que = &adapter->queues[i];
3187		que->adapter = adapter;
3188		que->txr = &adapter->tx_rings[i];
3189		que->rxr = &adapter->rx_rings[i];
3190	}
3191
3192	return (0);
3193
3194err_rx_desc:
3195	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3196		igb_dma_free(adapter, &rxr->rxdma);
3197err_tx_desc:
3198	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3199		igb_dma_free(adapter, &txr->txdma);
3200	free(adapter->rx_rings, M_DEVBUF);
3201rx_fail:
3202#if __FreeBSD_version >= 800000
3203	buf_ring_free(txr->br, M_DEVBUF);
3204#endif
3205	free(adapter->tx_rings, M_DEVBUF);
3206tx_fail:
3207	free(adapter->queues, M_DEVBUF);
3208fail:
3209	return (error);
3210}
3211
3212/*********************************************************************
3213 *
3214 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3215 *  the information needed to transmit a packet on the wire. This is
3216 *  called only once at attach, setup is done every reset.
3217 *
3218 **********************************************************************/
3219static int
3220igb_allocate_transmit_buffers(struct tx_ring *txr)
3221{
3222	struct adapter *adapter = txr->adapter;
3223	device_t dev = adapter->dev;
3224	struct igb_tx_buffer *txbuf;
3225	int error, i;
3226
3227	/*
3228	 * Setup DMA descriptor areas.
3229	 */
3230	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3231			       1, 0,			/* alignment, bounds */
3232			       BUS_SPACE_MAXADDR,	/* lowaddr */
3233			       BUS_SPACE_MAXADDR,	/* highaddr */
3234			       NULL, NULL,		/* filter, filterarg */
3235			       IGB_TSO_SIZE,		/* maxsize */
3236			       IGB_MAX_SCATTER,		/* nsegments */
3237			       PAGE_SIZE,		/* maxsegsize */
3238			       0,			/* flags */
3239			       NULL,			/* lockfunc */
3240			       NULL,			/* lockfuncarg */
3241			       &txr->txtag))) {
3242		device_printf(dev,"Unable to allocate TX DMA tag\n");
3243		goto fail;
3244	}
3245
3246	if (!(txr->tx_buffers =
3247	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3248	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3250		error = ENOMEM;
3251		goto fail;
3252	}
3253
3254        /* Create the descriptor buffer dma maps */
3255	txbuf = txr->tx_buffers;
3256	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3257		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3258		if (error != 0) {
3259			device_printf(dev, "Unable to create TX DMA map\n");
3260			goto fail;
3261		}
3262	}
3263
3264	return 0;
3265fail:
3266	/* Free everything; this handles failing partway through setup */
3267	igb_free_transmit_structures(adapter);
3268	return (error);
3269}
3270
3271/*********************************************************************
3272 *
3273 *  Initialize a transmit ring.
3274 *
3275 **********************************************************************/
3276static void
3277igb_setup_transmit_ring(struct tx_ring *txr)
3278{
3279	struct adapter *adapter = txr->adapter;
3280	struct igb_tx_buffer *txbuf;
3281	int i;
3282
3283	/* Clear the old descriptor contents */
3284	IGB_TX_LOCK(txr);
3285	bzero((void *)txr->tx_base,
3286	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3287	/* Reset indices */
3288	txr->next_avail_desc = 0;
3289	txr->next_to_clean = 0;
3290
3291	/* Free any existing tx buffers. */
3292        txbuf = txr->tx_buffers;
3293	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3294		if (txbuf->m_head != NULL) {
3295			bus_dmamap_sync(txr->txtag, txbuf->map,
3296			    BUS_DMASYNC_POSTWRITE);
3297			bus_dmamap_unload(txr->txtag, txbuf->map);
3298			m_freem(txbuf->m_head);
3299			txbuf->m_head = NULL;
3300		}
3301		/* clear the watch index */
3302		txbuf->next_eop = -1;
3303        }
3304
3305	/* Set number of descriptors available */
3306	txr->tx_avail = adapter->num_tx_desc;
3307
3308	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3309	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3310	IGB_TX_UNLOCK(txr);
3311}
3312
3313/*********************************************************************
3314 *
3315 *  Initialize all transmit rings.
3316 *
3317 **********************************************************************/
3318static void
3319igb_setup_transmit_structures(struct adapter *adapter)
3320{
3321	struct tx_ring *txr = adapter->tx_rings;
3322
3323	for (int i = 0; i < adapter->num_queues; i++, txr++)
3324		igb_setup_transmit_ring(txr);
3325
3326	return;
3327}
3328
3329/*********************************************************************
3330 *
3331 *  Enable transmit unit.
3332 *
3333 **********************************************************************/
3334static void
3335igb_initialize_transmit_units(struct adapter *adapter)
3336{
3337	struct tx_ring	*txr = adapter->tx_rings;
3338	struct e1000_hw *hw = &adapter->hw;
3339	u32		tctl, txdctl;
3340
3341	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3342	tctl = txdctl = 0;
3343
3344	/* Setup the Tx Descriptor Rings */
3345	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3346		u64 bus_addr = txr->txdma.dma_paddr;
3347
3348		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3349		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3350		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3351		    (uint32_t)(bus_addr >> 32));
3352		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3353		    (uint32_t)bus_addr);
3354
3355		/* Setup the HW Tx Head and Tail descriptor pointers */
3356		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3357		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3358
3359		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3360		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3361		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3362
3363		txr->queue_status = IGB_QUEUE_IDLE;
3364
3365		txdctl |= IGB_TX_PTHRESH;
3366		txdctl |= IGB_TX_HTHRESH << 8;
3367		txdctl |= IGB_TX_WTHRESH << 16;
3368		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3369		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3370	}
3371
3372	if (adapter->vf_ifp)
3373		return;
3374
3375	e1000_config_collision_dist(hw);
3376
3377	/* Program the Transmit Control Register */
3378	tctl = E1000_READ_REG(hw, E1000_TCTL);
3379	tctl &= ~E1000_TCTL_CT;
3380	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3381		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3382
3383	/* This write will effectively turn on the transmit unit. */
3384	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3385}
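
/*
 * Illustrative note (a sketch, not driver code): TXDCTL packs the three
 * descriptor-fetch thresholds into one register a byte apart, plus the
 * queue enable bit.  Assuming, say, PTHRESH = 8, HTHRESH = 1 and
 * WTHRESH = 16, the value written above works out to
 *
 *	txdctl = 8 | (1 << 8) | (16 << 16) | E1000_TXDCTL_QUEUE_ENABLE;
 *	(that is, 0x00100108 before the enable bit is set)
 *
 * The real threshold values come from the IGB_TX_*THRESH defines in
 * the driver header.
 */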

/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
static void
igb_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		igb_free_transmit_buffers(txr);
		igb_dma_free(adapter, &txr->txdma);
		IGB_TX_UNLOCK(txr);
		IGB_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
igb_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct igb_tx_buffer *tx_buffer;
	int		i;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#if __FreeBSD_version >= 800000
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO)
 *
 **********************************************************************/
static bool
igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
	struct ip *ip, struct tcphdr *th)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer	*tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ip_hlen, tcp_hlen;

	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	tcp_hlen = th->th_off << 2;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
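
/*
 * Worked example for the context-descriptor fields above (illustrative
 * only): for an untagged TSO frame with a 14-byte Ethernet header, a
 * 20-byte IP header, a 20-byte TCP header and a 1448-byte MSS:
 *
 *	vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20;
 *	mss_l4len_idx   = (1448 << E1000_ADVTXD_MSS_SHIFT) |
 *	    (20 << E1000_ADVTXD_L4LEN_SHIFT);
 *
 * IPLEN sits in the low bits with MACLEN just above it and the VLAN
 * tag in the upper half; likewise L4LEN sits below MSS in the second
 * word.
 */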

/*********************************************************************
 *
 *  Context Descriptor setup for VLAN or CSUM
 *
 **********************************************************************/

static bool
igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer	*tx_buffer;
	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int  ehdrlen, ctxd, ip_hlen = 0;
	u16	etype, vtag = 0;
	u8	ipproto = 0;
	bool	offload = TRUE;

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor, thus
	** we need to be here just for that setup.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	} else if (offload == FALSE)
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;

	switch (etype) {
		case ETHERTYPE_IP:
			ip = (struct ip *)(mp->m_data + ehdrlen);
			ip_hlen = ip->ip_hl << 2;
			if (mp->m_len < ehdrlen + ip_hlen) {
				offload = FALSE;
				break;
			}
			ipproto = ip->ip_p;
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
			break;
		case ETHERTYPE_IPV6:
			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
			ip_hlen = sizeof(struct ip6_hdr);
			ipproto = ip6->ip6_nxt;
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
			break;
		default:
			offload = FALSE;
			break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	switch (ipproto) {
		case IPPROTO_TCP:
			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
			break;
#if __FreeBSD_version >= 800000
		case IPPROTO_SCTP:
			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
			break;
#endif
		default:
			offload = FALSE;
			break;
	}

	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		mss_l4len_idx = txr->me << 4;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (offload);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 *  A TRUE return means there's work in the ring to clean; FALSE means
 *  it's empty.
 **********************************************************************/
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	int		first, last, done, processed;
	struct igb_tx_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
	struct ifnet	*ifp = adapter->ifp;

	IGB_TX_LOCK_ASSERT(txr);

	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * Get the index of the first descriptor AFTER the EOP of the
	 * first packet, so the inner while loop can use a simple
	 * comparison.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				txr->bytes +=
				    tx_buffer->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		++txr->packets;
		++ifp->if_opackets;
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Watchdog calculation: we know there's work outstanding or
	** the first return would have been taken, so nothing processed
	** for too long indicates a hang.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
		txr->queue_status |= IGB_QUEUE_HUNG;
	/*
	 * If we have a minimum free,
	 * clear the depleted state bit
	 */
	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
		txr->queue_status &= ~IGB_QUEUE_DEPLETED;

	/* All clean, turn off the watchdog */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IGB_QUEUE_IDLE;
		return (FALSE);
	}

	return (TRUE);
}
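
/*
 * Illustrative walk-through of the index arithmetic above (not driver
 * code): with a 1024-descriptor ring, suppose next_to_clean (first) is
 * 1020 and the packet's EOP descriptor (last) is 2, i.e. the packet
 * wrapped.  Then done becomes 3 and the inner loop frees descriptors
 * 1020, 1021, 1022, 1023, 0, 1 and 2 before first == done stops it;
 * the modular increment "if (++first == num_tx_desc) first = 0" is
 * what makes the wrap transparent.
 */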

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be called again later to try again.
 *
 **********************************************************************/
static void
igb_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	struct igb_rx_buf	*rxbuf;
	struct mbuf		*mh, *mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/*
	** Get one descriptor beyond
	** our work mark to control
	** the loop.
	*/
	if (++j == adapter->num_rx_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		/* No hdr mbuf used with header split off */
		if (rxr->hdr_split == FALSE)
			goto no_split;
		if (rxbuf->m_head == NULL) {
			mh = m_gethdr(M_DONTWAIT, MT_DATA);
			if (mh == NULL)
				goto update;
		} else
			mh = rxbuf->m_head;

		mh->m_pkthdr.len = mh->m_len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: hdr dmamap load"
			    " failure - %d\n", error);
			m_free(mh);
			rxbuf->m_head = NULL;
			goto update;
		}
		rxbuf->m_head = mh;
		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
		    BUS_DMASYNC_PREREAD);
		rxr->rx_base[i].read.hdr_addr =
		    htole64(hseg[0].ds_addr);
no_split:
		if (rxbuf->m_pack == NULL) {
			mp = m_getjcl(M_DONTWAIT, MT_DATA,
			    M_PKTHDR, adapter->rx_mbuf_sz);
			if (mp == NULL)
				goto update;
		} else
			mp = rxbuf->m_pack;

		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			printf("Refresh mbufs: payload dmamap load"
			    " failure - %d\n", error);
			m_free(mp);
			rxbuf->m_pack = NULL;
			goto update;
		}
		rxbuf->m_pack = mp;
		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
		    BUS_DMASYNC_PREREAD);
		rxr->rx_base[i].read.pkt_addr =
		    htole64(pseg[0].ds_addr);
		refreshed = TRUE; /* I feel wefreshed :) */

		i = j; /* our next is precalculated */
		rxr->next_to_refresh = i;
		if (++j == adapter->num_rx_desc)
			j = 0;
	}
update:
	if (refreshed) /* update tail */
		E1000_WRITE_REG(&adapter->hw,
		    E1000_RDT(rxr->me), rxr->next_to_refresh);
	return;
}
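
/*
 * Sketch of the two-index scheme above (illustrative): j always runs
 * one slot ahead of i as the loop control.  With a 512-entry ring,
 * next_to_refresh == 510 and limit == 5, the loop reloads slots 510,
 * 511, 0, 1, 2 and 3 and leaves next_to_refresh at 4, one short of
 * limit.  Because i only advances after a slot has been successfully
 * reloaded, an mbuf allocation failure leaves next_to_refresh parked
 * on the slot that still needs work, and a later call resumes there.
 */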

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
static int
igb_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct igb_rx_buf	*rxbuf;
	int			i, bsize, error;

	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
	if (!(rxr->rx_buffers =
	    (struct igb_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MSIZE,		/* maxsize */
				   1,			/* nsegments */
				   MSIZE,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->htag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MJUM9BYTES,		/* maxsize */
				   1,			/* nsegments */
				   MJUM9BYTES,		/* maxsegsize */
				   0,			/* flags */
				   NULL,		/* lockfunc */
				   NULL,		/* lockfuncarg */
				   &rxr->ptag))) {
		device_printf(dev, "Unable to create RX payload DMA tag\n");
		goto fail;
	}

	for (i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->htag,
		    BUS_DMA_NOWAIT, &rxbuf->hmap);
		if (error) {
			device_printf(dev,
			    "Unable to create RX head DMA maps\n");
			goto fail;
		}
		error = bus_dmamap_create(rxr->ptag,
		    BUS_DMA_NOWAIT, &rxbuf->pmap);
		if (error) {
			device_printf(dev,
			    "Unable to create RX packet DMA maps\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	igb_free_receive_structures(adapter);
	return (error);
}


static void
igb_free_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct igb_rx_buf	*rxbuf;

	for (int i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->m_head != NULL) {
			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
			rxbuf->m_head->m_flags |= M_PKTHDR;
			m_freem(rxbuf->m_head);
		}
		if (rxbuf->m_pack != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->m_pack->m_flags |= M_PKTHDR;
			m_freem(rxbuf->m_pack);
		}
		rxbuf->m_head = NULL;
		rxbuf->m_pack = NULL;
	}
}

/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
igb_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct igb_rx_buf	*rxbuf;
	bus_dma_segment_t	pseg[1], hseg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;

	adapter = rxr->adapter;
	dev = adapter->dev;
	ifp = adapter->ifp;

	/* Clear the ring contents */
	IGB_RX_LOCK(rxr);
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);

	/*
	** Free current RX buffer structures and their mbufs
	*/
	igb_free_receive_ring(rxr);

	/* Configure for header split? */
	if (igb_header_split)
		rxr->hdr_split = TRUE;

	/* Now replenish the ring mbufs */
	for (int j = 0; j < adapter->num_rx_desc; ++j) {
		struct mbuf	*mh, *mp;

		rxbuf = &rxr->rx_buffers[j];
		if (rxr->hdr_split == FALSE)
			goto skip_head;

		/* First the header */
		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
		if (rxbuf->m_head == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		m_adj(rxbuf->m_head, ETHER_ALIGN);
		mh = rxbuf->m_head;
		mh->m_len = mh->m_pkthdr.len = MHLEN;
		mh->m_flags |= M_PKTHDR;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->htag,
		    rxbuf->hmap, rxbuf->m_head, hseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) /* Nothing elegant to do here */
			goto fail;
		bus_dmamap_sync(rxr->htag,
		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
		/* Now the payload cluster */
		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->m_pack == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->m_pack;
		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, pseg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update descriptor */
		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
	}

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = adapter->num_rx_desc - 1;
	rxr->lro_enabled = FALSE;
	rxr->rx_split_packets = 0;
	rxr->rx_bytes = 0;

	rxr->fmp = NULL;
	rxr->lmp = NULL;
	rxr->discard = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface; we
	** also only do header split when LRO
	** is enabled, since it is so often
	** undesirable in other setups.
	*/
	if (ifp->if_capenable & IFCAP_LRO) {
		error = tcp_lro_init(lro);
		if (error) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IGB_RX_UNLOCK(rxr);
	return (0);

fail:
	igb_free_receive_ring(rxr);
	IGB_RX_UNLOCK(rxr);
	return (error);
}


/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
static int
igb_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int i;

	for (i = 0; i < adapter->num_queues; i++, rxr++)
		if (igb_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'i' is the endpoint.
	 */
	for (int j = 0; j < i; ++j) {
		rxr = &adapter->rx_rings[j];
		IGB_RX_LOCK(rxr);
		igb_free_receive_ring(rxr);
		IGB_RX_UNLOCK(rxr);
	}

	return (ENOBUFS);
}

/*********************************************************************
 *
 *  Enable receive unit.
 *
 **********************************************************************/
static void
igb_initialize_receive_units(struct adapter *adapter)
{
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp = adapter->ifp;
	struct e1000_hw *hw = &adapter->hw;
	u32		rctl, rxcsum, psize, srrctl = 0;

	INIT_DEBUGOUT("igb_initialize_receive_units: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/*
	** Set up for header split
	*/
	if (igb_header_split) {
		/* Use a standard mbuf for the header */
		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	} else
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

	/*
	** Set up for jumbo frames
	*/
	if (ifp->if_mtu > ETHERMTU) {
		rctl |= E1000_RCTL_LPE;
		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
		}
		/* Set maximum packet len */
		psize = adapter->max_frame_size;
		/* are we on a vlan? */
		if (adapter->ifp->if_vlantrunk != NULL)
			psize += VLAN_TAG_SIZE;
		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
	} else {
		rctl &= ~E1000_RCTL_LPE;
		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
		rctl |= E1000_RCTL_SZ_2048;
	}

	/* Setup the Base and Length of the Rx Descriptor Rings */
	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		u64 bus_addr = rxr->rxdma.dma_paddr;
		u32 rxdctl;

		E1000_WRITE_REG(hw, E1000_RDLEN(i),
		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(i),
		    (uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i),
		    (uint32_t)bus_addr);
		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
		/* Enable this Queue */
		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		rxdctl &= 0xFFF00000;
		rxdctl |= IGB_RX_PTHRESH;
		rxdctl |= IGB_RX_HTHRESH << 8;
		rxdctl |= IGB_RX_WTHRESH << 16;
		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
	}

	/*
	** Setup for RX MultiQueue
	*/
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (adapter->num_queues > 1) {
		u32 random[10], mrqc, shift = 0;
		union igb_reta {
			u32 dword;
			u8  bytes[4];
		} reta;

		arc4rand(&random, sizeof(random), 0);
		if (adapter->hw.mac.type == e1000_82575)
			shift = 6;
		/* Warning FM follows */
		for (int i = 0; i < 128; i++) {
			reta.bytes[i & 3] =
			    (i % adapter->num_queues) << shift;
			if ((i & 3) == 3)
				E1000_WRITE_REG(hw,
				    E1000_RETA(i >> 2), reta.dword);
		}
		/* Now fill in hash table */
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
		for (int i = 0; i < 10; i++)
			E1000_WRITE_REG_ARRAY(hw,
			    E1000_RSSRK(0), i, random[i]);

		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);

		/*
		** NOTE: Receive Full-Packet Checksum Offload
		** is mutually exclusive with Multiqueue. However
		** this is not the same as TCP/IP checksums which
		** still work.
		*/
		rxcsum |= E1000_RXCSUM_PCSD;
#if __FreeBSD_version >= 800000
		/* For SCTP Offload */
		if ((hw->mac.type == e1000_82576)
		    && (ifp->if_capenable & IFCAP_RXCSUM))
			rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
	} else {
		/* Non RSS setup */
		if (ifp->if_capenable & IFCAP_RXCSUM) {
			rxcsum |= E1000_RXCSUM_IPPCSE;
#if __FreeBSD_version >= 800000
			if (adapter->hw.mac.type == e1000_82576)
				rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
		} else
			rxcsum &= ~E1000_RXCSUM_TUOFL;
	}
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
	/* Strip CRC bytes. */
	rctl |= E1000_RCTL_SECRC;
	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;
	/* Don't store bad packets */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Receives */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers
	 *   - needs to be after enable
	 */
	for (int i = 0; i < adapter->num_queues; i++) {
		rxr = &adapter->rx_rings[i];
		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
	}
	return;
}
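
/*
 * Illustrative note on the RETA loop above (not driver code): the
 * 128-entry redirection table is written four 8-bit entries at a time.
 * With num_queues == 4 and shift == 0, the first dword written on a
 * little-endian host is
 *
 *	reta.bytes[] = { 0, 1, 2, 3 }	->  E1000_RETA(0) = 0x03020100
 *
 * so RSS hash results are spread round-robin across the four queues;
 * on the 82575 the queue numbers are shifted left by 6 to match that
 * controller's RETA layout.
 */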

/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
static void
igb_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl	*lro = &rxr->lro;
		igb_free_receive_buffers(rxr);
		tcp_lro_free(lro);
		igb_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free receive ring data structures.
 *
 **********************************************************************/
static void
igb_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct igb_rx_buf	*rxbuf;
	int i;

	INIT_DEBUGOUT("free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->m_head != NULL) {
				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
				rxbuf->m_head->m_flags |= M_PKTHDR;
				m_freem(rxbuf->m_head);
			}
			if (rxbuf->m_pack != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->m_pack->m_flags |= M_PKTHDR;
				m_freem(rxbuf->m_pack);
			}
			rxbuf->m_head = NULL;
			rxbuf->m_pack = NULL;
			if (rxbuf->hmap != NULL) {
				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
				rxbuf->hmap = NULL;
			}
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->htag != NULL) {
		bus_dma_tag_destroy(rxr->htag);
		rxr->htag = NULL;
	}
	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}
}

static __inline void
igb_rx_discard(struct rx_ring *rxr, int i)
{
	struct igb_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];

	/* Partially received? Free the chain */
	if (rxr->fmp != NULL) {
		rxr->fmp->m_flags |= M_PKTHDR;
		m_freem(rxr->fmp);
		rxr->fmp = NULL;
		rxr->lmp = NULL;
	}

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->m_head) {
		m_free(rbuf->m_head);
		rbuf->m_head = NULL;
	}

	if (rbuf->m_pack) {
		m_free(rbuf->m_pack);
		rbuf->m_pack = NULL;
	}

	return;
}

static __inline void
igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{

	/*
	 * At the moment LRO is only for IPv4/TCP packets whose TCP
	 * checksum has been computed by the hardware, and which carry
	 * no VLAN tag in the ethernet header.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IGB_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IGB_RX_LOCK(rxr);
}

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  We loop at most count times if count is > 0, or until done if
 *  count < 0.
 *
 *  Return TRUE if more to clean, FALSE otherwise
 *********************************************************************/
static bool
igb_rxeof(struct igb_queue *que, int count, int *done)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, processed = 0, rxdone = 0;
	u32			ptype, staterr = 0;
	union e1000_adv_rx_desc	*cur;

	IGB_RX_LOCK(rxr);
	/* Sync the ring. */
	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/* Main clean loop */
	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf		*sendmp, *mh, *mp;
		struct igb_rx_buf	*rxbuf;
		u16			hlen, plen, hdr, vtag;
		bool			eop = FALSE;

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		count--;
		sendmp = mh = mp = NULL;
		cur->wb.upper.status_error = 0;
		rxbuf = &rxr->rx_buffers[i];
		plen = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
		if ((adapter->hw.mac.type == e1000_i350) &&
		    (staterr & E1000_RXDEXT_STATERR_LB))
			vtag = be16toh(cur->wb.upper.vlan);
		else
			vtag = le16toh(cur->wb.upper.vlan);
		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);

		/* Make sure all segments of a bad packet are discarded */
		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
		    (rxr->discard)) {
			ifp->if_ierrors++;
			++rxr->rx_discarded;
			if (!eop) /* Catch subsequent segs */
				rxr->discard = TRUE;
			else
				rxr->discard = FALSE;
			igb_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** The way the hardware is configured to
		** split, it will ONLY use the header buffer
		** when header split is enabled, otherwise we
		** get normal behavior, ie, both header and
		** payload are DMA'd into the payload buffer.
		**
		** The fmp test is to catch the case where a
		** packet spans multiple descriptors, in that
		** case only the first header is valid.
		*/
		if (rxr->hdr_split && rxr->fmp == NULL) {
			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
			    E1000_RXDADV_HDRBUFLEN_SHIFT;
			if (hlen > IGB_HDR_BUF)
				hlen = IGB_HDR_BUF;
			mh = rxr->rx_buffers[i].m_head;
			mh->m_len = hlen;
			/* clear buf pointer for refresh */
			rxbuf->m_head = NULL;
			/*
			** Get the payload length, this
			** could be zero if it's a small
			** packet.
			*/
			if (plen > 0) {
				mp = rxr->rx_buffers[i].m_pack;
				mp->m_len = plen;
				mh->m_next = mp;
				/* clear buf pointer */
				rxbuf->m_pack = NULL;
				rxr->rx_split_packets++;
			}
		} else {
			/*
			** Either no header split, or a
			** secondary piece of a fragmented
			** split packet.
			*/
			mh = rxr->rx_buffers[i].m_pack;
			mh->m_len = plen;
			/* clear buf info for refresh */
			rxbuf->m_pack = NULL;
		}

		++processed; /* So we know when to refresh */

		/* Initial frame - setup */
		if (rxr->fmp == NULL) {
			mh->m_pkthdr.len = mh->m_len;
			/* Save the head of the chain */
			rxr->fmp = mh;
			rxr->lmp = mh;
			if (mp != NULL) {
				/* Add payload if split */
				mh->m_pkthdr.len += mp->m_len;
				rxr->lmp = mh->m_next;
			}
		} else {
			/* Chain mbuf's together */
			rxr->lmp->m_next = mh;
			rxr->lmp = rxr->lmp->m_next;
			rxr->fmp->m_pkthdr.len += mh->m_len;
		}

		if (eop) {
			rxr->fmp->m_pkthdr.rcvif = ifp;
			ifp->if_ipackets++;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->packets++;
			rxr->bytes += rxr->fmp->m_pkthdr.len;
			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;

			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				igb_rx_checksum(staterr, rxr->fmp, ptype);

			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
			    (staterr & E1000_RXD_STAT_VP) != 0) {
				rxr->fmp->m_pkthdr.ether_vtag = vtag;
				rxr->fmp->m_flags |= M_VLANTAG;
			}
#if __FreeBSD_version >= 800000
			rxr->fmp->m_pkthdr.flowid = que->msix;
			rxr->fmp->m_flags |= M_FLOWID;
#endif
			sendmp = rxr->fmp;
			/* Make sure to set M_PKTHDR. */
			sendmp->m_flags |= M_PKTHDR;
			rxr->fmp = NULL;
			rxr->lmp = NULL;
		}

next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		/*
		** Send to the stack or LRO
		*/
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			igb_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
			rxdone++;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			igb_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Catch any remainders */
	if (igb_rx_unrefreshed(rxr))
		igb_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	if (done != NULL)
		*done = rxdone;

	IGB_RX_UNLOCK(rxr);
	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
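
/*
 * Illustrative example for the header-split path above (not driver
 * code, and assuming the usual 10-bit header-length field starting at
 * bit 5 of hdr_info): if the NIC reports hdr == 0x0380, then
 *
 *	hlen = (0x0380 & E1000_RXDADV_HDRBUFLEN_MASK)
 *	    >> E1000_RXDADV_HDRBUFLEN_SHIFT;	->  28 bytes
 *
 * and the result is capped at IGB_HDR_BUF so a bogus length can never
 * overrun the small header mbuf.
 */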

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
	u16 status = (u16)staterr;
	u8  errors = (u8) (staterr >> 24);
	int sctp;

	/* The Ignore Checksum bit is set */
	if (status & E1000_RXD_STAT_IXSM) {
		mp->m_pkthdr.csum_flags = 0;
		return;
	}

	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = 1;
	else
		sctp = 0;
	if (status & E1000_RXD_STAT_IPCS) {
		/* Did it pass? */
		if (!(errors & E1000_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}

	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp) /* reassign */
			type = CSUM_SCTP_VALID;
#endif
		/* Did it pass? */
		if (!(errors & E1000_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (sctp == 0)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
	return;
}

/*
 * This routine is run via a vlan
 * config EVENT
 */
static void
igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)	/* Not our event */
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IGB_CORE_LOCK(adapter);
	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	adapter->shadow_vfta[index] |= (1 << bit);
	++adapter->num_vlans;
	/* Change hw filter setting */
	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
		igb_setup_vlan_hw_support(adapter);
	IGB_CORE_UNLOCK(adapter);
}
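
/*
 * Worked example of the VFTA indexing above (illustrative): the 4096
 * possible VLAN IDs map onto 128 32-bit filter words.  For vtag 100:
 *
 *	index = (100 >> 5) & 0x7F;	->  3
 *	bit   = 100 & 0x1F;		->  4
 *
 * so bit 4 of shadow_vfta[3] is set, and that same word/bit pair is
 * what igb_setup_vlan_hw_support() eventually pushes into the hardware
 * VLAN filter table.
 */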

/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
static void
igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
	struct adapter	*adapter = ifp->if_softc;
	u32		index, bit;

	if (ifp->if_softc != arg)
		return;

	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
		return;

	IGB_CORE_LOCK(adapter);
	index = (vtag >> 5) & 0x7F;
	bit = vtag & 0x1F;
	adapter->shadow_vfta[index] &= ~(1 << bit);
	--adapter->num_vlans;
	/* Change hw filter setting */
	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
		igb_setup_vlan_hw_support(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_setup_vlan_hw_support(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet	*ifp = adapter->ifp;
	u32		reg;

	if (adapter->vf_ifp) {
		e1000_rlpml_set_vf(hw,
		    adapter->max_frame_size + VLAN_TAG_SIZE);
		return;
	}

	reg = E1000_READ_REG(hw, E1000_CTRL);
	reg |= E1000_CTRL_VME;
	E1000_WRITE_REG(hw, E1000_CTRL, reg);

	/* Enable the Filter Table */
	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
		reg = E1000_READ_REG(hw, E1000_RCTL);
		reg &= ~E1000_RCTL_CFIEN;
		reg |= E1000_RCTL_VFE;
		E1000_WRITE_REG(hw, E1000_RCTL, reg);
	}

	/* Update the frame size */
	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
	    adapter->max_frame_size + VLAN_TAG_SIZE);

	/* Don't bother with the table if no vlans */
	if ((adapter->num_vlans == 0) ||
	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
		return;
	/*
	** A soft reset zeroes out the VFTA, so
	** we need to repopulate it now.
	*/
	for (int i = 0; i < IGB_VFTA_SIZE; i++)
		if (adapter->shadow_vfta[i] != 0) {
			if (adapter->vf_ifp)
				e1000_vfta_set_vf(hw,
				    adapter->shadow_vfta[i], TRUE);
			else
				e1000_write_vfta(hw,
				    i, adapter->shadow_vfta[i]);
		}
}

static void
igb_enable_intr(struct adapter *adapter)
{
	/* With RSS set up what to auto clear */
	if (adapter->msix_mem) {
		u32 mask = (adapter->que_mask | adapter->link_mask);
		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    E1000_IMS_LSC);
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    IMS_ENABLE_MASK);
	}
	E1000_WRITE_FLUSH(&adapter->hw);

	return;
}

static void
igb_disable_intr(struct adapter *adapter)
{
	if (adapter->msix_mem) {
		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
	}
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
	E1000_WRITE_FLUSH(&adapter->hw);
	return;
}

/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system... aka
 * to disable special hardware management features
 */
static void
igb_init_manageability(struct adapter *adapter)
{
	if (adapter->has_manage) {
		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* disable hardware interception of ARP */
		manc &= ~(E1000_MANC_ARP_EN);

		/* enable receiving management packets to the host */
		manc |= E1000_MANC_EN_MNG2HOST;
		manc2h |= 1 << 5;  /* Mng Port 623 */
		manc2h |= 1 << 6;  /* Mng Port 664 */
		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}

/*
 * Give control back to the hardware management
 * controller if there is one.
 */
static void
igb_release_manageability(struct adapter *adapter)
{
	if (adapter->has_manage) {
		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

		/* re-enable hardware interception of ARP */
		manc |= E1000_MANC_ARP_EN;
		manc &= ~E1000_MANC_EN_MNG2HOST;

		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
	}
}

/*
 * igb_get_hw_control sets the CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 */
static void
igb_get_hw_control(struct adapter *adapter)
{
	u32 ctrl_ext;

	if (adapter->vf_ifp)
		return;

	/* Let firmware know the driver has taken over */
	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/*
 * igb_release_hw_control resets the CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 */
static void
igb_release_hw_control(struct adapter *adapter)
{
	u32 ctrl_ext;

	if (adapter->vf_ifp)
		return;

	/* Let firmware take over control of h/w */
	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

static int
igb_is_valid_ether_addr(uint8_t *addr)
{
	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };

	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
		return (FALSE);
	}

	return (TRUE);
}


/*
 * Enable PCI Wake On Lan capability
 */
static void
igb_enable_wakeup(device_t dev)
{
	u16     cap, status;
	u8      id;

	/* First find the capabilities pointer */
	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
	/* Read the PM Capabilities */
	id = pci_read_config(dev, cap, 1);
	if (id != PCIY_PMG)     /* Something wrong */
		return;
	/* OK, we have the power capabilities, so
	   now get the status register */
	cap += PCIR_POWER_STATUS;
	status = pci_read_config(dev, cap, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, cap, status, 2);
	return;
}

static void
igb_led_func(void *arg, int onoff)
{
	struct adapter	*adapter = arg;

	IGB_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	IGB_CORE_UNLOCK(adapter);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
igb_update_stats_counters(struct adapter *adapter)
{
	struct ifnet		*ifp;
	struct e1000_hw		*hw = &adapter->hw;
	struct e1000_hw_stats	*stats;

	/*
	** The virtual function adapter has only a
	** small controlled set of stats, so do only
	** those and return.
	*/
	if (adapter->vf_ifp) {
		igb_update_vf_stats_counters(adapter);
		return;
	}

	stats = (struct e1000_hw_stats *)adapter->stats;

	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
		stats->symerrs +=
		    E1000_READ_REG(hw, E1000_SYMERRS);
		stats->sec += E1000_READ_REG(hw, E1000_SEC);
	}

	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
	stats->scc += E1000_READ_REG(hw, E1000_SCC);
	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);

	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
	stats->colc += E1000_READ_REG(hw, E1000_COLC);
	stats->dc += E1000_READ_REG(hw, E1000_DC);
	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
	/*
	** For watchdog management we need to know if we have been
	** paused during the last interval, so capture that here.
	*/
	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	stats->xoffrxc += adapter->pause_frames;
	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);

	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
	stats->roc += E1000_READ_REG(hw, E1000_ROC);
	stats->rjc += E1000_READ_REG(hw, E1000_RJC);

	stats->tor += E1000_READ_REG(hw, E1000_TORH);
	stats->tot += E1000_READ_REG(hw, E1000_TOTH);

	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);

	/* Interrupt Counts */

	stats->iac += E1000_READ_REG(hw, E1000_IAC);
	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);

	/* Host to Card Statistics */

	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);

	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);

	ifp = adapter->ifp;
	ifp->if_collisions = stats->colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
	    stats->crcerrs + stats->algnerrc +
	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;

	/* Tx Errors */
	ifp->if_oerrors = stats->ecol +
	    stats->latecol + adapter->watchdog_events;

	/* Driver specific counters */
	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
	adapter->packet_buf_alloc_tx =
	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
	adapter->packet_buf_alloc_rx =
	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
}
5123
5124
5125/**********************************************************************
5126 *
5127 *  Initialize the VF board statistics counters.
5128 *
5129 **********************************************************************/
5130static void
5131igb_vf_init_stats(struct adapter *adapter)
5132{
5133        struct e1000_hw *hw = &adapter->hw;
5134	struct e1000_vf_stats	*stats;
5135
5136	stats = (struct e1000_vf_stats	*)adapter->stats;
5137	if (stats == NULL)
5138		return;
5139        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5140        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5141        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5142        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5143        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5144}

/**********************************************************************
 *
 *  Update the VF board statistics counters.
 *
 **********************************************************************/
static void
igb_update_vf_stats_counters(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_vf_stats	*stats;

	if (adapter->link_speed == 0)
		return;

	stats = (struct e1000_vf_stats *)adapter->stats;

	UPDATE_VF_REG(E1000_VFGPRC,
	    stats->last_gprc, stats->gprc);
	UPDATE_VF_REG(E1000_VFGORC,
	    stats->last_gorc, stats->gorc);
	UPDATE_VF_REG(E1000_VFGPTC,
	    stats->last_gptc, stats->gptc);
	UPDATE_VF_REG(E1000_VFGOTC,
	    stats->last_gotc, stats->gotc);
	UPDATE_VF_REG(E1000_VFMPRC,
	    stats->last_mprc, stats->mprc);
}
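
/*
 * UPDATE_VF_REG (from if_igb.h) widens the free-running 32-bit VF
 * counters into the 64-bit accumulators above.  A sketch of the idea
 * (not a verbatim copy of the macro):
 *
 *	u32 cur = E1000_READ_REG(hw, reg);
 *	if (cur < last)			// 32-bit counter wrapped
 *		count += (u64)1 << 32;	// credit one full wrap
 *	last = cur;
 *	count = (count & 0xFFFFFFFF00000000ULL) | cur;
 */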

/* Export a single 32-bit register via a read-only sysctl. */
static int
igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}
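
/*
 * Example: with arg2 = E1000_TDH(0) this backs the "txd_head" node
 * created in igb_add_hw_stats(), readable from userland as, e.g.
 * (device unit illustrative):
 *
 *	# sysctl dev.igb.0.queue0.txd_head
 */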

/*
** Tunable interrupt rate handler: reports the interrupt rate
** implied by the queue's current EITR setting.  Note that in this
** revision a write is accepted but does not reprogram EITR.
*/
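/*
 * Worked example (illustrative): an EITR value of 0x1F4 gives
 * usec = (0x1F4 & 0x7FFC) >> 2 = 125, so the handler reports
 * 1000000 / 125 = 8000 interrupts/sec.
 */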
static int
igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
{
	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
	int			error;
	u32			reg, usec, rate;

	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
	usec = ((reg & 0x7FFC) >> 2);
	if (usec > 0)
		rate = 1000000 / usec;
	else
		rate = 0;
	error = sysctl_handle_int(oidp, &rate, 0, req);
	if (error || !req->newptr)
		return (error);
	return (0);
}

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
igb_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq, 0,
			"Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
			CTLFLAG_RD, &adapter->device_control,
			"Device Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
			CTLFLAG_RD, &adapter->rx_control,
			"Receiver Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
			CTLFLAG_RD, &adapter->int_mask,
			"Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
			CTLFLAG_RD, &adapter->eint_mask,
			"Extended Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
			"Transmit Buffer Packet Allocation");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
			"Receive Buffer Packet Allocation");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		struct lro_ctrl *lro = &rxr->lro;

		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
				CTLFLAG_RD, &adapter->queues[i],
				sizeof(adapter->queues[i]),
				igb_sysctl_interrupt_rate_handler,
				"IU", "Interrupt Rate");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
				CTLFLAG_RD, &txr->tx_packets,
				"Queue Packets Transmitted");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
				igb_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
				igb_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
				CTLFLAG_RD, &rxr->rx_packets,
				"Queue Packets Received");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
				CTLFLAG_RD, &rxr->rx_bytes,
				"Queue Bytes Received");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
				CTLFLAG_RD, &lro->lro_queued, 0,
				"LRO Queued");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
				CTLFLAG_RD, &lro->lro_flushed, 0,
				"LRO Flushed");
	}
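
	/*
	 * The loop above publishes the per-queue nodes; from userland
	 * they appear as, e.g. (unit and queue numbers illustrative):
	 *
	 *	dev.igb.0.queue0.interrupt_rate
	 *	dev.igb.0.queue0.rx_packets
	 */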

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "MAC Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	/*
	** The VF adapter has a very limited set of stats
	** since it's not managing the metal, so to speak.
	** Note: adapter->stats points at an e1000_vf_stats in
	** this case, whose layout differs from e1000_hw_stats.
	*/
	if (adapter->vf_ifp) {
		struct e1000_vf_stats *vfstats =
		    (struct e1000_vf_stats *)adapter->stats;

		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
				CTLFLAG_RD, &vfstats->gprc,
				"Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
				CTLFLAG_RD, &vfstats->gptc,
				"Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
				CTLFLAG_RD, &vfstats->gorc,
				"Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
				CTLFLAG_RD, &vfstats->gotc,
				"Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
				CTLFLAG_RD, &vfstats->mprc,
				"Multicast Packets Received");
		return;
	}

	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &stats->symerrs,
			"Symbol Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &stats->sec,
			"Sequence Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &stats->dc,
			"Defer Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &stats->mpc,
			"Missed Packets");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &stats->rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &stats->ruc,
			"Receive Undersize");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &stats->roc,
			"Oversized Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &stats->rxerrc,
			"Receive Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &stats->crcerrs,
			"CRC errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &stats->algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &stats->cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &stats->xonrxc,
			"XON Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &stats->xontxc,
			"XON Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &stats->xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &stats->xofftxc,
			"XOFF Transmitted");
	/* Packet Reception Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &stats->gprc,
			"Good Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &stats->bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &stats->mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &stats->prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &stats->prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &stats->prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &stats->prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &stats->tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &stats->gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &stats->bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &stats->mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &stats->ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &stats->ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &stats->ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &stats->ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &stats->ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &stats->tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &stats->tsctfc,
			"TSO Contexts Failed");


	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &stats->iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &stats->icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &stats->icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &stats->ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &stats->ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &stats->ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &stats->ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &stats->icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &stats->icrxoc,
			"Interrupt Cause Receiver Overrun Count");

	/* Host to Card Stats */

	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
				    CTLFLAG_RD, NULL,
				    "Host to Card Statistics");

	host_list = SYSCTL_CHILDREN(host_node);

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
			CTLFLAG_RD, &stats->cbtmpc,
			"Circuit Breaker Tx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
			CTLFLAG_RD, &stats->htdpmc,
			"Host Transmit Discarded Packets");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
			CTLFLAG_RD, &stats->rpthc,
			"Rx Packets To Host");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
			CTLFLAG_RD, &stats->cbrmpc,
			"Circuit Breaker Rx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
			CTLFLAG_RD, &stats->cbrdpc,
			"Circuit Breaker Rx Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
			CTLFLAG_RD, &stats->hgptc,
			"Host Good Packets Tx Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
			CTLFLAG_RD, &stats->htcbdpc,
			"Host Tx Circuit Breaker Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
			CTLFLAG_RD, &stats->hgorc,
			"Host Good Octets Received Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
			CTLFLAG_RD, &stats->hgotc,
			"Host Good Octets Transmit Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
			CTLFLAG_RD, &stats->lenerrs,
			"Length Errors");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
			CTLFLAG_RD, &stats->scvpc,
			"SerDes/SGMII Code Violation Pkt Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
			CTLFLAG_RD, &stats->hrmpc,
			"Header Redirection Missed Packet Count");
}


/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool.  It only dumps the first
 *  32 words, which is where the interesting contents live.
 *
 **********************************************************************/
static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1) {
		adapter = (struct adapter *)arg1;
		igb_print_nvm_info(adapter);
	}

	return (error);
}
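
/*
 * Example (node name per the SYSCTL_ADD_PROC registration in
 * igb_attach(); device unit illustrative):
 *
 *	# sysctl dev.igb.0.nvm=1
 *
 * Writing 1 triggers the hex dump to the console.
 */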

static void
igb_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static void
igb_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
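
/*
 * Typical use (field name and default illustrative): a driver limit
 * can be seeded and exposed read-write in one call, e.g.
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */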

/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
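/*
 * Example (node name per the SYSCTL_ADD_PROC registration in
 * igb_attach(); device unit illustrative):
 *
 *	# sysctl dev.igb.0.flow_control=3
 *
 * requests full (rx + tx) pause-frame flow control.
 */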
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int		error;
	/* Note: static, so this value is shared by all igb instances */
	static int	input = 3; /* default is full */
	struct adapter	*adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (input) {
		case e1000_fc_rx_pause:
		case e1000_fc_tx_pause:
		case e1000_fc_full:
		case e1000_fc_none:
			adapter->hw.fc.requested_mode = input;
			adapter->fc = input;
			break;
		default:
			/* Unsupported value, do nothing */
			return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}

/*
** Manage DMA Coalesce:
** Control values:
**	0/1 - off/on
**	Legal timer values are:
**	250, 500, and 1000-10000 in increments of 1000
*/
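/*
 * Example (node name per the SYSCTL_ADD_PROC registration in
 * igb_attach(); device unit illustrative):
 *
 *	# sysctl dev.igb.0.dmac=1000
 *
 * selects the 1000 timer value (the same default that writing 1
 * picks); an out-of-range value resets the control to 0 (off).
 */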
static int
igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error;

	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->dmac) {
		case 0:
			/* Disabling */
			break;
		case 1: /* Just enable and use default */
			adapter->dmac = 1000;
			break;
		case 250:
		case 500:
		case 1000:
		case 2000:
		case 3000:
		case 4000:
		case 5000:
		case 6000:
		case 7000:
		case 8000:
		case 9000:
		case 10000:
			/* Legal values - allow */
			break;
		default:
			/* Do nothing, illegal value */
			adapter->dmac = 0;
			return (error);
	}
	/* Reinit the interface */
	igb_init(adapter);
	return (error);
}