/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 208103 2010-05-14 22:18:34Z jfv $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
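/*
** These descriptor counts are boot-time loader tunables; for
** example (the values here are illustrative only) one might set
** in /boot/loader.conf:
**   hw.igb.rxd="2048"
**   hw.igb.txd="2048"
** Out-of-range values fall back to the defaults in igb_attach().
*/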

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
 * Header split has seemed to be beneficial in
 * many circumstances tested; however, there have
 * been some stability issues, so the default is
 * off.
 */
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
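/*
** The value follows the e1000_fc_type enum of the shared code
** shipped with this driver (0 = none, 1 = rx pause, 2 = tx pause,
** 3 = full; see e1000_hw.h).
*/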

/*
** Shadow VFTA table; this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];


/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. The
	 * count must not exceed the hardware maximum and must be a
	 * multiple of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
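	/*
	** With the standard MTU this works out to 1500 (ETHERMTU) +
	** 14 (ETHER_HDR_LEN) + 4 (FCS) = 1518 bytes.
	*/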

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state; this is
	** important for reading the nvm and
	** mac from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again:
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
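	/*
	** adapter->msix was set when the PCI resources were allocated
	** (igb_setup_msix()); a count > 1 indicates a vector per queue
	** plus a separate link vector was obtained.
	*/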
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

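	/*
	** Arm wake-on-LAN: WUC.PME_EN turns on PME assertion and
	** WUFC selects the wake events; adapter->wol was set to
	** E1000_WUFC_MAG (magic packet) at attach if APME was set.
	*/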
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->watchdog_check = TRUE;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int 		i = 0, err = 0;

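	/*
	** M_FLOWID means the stack has attached a flow hash (e.g.
	** the RSS hash taken on receive); hashing it to a ring keeps
	** each flow's packets ordered on a single queue.
	*/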
	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else
		err = drbr_enqueue(ifp, txr->br, m);

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we
			 * initialize the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
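		/* FALLTHROUGH: shares the media handling below */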
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to get
 *  to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			igb_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
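	/*
	** (MCLBYTES is the standard 2K mbuf cluster; MJUMPAGESIZE
	** is a page-sized jumbo cluster, typically 4K.)
	*/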
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_rxtx(void *context, int pending)
{
	struct igb_queue	*que = context;
	struct adapter		*adapter = que->adapter;
	struct tx_ring		*txr = adapter->tx_rings;
	struct ifnet		*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(que, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;
	bool		more;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
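		/* A limit of -1 lets rxeof clean until the ring is empty */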
		more = igb_rxeof(que, -1);

		IGB_TX_LOCK(txr);
		igb_txeof(txr);
#if __FreeBSD_version >= 800000
		igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

	/* Reenable this interrupt */
#ifdef DEVICE_POLLING
	if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine : if using this code you MUST be sure that
 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			taskqueue_enqueue(adapter->tq, &adapter->link_task);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	/* TODO: rx_count */
	rx_done = igb_rxeof(que, count) ? 1 : 0;

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Queue Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
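	/*
	** Worked example: bulk traffic averaging 1500-byte frames
	** gives newitr = 1500 + 24 = 1524, which falls outside the
	** (300, 1200) mid range and is halved to 762, i.e. a longer
	** interval (fewer interrupts) than small-packet loads get.
	*/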
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}
1565
1566/*********************************************************************
1567 *
1568 *  Media Ioctl callback
1569 *
1570 *  This routine is called when the user changes speed/duplex using
1571 *  media/mediaopt options with ifconfig.
1572 *
1573 **********************************************************************/
1574static int
1575igb_media_change(struct ifnet *ifp)
1576{
1577	struct adapter *adapter = ifp->if_softc;
1578	struct ifmedia  *ifm = &adapter->media;
1579
1580	INIT_DEBUGOUT("igb_media_change: begin");
1581
1582	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1583		return (EINVAL);
1584
1585	IGB_CORE_LOCK(adapter);
1586	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1587	case IFM_AUTO:
1588		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1589		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1590		break;
1591	case IFM_1000_LX:
1592	case IFM_1000_SX:
1593	case IFM_1000_T:
1594		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1595		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1596		break;
1597	case IFM_100_TX:
1598		adapter->hw.mac.autoneg = FALSE;
1599		adapter->hw.phy.autoneg_advertised = 0;
1600		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1602		else
1603			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1604		break;
1605	case IFM_10_T:
1606		adapter->hw.mac.autoneg = FALSE;
1607		adapter->hw.phy.autoneg_advertised = 0;
1608		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1609			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1610		else
1611			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1612		break;
1613	default:
1614		device_printf(adapter->dev, "Unsupported media type\n");
1615	}
1616
1617	/* As the speed/duplex settings may have changed we need to
1618	 * reset the PHY.
1619	 */
1620	adapter->hw.phy.reset_disable = FALSE;
1621
1622	igb_init_locked(adapter);
1623	IGB_CORE_UNLOCK(adapter);
1624
1625	return (0);
1626}
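
/*
 * Example (assuming standard FreeBSD ifconfig media semantics, not a
 * verbatim excerpt of any manual): forcing 100Mb full duplex on the
 * first igb interface would be
 *
 *	ifconfig igb0 media 100baseTX mediaopt full-duplex
 *
 * which lands in the IFM_100_TX case above with IFM_FDX set, so
 * autonegotiation is disabled and forced_speed_duplex is applied.
 */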
1627
1628
1629/*********************************************************************
1630 *
1631 *  This routine maps the mbufs to Advanced TX descriptors
1632 *  used by the 82575 adapter.
1633 *
1634 **********************************************************************/
1635
1636static int
1637igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1638{
1639	struct adapter		*adapter = txr->adapter;
1640	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1641	bus_dmamap_t		map;
1642	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1643	union e1000_adv_tx_desc	*txd = NULL;
1644	struct mbuf		*m_head;
1645	u32			olinfo_status = 0, cmd_type_len = 0;
1646	int			nsegs, i, j, error, first, last = 0;
1647	u32			hdrlen = 0;
1648
1649	m_head = *m_headp;
1650
1651
1652	/* Set basic descriptor constants */
1653	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1654	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1655	if (m_head->m_flags & M_VLANTAG)
1656		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1657
1658	/*
1659	 * Force a cleanup if the number of available
1660	 * TX descriptors hits the threshold.
1661	 */
1662	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1663		igb_txeof(txr);
1664		/* Do we now have at least the minimum? */
1665		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1666			txr->no_desc_avail++;
1667			return (ENOBUFS);
1668		}
1669	}
1670
1671	/*
1672         * Map the packet for DMA.
1673	 *
1674	 * Capture the first descriptor index,
1675	 * this descriptor will have the index
1676	 * of the EOP which is the only one that
1677	 * now gets a DONE bit writeback.
1678	 */
1679	first = txr->next_avail_desc;
1680	tx_buffer = &txr->tx_buffers[first];
1681	tx_buffer_mapped = tx_buffer;
1682	map = tx_buffer->map;
1683
1684	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1685	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1686
1687	if (error == EFBIG) {
1688		struct mbuf *m;
1689
1690		m = m_defrag(*m_headp, M_DONTWAIT);
1691		if (m == NULL) {
1692			adapter->mbuf_defrag_failed++;
1693			m_freem(*m_headp);
1694			*m_headp = NULL;
1695			return (ENOBUFS);
1696		}
1697		*m_headp = m;
1698
1699		/* Try it again */
1700		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1701		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1702
1703		if (error == ENOMEM) {
1704			adapter->no_tx_dma_setup++;
1705			return (error);
1706		} else if (error != 0) {
1707			adapter->no_tx_dma_setup++;
1708			m_freem(*m_headp);
1709			*m_headp = NULL;
1710			return (error);
1711		}
1712	} else if (error == ENOMEM) {
1713		adapter->no_tx_dma_setup++;
1714		return (error);
1715	} else if (error != 0) {
1716		adapter->no_tx_dma_setup++;
1717		m_freem(*m_headp);
1718		*m_headp = NULL;
1719		return (error);
1720	}
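	/*
	 * Summary of the error handling above: EFBIG (too many
	 * scatter segments) gets one m_defrag() retry; ENOMEM is
	 * treated as transient, so the mbuf chain is kept for a
	 * later attempt; any other error is fatal for this packet
	 * and the chain is freed.
	 */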
1721
1722	/* Check again to be sure we have enough descriptors */
1723	if (nsegs > (txr->tx_avail - 2)) {
1724		txr->no_desc_avail++;
1725		bus_dmamap_unload(txr->txtag, map);
1726		return (ENOBUFS);
1727	}
1728	m_head = *m_headp;
1729
1730	/*
1731	 * Set up the context descriptor:
1732	 * used when any hardware offload is done.
1733	 * This includes CSUM, VLAN, and TSO. It
1734	 * will use the first descriptor.
1735	 */
1736	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1737		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1738			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1739			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1740			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1741		} else
1742			return (ENXIO);
1743	} else if (igb_tx_ctx_setup(txr, m_head))
1744		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1745
1746	/* Calculate payload length */
1747	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1748	    << E1000_ADVTXD_PAYLEN_SHIFT);
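	/*
	 * At this point olinfo_status carries the checksum offload
	 * bits (POPTS, shifted into bits 15:8) and the total payload
	 * length in the PAYLEN field (per E1000_ADVTXD_PAYLEN_SHIFT);
	 * the same value is replicated into every descriptor of the
	 * packet in the loop below.
	 */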
1749
1750	/* 82575 needs the queue index added */
1751	if (adapter->hw.mac.type == e1000_82575)
1752		olinfo_status |= txr->me << 4;
1753
1754	/* Set up our transmit descriptors */
1755	i = txr->next_avail_desc;
1756	for (j = 0; j < nsegs; j++) {
1757		bus_size_t seg_len;
1758		bus_addr_t seg_addr;
1759
1760		tx_buffer = &txr->tx_buffers[i];
1761		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1762		seg_addr = segs[j].ds_addr;
1763		seg_len  = segs[j].ds_len;
1764
1765		txd->read.buffer_addr = htole64(seg_addr);
1766		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1767		txd->read.olinfo_status = htole32(olinfo_status);
1768		last = i;
1769		if (++i == adapter->num_tx_desc)
1770			i = 0;
1771		tx_buffer->m_head = NULL;
1772		tx_buffer->next_eop = -1;
1773	}
1774
1775	txr->next_avail_desc = i;
1776	txr->tx_avail -= nsegs;
1777
1778	tx_buffer->m_head = m_head;
1779	tx_buffer_mapped->map = tx_buffer->map;
1780	tx_buffer->map = map;
1781	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1782
1783	/*
1784	 * The last descriptor of the packet needs the
1785	 * End Of Packet (EOP) and
1786	 * Report Status (RS) bits set.
1787	 */
1788	txd->read.cmd_type_len |=
1789	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1790	/*
1791	 * Keep track in the first buffer which
1792	 * descriptor will be written back
1793	 */
1794	tx_buffer = &txr->tx_buffers[first];
1795	tx_buffer->next_eop = last;
1796	txr->watchdog_time = ticks;
1797
1798	/*
1799	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1800	 * that this frame is available to transmit.
1801	 */
1802	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1803	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1804	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1805	++txr->tx_packets;
1806
1807	return (0);
1808
1809}
1810
1811static void
1812igb_set_promisc(struct adapter *adapter)
1813{
1814	struct ifnet	*ifp = adapter->ifp;
1815	uint32_t	reg_rctl;
1816
1817	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1818
1819	if (ifp->if_flags & IFF_PROMISC) {
1820		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1821		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1822	} else if (ifp->if_flags & IFF_ALLMULTI) {
1823		reg_rctl |= E1000_RCTL_MPE;
1824		reg_rctl &= ~E1000_RCTL_UPE;
1825		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1826	}
1827}
1828
1829static void
1830igb_disable_promisc(struct adapter *adapter)
1831{
1832	uint32_t	reg_rctl;
1833
1834	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1835
1836	reg_rctl &= (~E1000_RCTL_UPE);
1837	reg_rctl &= (~E1000_RCTL_MPE);
1838	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1839}
1840
1841
1842/*********************************************************************
1843 *  Multicast Update
1844 *
1845 *  This routine is called whenever multicast address list is updated.
1846 *
1847 **********************************************************************/
1848
1849static void
1850igb_set_multi(struct adapter *adapter)
1851{
1852	struct ifnet	*ifp = adapter->ifp;
1853	struct ifmultiaddr *ifma;
1854	u32 reg_rctl = 0;
1855	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1856
1857	int mcnt = 0;
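	/*
	 * Note: mta is a flat byte array of packed 6-byte link-level
	 * addresses, e.g. mta[0..5] holds the first group address and
	 * mta[6..11] the second; e1000_update_mc_addr_list() consumes
	 * it in that form along with the count in mcnt.
	 */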
1858
1859	IOCTL_DEBUGOUT("igb_set_multi: begin");
1860
1861#if __FreeBSD_version < 800000
1862	IF_ADDR_LOCK(ifp);
1863#else
1864	if_maddr_rlock(ifp);
1865#endif
1866	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1867		if (ifma->ifma_addr->sa_family != AF_LINK)
1868			continue;
1869
1870		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1871			break;
1872
1873		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1874		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1875		mcnt++;
1876	}
1877#if __FreeBSD_version < 800000
1878	IF_ADDR_UNLOCK(ifp);
1879#else
1880	if_maddr_runlock(ifp);
1881#endif
1882
1883	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1884		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1885		reg_rctl |= E1000_RCTL_MPE;
1886		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1887	} else
1888		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1889}
1890
1891
1892/*********************************************************************
1893 *  Timer routine:
1894 *  	This routine checks for link status,
1895 *	updates statistics, and does the watchdog.
1896 *
1897 **********************************************************************/
1898
1899static void
1900igb_local_timer(void *arg)
1901{
1902	struct adapter		*adapter = arg;
1903	struct ifnet		*ifp = adapter->ifp;
1904	device_t		dev = adapter->dev;
1905	struct tx_ring		*txr = adapter->tx_rings;
1906
1907
1908	IGB_CORE_LOCK_ASSERT(adapter);
1909
1910	igb_update_link_status(adapter);
1911	igb_update_stats_counters(adapter);
1912
1913	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1914		igb_print_hw_stats(adapter);
1915
1916	/*
1917	** Watchdog: check for time since any descriptor was cleaned
1918	*/
1919	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1920		if (txr->watchdog_check == FALSE)
1921			continue;
1922		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1923			goto timeout;
1924	}
1925
1926	/* Trigger an RX interrupt on all queues */
1927#ifdef DEVICE_POLLING
1928	if (!(ifp->if_capenable & IFCAP_POLLING))
1929#endif
1930	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1931	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1932	return;
1933
1934timeout:
1935	device_printf(dev, "Watchdog timeout -- resetting\n");
1936	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1937	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1938	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1939	device_printf(dev, "TX(%d) desc avail = %d, "
1940	    "Next TX to Clean = %d\n",
1941	    txr->me, txr->tx_avail, txr->next_to_clean);
1942	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1943	adapter->watchdog_events++;
1944	igb_init_locked(adapter);
1945}
1946
1947static void
1948igb_update_link_status(struct adapter *adapter)
1949{
1950	struct e1000_hw *hw = &adapter->hw;
1951	struct ifnet *ifp = adapter->ifp;
1952	device_t dev = adapter->dev;
1953	struct tx_ring *txr = adapter->tx_rings;
1954	u32 link_check = 0;
1955
1956	/* Get the cached link value or read for real */
1957	switch (hw->phy.media_type) {
1958	case e1000_media_type_copper:
1959		if (hw->mac.get_link_status) {
1960			/* Do the work to read the phy */
1961			e1000_check_for_link(hw);
1962			link_check = !hw->mac.get_link_status;
1963		} else
1964			link_check = TRUE;
1965		break;
1966	case e1000_media_type_fiber:
1967		e1000_check_for_link(hw);
1968		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1969		    E1000_STATUS_LU);
1970		break;
1971	case e1000_media_type_internal_serdes:
1972		e1000_check_for_link(hw);
1973		link_check = adapter->hw.mac.serdes_has_link;
1974		break;
1975	default:
1976	case e1000_media_type_unknown:
1977		break;
1978	}
1979
1980	/* Now we check if a transition has happened */
1981	if (link_check && (adapter->link_active == 0)) {
1982		e1000_get_speed_and_duplex(&adapter->hw,
1983		    &adapter->link_speed, &adapter->link_duplex);
1984		if (bootverbose)
1985			device_printf(dev, "Link is up %d Mbps %s\n",
1986			    adapter->link_speed,
1987			    ((adapter->link_duplex == FULL_DUPLEX) ?
1988			    "Full Duplex" : "Half Duplex"));
1989		adapter->link_active = 1;
1990		ifp->if_baudrate = adapter->link_speed * 1000000;
1991		/* This can sleep */
1992		if_link_state_change(ifp, LINK_STATE_UP);
1993	} else if (!link_check && (adapter->link_active == 1)) {
1994		ifp->if_baudrate = adapter->link_speed = 0;
1995		adapter->link_duplex = 0;
1996		if (bootverbose)
1997			device_printf(dev, "Link is Down\n");
1998		adapter->link_active = 0;
1999		/* This can sleep */
2000		if_link_state_change(ifp, LINK_STATE_DOWN);
2001		/* Turn off watchdogs */
2002		for (int i = 0; i < adapter->num_queues; i++, txr++)
2003			txr->watchdog_check = FALSE;
2004	}
2005}
2006
2007/*********************************************************************
2008 *
2009 *  This routine disables all traffic on the adapter by issuing a
2010 *  global reset on the MAC and deallocates TX/RX buffers.
2011 *
2012 **********************************************************************/
2013
2014static void
2015igb_stop(void *arg)
2016{
2017	struct adapter	*adapter = arg;
2018	struct ifnet	*ifp = adapter->ifp;
2019	struct tx_ring *txr = adapter->tx_rings;
2020
2021	IGB_CORE_LOCK_ASSERT(adapter);
2022
2023	INIT_DEBUGOUT("igb_stop: begin");
2024
2025	igb_disable_intr(adapter);
2026
2027	callout_stop(&adapter->timer);
2028
2029	/* Tell the stack that the interface is no longer active */
2030	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2031
2032	/* Unarm watchdog timer. */
2033	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2034		IGB_TX_LOCK(txr);
2035		txr->watchdog_check = FALSE;
2036		IGB_TX_UNLOCK(txr);
2037	}
2038
2039	e1000_reset_hw(&adapter->hw);
2040	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2041
2042	e1000_led_off(&adapter->hw);
2043	e1000_cleanup_led(&adapter->hw);
2044}
2045
2046
2047/*********************************************************************
2048 *
2049 *  Determine hardware revision.
2050 *
2051 **********************************************************************/
2052static void
2053igb_identify_hardware(struct adapter *adapter)
2054{
2055	device_t dev = adapter->dev;
2056
2057	/* Make sure our PCI config space has the necessary stuff set */
2058	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061		device_printf(dev, "Memory Access and/or Bus Master bits "
2062		    "were not set!\n");
2063		adapter->hw.bus.pci_cmd_word |=
2064		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065		pci_write_config(dev, PCIR_COMMAND,
2066		    adapter->hw.bus.pci_cmd_word, 2);
2067	}
2068
2069	/* Save off the information about this board */
2070	adapter->hw.vendor_id = pci_get_vendor(dev);
2071	adapter->hw.device_id = pci_get_device(dev);
2072	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073	adapter->hw.subsystem_vendor_id =
2074	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075	adapter->hw.subsystem_device_id =
2076	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2077
2078	/* Do Shared Code Init and Setup */
2079	if (e1000_set_mac_type(&adapter->hw)) {
2080		device_printf(dev, "Setup init failure\n");
2081		return;
2082	}
2083}
2084
2085static int
2086igb_allocate_pci_resources(struct adapter *adapter)
2087{
2088	device_t	dev = adapter->dev;
2089	int		rid;
2090
2091	rid = PCIR_BAR(0);
2092	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2093	    &rid, RF_ACTIVE);
2094	if (adapter->pci_mem == NULL) {
2095		device_printf(dev, "Unable to allocate bus resource: memory\n");
2096		return (ENXIO);
2097	}
2098	adapter->osdep.mem_bus_space_tag =
2099	    rman_get_bustag(adapter->pci_mem);
2100	adapter->osdep.mem_bus_space_handle =
2101	    rman_get_bushandle(adapter->pci_mem);
2102	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2103
2104	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2105
2106	/* This will set up either MSI-X or MSI */
2107	adapter->msix = igb_setup_msix(adapter);
2108	adapter->hw.back = &adapter->osdep;
2109
2110	return (0);
2111}
2112
2113/*********************************************************************
2114 *
2115 *  Setup the Legacy or MSI Interrupt handler
2116 *
2117 **********************************************************************/
2118static int
2119igb_allocate_legacy(struct adapter *adapter)
2120{
2121	device_t		dev = adapter->dev;
2122	struct igb_queue	*que = adapter->queues;
2123	int			error, rid = 0;
2124
2125	/* Turn off all interrupts */
2126	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2127
2128	/* MSI RID is 1 */
2129	if (adapter->msix == 1)
2130		rid = 1;
2131
2132	/* We allocate a single interrupt resource */
2133	adapter->res = bus_alloc_resource_any(dev,
2134	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2135	if (adapter->res == NULL) {
2136		device_printf(dev, "Unable to allocate bus resource: "
2137		    "interrupt\n");
2138		return (ENXIO);
2139	}
2140
2141	/*
2142	 * Try allocating a fast interrupt and the associated deferred
2143	 * processing contexts.
2144	 */
2145	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, que);
2146	/* Make tasklet for deferred link handling */
2147	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2148	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2149	    taskqueue_thread_enqueue, &adapter->tq);
2150	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2151	    device_get_nameunit(adapter->dev));
2152	if ((error = bus_setup_intr(dev, adapter->res,
2153	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2154	    adapter, &adapter->tag)) != 0) {
2155		device_printf(dev, "Failed to register fast interrupt "
2156			    "handler: %d\n", error);
2157		taskqueue_free(adapter->tq);
2158		adapter->tq = NULL;
2159		return (error);
2160	}
2161
2162	return (0);
2163}
2164
2165
2166/*********************************************************************
2167 *
2168 *  Setup the MSIX Queue Interrupt handlers:
2169 *
2170 **********************************************************************/
2171static int
2172igb_allocate_msix(struct adapter *adapter)
2173{
2174	device_t		dev = adapter->dev;
2175	struct igb_queue	*que = adapter->queues;
2176	int			error, rid, vector = 0;
2177
2178
2179	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2180		rid = vector + 1;
2181		que->res = bus_alloc_resource_any(dev,
2182		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2183		if (que->res == NULL) {
2184			device_printf(dev,
2185			    "Unable to allocate bus resource: "
2186			    "MSIX Queue Interrupt\n");
2187			return (ENXIO);
2188		}
2189		error = bus_setup_intr(dev, que->res,
2190	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2191		    igb_msix_que, que, &que->tag);
2192		if (error) {
2193			que->res = NULL;
2194			device_printf(dev, "Failed to register Queue handler\n");
2195			return (error);
2196		}
2197		que->msix = vector;
2198		if (adapter->hw.mac.type == e1000_82575)
2199			que->eims = E1000_EICR_TX_QUEUE0 << i;
2200		else
2201			que->eims = 1 << vector;
2202		/*
2203		** Bind the msix vector, and thus the
2204		** rings to the corresponding cpu.
2205		*/
2206		if (adapter->num_queues > 1)
2207			bus_bind_intr(dev, que->res, i);
2208		/* Make tasklet for deferred handling */
2209		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2210		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2211		    taskqueue_thread_enqueue, &que->tq);
2212		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2213		    device_get_nameunit(adapter->dev));
2214	}
2215
2216	/* And Link */
2217	rid = vector + 1;
2218	adapter->res = bus_alloc_resource_any(dev,
2219	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2220	if (adapter->res == NULL) {
2221		device_printf(dev,
2222		    "Unable to allocate bus resource: "
2223		    "MSIX Link Interrupt\n");
2224		return (ENXIO);
2225	}
2226	if ((error = bus_setup_intr(dev, adapter->res,
2227	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2228	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2229		device_printf(dev, "Failed to register Link handler\n");
2230		return (error);
2231	}
2232	adapter->linkvec = vector;
2233
2234	/* Make tasklet for deferred handling */
2235	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2236	adapter->tq = taskqueue_create_fast("igb_link", M_NOWAIT,
2237	    taskqueue_thread_enqueue, &adapter->tq);
2238	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s link",
2239	    device_get_nameunit(adapter->dev));
2240
2241	return (0);
2242}
2243
2244
2245static void
2246igb_configure_queues(struct adapter *adapter)
2247{
2248	struct	e1000_hw	*hw = &adapter->hw;
2249	struct	igb_queue	*que;
2250	u32			tmp, ivar = 0;
2251	u32			newitr = IGB_DEFAULT_ITR;
2252
2253	/* First turn on RSS capability */
2254	if (adapter->hw.mac.type > e1000_82575)
2255		E1000_WRITE_REG(hw, E1000_GPIE,
2256		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2257		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2258
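	/*
	 * Sketch of the 82580 IVAR mapping implemented below (derived
	 * from the shift logic here, not quoted from the datasheet):
	 * each 32-bit IVAR register holds four 8-bit entries covering
	 * two queues:
	 *
	 *   bits  7:0   RX queue (2*index)    bits 15:8   TX queue (2*index)
	 *   bits 23:16  RX queue (2*index+1)  bits 31:24  TX queue (2*index+1)
	 *
	 * Each entry is the MSIX vector number with E1000_IVAR_VALID
	 * set. The 82576 case below packs entries differently: queues
	 * 0-7 land in the low half of IVAR(i & 7), 8-15 in the high.
	 */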
2259	/* Turn on MSIX */
2260	switch (adapter->hw.mac.type) {
2261	case e1000_82580:
2262		/* RX entries */
2263		for (int i = 0; i < adapter->num_queues; i++) {
2264			u32 index = i >> 1;
2265			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2266			que = &adapter->queues[i];
2267			if (i & 1) {
2268				ivar &= 0xFF00FFFF;
2269				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2270			} else {
2271				ivar &= 0xFFFFFF00;
2272				ivar |= que->msix | E1000_IVAR_VALID;
2273			}
2274			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2275		}
2276		/* TX entries */
2277		for (int i = 0; i < adapter->num_queues; i++) {
2278			u32 index = i >> 1;
2279			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2280			que = &adapter->queues[i];
2281			if (i & 1) {
2282				ivar &= 0x00FFFFFF;
2283				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2284			} else {
2285				ivar &= 0xFFFF00FF;
2286				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2287			}
2288			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2289			adapter->eims_mask |= que->eims;
2290		}
2291
2292		/* And for the link interrupt */
2293		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2294		adapter->link_mask = 1 << adapter->linkvec;
2295		adapter->eims_mask |= adapter->link_mask;
2296		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2297		break;
2298	case e1000_82576:
2299		/* RX entries */
2300		for (int i = 0; i < adapter->num_queues; i++) {
2301			u32 index = i & 0x7; /* Each IVAR has two entries */
2302			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2303			que = &adapter->queues[i];
2304			if (i < 8) {
2305				ivar &= 0xFFFFFF00;
2306				ivar |= que->msix | E1000_IVAR_VALID;
2307			} else {
2308				ivar &= 0xFF00FFFF;
2309				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2310			}
2311			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2312			adapter->eims_mask |= que->eims;
2313		}
2314		/* TX entries */
2315		for (int i = 0; i < adapter->num_queues; i++) {
2316			u32 index = i & 0x7; /* Each IVAR has two entries */
2317			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2318			que = &adapter->queues[i];
2319			if (i < 8) {
2320				ivar &= 0xFFFF00FF;
2321				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2322			} else {
2323				ivar &= 0x00FFFFFF;
2324				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2325			}
2326			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2327			adapter->eims_mask |= que->eims;
2328		}
2329
2330		/* And for the link interrupt */
2331		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2332		adapter->link_mask = 1 << adapter->linkvec;
2333		adapter->eims_mask |= adapter->link_mask;
2334		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2335		break;
2336
2337	case e1000_82575:
2338		/* enable MSI-X support */
2339		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2340		tmp |= E1000_CTRL_EXT_PBA_CLR;
2341		/* Auto-Mask interrupts upon ICR read. */
2342		tmp |= E1000_CTRL_EXT_EIAME;
2343		tmp |= E1000_CTRL_EXT_IRCA;
2344		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2345
2346		/* Queues */
2347		for (int i = 0; i < adapter->num_queues; i++) {
2348			que = &adapter->queues[i];
2349			tmp = E1000_EICR_RX_QUEUE0 << i;
2350			tmp |= E1000_EICR_TX_QUEUE0 << i;
2351			que->eims = tmp;
2352			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2353			    i, que->eims);
2354			adapter->eims_mask |= que->eims;
2355		}
2356
2357		/* Link */
2358		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2359		    E1000_EIMS_OTHER);
2360		adapter->link_mask |= E1000_EIMS_OTHER;
2361		adapter->eims_mask |= adapter->link_mask;
		break;
2362	default:
2363		break;
2364	}
2365
2366	/* Set the starting interrupt rate */
2367	if (hw->mac.type == e1000_82575)
2368		newitr |= newitr << 16;
2369	else
2370		newitr |= E1000_EITR_CNT_IGNR;
2371
2372	for (int i = 0; i < adapter->num_queues; i++) {
2373		que = &adapter->queues[i];
2374		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2375	}
2376
2377	return;
2378}
2379
2380
2381static void
2382igb_free_pci_resources(struct adapter *adapter)
2383{
2384	struct		igb_queue *que = adapter->queues;
2385	device_t	dev = adapter->dev;
2386	int		rid;
2387
2388	/*
2389	** There is a slight possibility of a failure mode
2390	** in attach that will result in entering this function
2391	** before interrupt resources have been initialized, and
2392	** in that case we do not want to execute the loops below.
2393	** We can detect this reliably by the state of the adapter
2394	** res pointer.
2395	*/
2396	if (adapter->res == NULL)
2397		goto mem;
2398
2399	/*
2400	 * First release all the interrupt resources:
2401	 */
2402	for (int i = 0; i < adapter->num_queues; i++, que++) {
2403		rid = que->msix + 1;
2404		if (que->tag != NULL) {
2405			bus_teardown_intr(dev, que->res, que->tag);
2406			que->tag = NULL;
2407		}
2408		if (que->res != NULL)
2409			bus_release_resource(dev,
2410			    SYS_RES_IRQ, rid, que->res);
2411	}
2412
2413	/* Clean the Legacy or Link interrupt last */
2414	if (adapter->linkvec) /* we are doing MSIX */
2415		rid = adapter->linkvec + 1;
2416	else
2417		rid = (adapter->msix != 0) ? 1 : 0;
2418
2419	if (adapter->tag != NULL) {
2420		bus_teardown_intr(dev, adapter->res, adapter->tag);
2421		adapter->tag = NULL;
2422	}
2423	if (adapter->res != NULL)
2424		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2425
2426mem:
2427	if (adapter->msix)
2428		pci_release_msi(dev);
2429
2430	if (adapter->msix_mem != NULL)
2431		bus_release_resource(dev, SYS_RES_MEMORY,
2432		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2433
2434	if (adapter->pci_mem != NULL)
2435		bus_release_resource(dev, SYS_RES_MEMORY,
2436		    PCIR_BAR(0), adapter->pci_mem);
2437
2438}
2439
2440/*
2441 * Set up either MSI-X or MSI
2442 */
2443static int
2444igb_setup_msix(struct adapter *adapter)
2445{
2446	device_t dev = adapter->dev;
2447	int rid, want, queues, msgs;
2448
2449	/* tuneable override */
2450	if (igb_enable_msix == 0)
2451		goto msi;
2452
2453	/* First try MSI/X */
2454	rid = PCIR_BAR(IGB_MSIX_BAR);
2455	adapter->msix_mem = bus_alloc_resource_any(dev,
2456	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2457	if (!adapter->msix_mem) {
2458		/* May not be enabled */
2459		device_printf(adapter->dev,
2460		    "Unable to map MSIX table\n");
2461		goto msi;
2462	}
2463
2464	msgs = pci_msix_count(dev);
2465	if (msgs == 0) { /* system has msix disabled */
2466		bus_release_resource(dev, SYS_RES_MEMORY,
2467		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2468		adapter->msix_mem = NULL;
2469		goto msi;
2470	}
2471
2472	/* Figure out a reasonable auto config value */
2473	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2474
2475	/* Manual override */
2476	if (igb_num_queues != 0)
2477		queues = igb_num_queues;
2478
2479	/* Can have max of 4 queues on 82575 */
2480	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2481		queues = 4;
2482
2483	/*
2484	** One vector (RX/TX pair) per queue
2485	** plus an additional for Link interrupt
2486	*/
2487	want = queues + 1;
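	/*
	 * Worked example (illustrative numbers): on a 4-core system
	 * where pci_msix_count() reports 10 messages, queues = 4, so
	 * want = 5 (one vector per RX/TX queue pair plus one for the
	 * link interrupt) and msgs is trimmed from 10 down to 5 below.
	 */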
2488	if (msgs >= want)
2489		msgs = want;
2490	else {
2491		device_printf(adapter->dev,
2492		    "MSIX Configuration Problem, "
2493		    "%d vectors configured, but %d queues wanted!\n",
2494		    msgs, want);
2495		return (ENXIO);
2496	}
2497	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
2498		device_printf(adapter->dev,
2499		    "Using MSIX interrupts with %d vectors\n", msgs);
2500		adapter->num_queues = queues;
2501		return (msgs);
2502	}
2503msi:
2504	msgs = pci_msi_count(dev);
2505	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2506		device_printf(adapter->dev, "Using MSI interrupt\n");
2507	return (msgs);
2508}
2509
2510/*********************************************************************
2511 *
2512 *  Set up a fresh starting state
2513 *
2514 **********************************************************************/
2515static void
2516igb_reset(struct adapter *adapter)
2517{
2518	device_t	dev = adapter->dev;
2519	struct e1000_hw *hw = &adapter->hw;
2520	struct e1000_fc_info *fc = &hw->fc;
2521	struct ifnet	*ifp = adapter->ifp;
2522	u32		pba = 0;
2523	u16		hwm;
2524
2525	INIT_DEBUGOUT("igb_reset: begin");
2526
2527	/* Let the firmware know the OS is in control */
2528	igb_get_hw_control(adapter);
2529
2530	/*
2531	 * Packet Buffer Allocation (PBA)
2532	 * Writing PBA sets the receive portion of the buffer
2533	 * the remainder is used for the transmit buffer.
2534	 */
2535	switch (hw->mac.type) {
2536	case e1000_82575:
2537		pba = E1000_PBA_32K;
2538		break;
2539	case e1000_82576:
2540		pba = E1000_PBA_64K;
2541		break;
2542	case e1000_82580:
2543		pba = E1000_PBA_35K;
		break;
2544	default:
2545		break;
2546	}
2547
2548	/* Special needs in case of Jumbo frames */
2549	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2550		u32 tx_space, min_tx, min_rx;
2551		pba = E1000_READ_REG(hw, E1000_PBA);
2552		tx_space = pba >> 16;
2553		pba &= 0xffff;
2554		min_tx = (adapter->max_frame_size +
2555		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2556		min_tx = roundup2(min_tx, 1024);
2557		min_tx >>= 10;
2558		min_rx = adapter->max_frame_size;
2559		min_rx = roundup2(min_rx, 1024);
2560		min_rx >>= 10;
2561		if (tx_space < min_tx &&
2562		    ((min_tx - tx_space) < pba)) {
2563			pba = pba - (min_tx - tx_space);
2564			/*
2565			 * If short on RX space, RX wins
2566			 * and must trump the TX adjustment.
2567			 */
2568			if (pba < min_rx)
2569				pba = min_rx;
2570		}
2571		E1000_WRITE_REG(hw, E1000_PBA, pba);
2572	}
2573
2574	INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
2575
2576	/*
2577	 * These parameters control the automatic generation (Tx) and
2578	 * response (Rx) to Ethernet PAUSE frames.
2579	 * - High water mark should allow for at least two frames to be
2580	 *   received after sending an XOFF.
2581	 * - Low water mark works best when it is very near the high water mark.
2582	 *   This allows the receiver to restart by sending XON when it has
2583	 *   drained a bit.
2584	 */
2585	hwm = min(((pba << 10) * 9 / 10),
2586	    ((pba << 10) - 2 * adapter->max_frame_size));
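	/*
	 * Worked example (illustrative numbers): with a 64KB RX
	 * allocation (pba = 64) and a 1518-byte max frame,
	 * pba << 10 = 65536, so
	 *   hwm = min(65536 * 9 / 10, 65536 - 2 * 1518)
	 *       = min(58982, 62500) = 58982,
	 * which on an 82576 is then rounded down to 16-byte
	 * granularity below.
	 */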
2587
2588	if (hw->mac.type < e1000_82576) {
2589		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2590		fc->low_water = fc->high_water - 8;
2591	} else {
2592		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2593		fc->low_water = fc->high_water - 16;
2594	}
2595
2596	fc->pause_time = IGB_FC_PAUSE_TIME;
2597	fc->send_xon = TRUE;
2598
2599	/* Set flow control, using the tunable if it is sane */
2600	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2601		fc->requested_mode = igb_fc_setting;
2602	else
2603		fc->requested_mode = e1000_fc_none;
2604
2605	fc->current_mode = fc->requested_mode;
2606
2607	/* Issue a global reset */
2608	e1000_reset_hw(hw);
2609	E1000_WRITE_REG(hw, E1000_WUC, 0);
2610
2611	if (e1000_init_hw(hw) < 0)
2612		device_printf(dev, "Hardware Initialization Failed\n");
2613
2614	if (hw->mac.type == e1000_82580) {
2615		u32 reg;
2616
2617		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2618		/*
2619		 * 0x80000000 - enable DMA COAL
2620		 * 0x10000000 - use L0s as low power
2621		 * 0x20000000 - use L1 as low power
2622		 * X << 16 - exit dma coal when rx data exceeds X kB
2623		 * Y - upper limit to stay in dma coal in units of 32usecs
2624		 */
2625		E1000_WRITE_REG(hw, E1000_DMACR,
2626		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
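		/*
		 * Decoding the constant above: 0xA0000006 =
		 * 0x80000000 (enable DMA coalescing) |
		 * 0x20000000 (use L1 as the low power state) |
		 * 6 (upper limit of 6 * 32 usecs in coalescing),
		 * with the receive-data exit threshold derived
		 * from hwm OR'd into bits 23:16.
		 */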
2627
2628		/* set hwm to PBA - 2 * max frame size */
2629		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2630		/*
2631		 * This sets the time to wait before requesting transition to
2632		 * low power state to number of usecs needed to receive 1 512
2633		 * byte frame at gigabit line rate
2634		 */
2635		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2636
2637		/* free space in tx packet buffer to wake from DMA coal */
2638		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2639		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2640
2641		/* make low power state decision controlled by DMA coal */
2642		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2643		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2644		    reg | E1000_PCIEMISC_LX_DECISION);
2645	}
2646
2647	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2648	e1000_get_phy_info(hw);
2649	e1000_check_for_link(hw);
2650	return;
2651}
2652
2653/*********************************************************************
2654 *
2655 *  Setup networking device structure and register an interface.
2656 *
2657 **********************************************************************/
2658static void
2659igb_setup_interface(device_t dev, struct adapter *adapter)
2660{
2661	struct ifnet   *ifp;
2662
2663	INIT_DEBUGOUT("igb_setup_interface: begin");
2664
2665	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2666	if (ifp == NULL)
2667		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2668	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2669	ifp->if_mtu = ETHERMTU;
2670	ifp->if_init =  igb_init;
2671	ifp->if_softc = adapter;
2672	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2673	ifp->if_ioctl = igb_ioctl;
2674	ifp->if_start = igb_start;
2675#if __FreeBSD_version >= 800000
2676	ifp->if_transmit = igb_mq_start;
2677	ifp->if_qflush = igb_qflush;
2678#endif
2679	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2680	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2681	IFQ_SET_READY(&ifp->if_snd);
2682
2683	ether_ifattach(ifp, adapter->hw.mac.addr);
2684
2685	ifp->if_capabilities = ifp->if_capenable = 0;
2686
2687	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2688	ifp->if_capabilities |= IFCAP_TSO4;
2689	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2690	if (igb_header_split)
2691		ifp->if_capabilities |= IFCAP_LRO;
2692
2693	ifp->if_capenable = ifp->if_capabilities;
2694#ifdef DEVICE_POLLING
2695	ifp->if_capabilities |= IFCAP_POLLING;
2696#endif
2697
2698	/*
2699	 * Tell the upper layer(s) we
2700	 * support full VLAN capability.
2701	 */
2702	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2703	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2704	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2705
2706	/*
2707	** Don't turn this on by default; if vlans are
2708	** created on another pseudo device (eg. lagg)
2709	** then vlan events are not passed through, breaking
2710	** operation, but with HW FILTER off it works. If
2711	** using vlans directly on the igb driver you can
2712	** enable this and get full hardware tag filtering.
2713	*/
2714	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2715
2716	/*
2717	 * Specify the media types supported by this adapter and register
2718	 * callbacks to update media and link information
2719	 */
2720	ifmedia_init(&adapter->media, IFM_IMASK,
2721	    igb_media_change, igb_media_status);
2722	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2723	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2724		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2725			    0, NULL);
2726		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2727	} else {
2728		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2729		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2730			    0, NULL);
2731		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2732			    0, NULL);
2733		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2734			    0, NULL);
2735		if (adapter->hw.phy.type != e1000_phy_ife) {
2736			ifmedia_add(&adapter->media,
2737				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2738			ifmedia_add(&adapter->media,
2739				IFM_ETHER | IFM_1000_T, 0, NULL);
2740		}
2741	}
2742	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2743	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2744}
2745
2746
2747/*
2748 * Manage DMA'able memory.
2749 */
2750static void
2751igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2752{
2753	if (error)
2754		return;
2755	*(bus_addr_t *) arg = segs[0].ds_addr;
2756}
2757
2758static int
2759igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2760        struct igb_dma_alloc *dma, int mapflags)
2761{
2762	int error;
2763
2764	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2765				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2766				BUS_SPACE_MAXADDR,	/* lowaddr */
2767				BUS_SPACE_MAXADDR,	/* highaddr */
2768				NULL, NULL,		/* filter, filterarg */
2769				size,			/* maxsize */
2770				1,			/* nsegments */
2771				size,			/* maxsegsize */
2772				0,			/* flags */
2773				NULL,			/* lockfunc */
2774				NULL,			/* lockarg */
2775				&dma->dma_tag);
2776	if (error) {
2777		device_printf(adapter->dev,
2778		    "%s: bus_dma_tag_create failed: %d\n",
2779		    __func__, error);
2780		goto fail_0;
2781	}
2782
2783	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2784	    BUS_DMA_NOWAIT, &dma->dma_map);
2785	if (error) {
2786		device_printf(adapter->dev,
2787		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2788		    __func__, (uintmax_t)size, error);
2789		goto fail_2;
2790	}
2791
2792	dma->dma_paddr = 0;
2793	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2794	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2795	if (error || dma->dma_paddr == 0) {
2796		device_printf(adapter->dev,
2797		    "%s: bus_dmamap_load failed: %d\n",
2798		    __func__, error);
2799		goto fail_3;
2800	}
2801
2802	return (0);
2803
2804fail_3:
2805	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2806fail_2:
2807	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2808	bus_dma_tag_destroy(dma->dma_tag);
2809fail_0:
2810	dma->dma_map = NULL;
2811	dma->dma_tag = NULL;
2812
2813	return (error);
2814}
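
/*
 * Typical usage of igb_dma_malloc() (mirroring igb_allocate_queues()
 * below): allocate a descriptor ring and pick up its kernel virtual
 * address, with the bus address recorded in txdma by the callback:
 *
 *	if (igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 */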
2815
2816static void
2817igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2818{
2819	if (dma->dma_tag == NULL)
2820		return;
2821	if (dma->dma_map != NULL) {
2822		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2823		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2824		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2825		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2826		dma->dma_map = NULL;
2827	}
2828	bus_dma_tag_destroy(dma->dma_tag);
2829	dma->dma_tag = NULL;
2830}
2831
2832
2833/*********************************************************************
2834 *
2835 *  Allocate memory for the transmit and receive rings, and then
2836 *  the descriptors associated with each, called only once at attach.
2837 *
2838 **********************************************************************/
2839static int
2840igb_allocate_queues(struct adapter *adapter)
2841{
2842	device_t dev = adapter->dev;
2843	struct igb_queue	*que = NULL;
2844	struct tx_ring		*txr = NULL;
2845	struct rx_ring		*rxr = NULL;
2846	int rsize, tsize, error = E1000_SUCCESS;
2847	int txconf = 0, rxconf = 0;
2848
2849	/* First allocate the top level queue structs */
2850	if (!(adapter->queues =
2851	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2852	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2853		device_printf(dev, "Unable to allocate queue memory\n");
2854		error = ENOMEM;
2855		goto fail;
2856	}
2857
2858	/* Next allocate the TX ring struct memory */
2859	if (!(adapter->tx_rings =
2860	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2861	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2862		device_printf(dev, "Unable to allocate TX ring memory\n");
2863		error = ENOMEM;
2864		goto tx_fail;
2865	}
2866
2867	/* Now allocate the RX */
2868	if (!(adapter->rx_rings =
2869	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2870	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2871		device_printf(dev, "Unable to allocate RX ring memory\n");
2872		error = ENOMEM;
2873		goto rx_fail;
2874	}
2875
2876	tsize = roundup2(adapter->num_tx_desc *
2877	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2878	/*
2879	 * Now set up the TX queues, txconf is needed to handle the
2880	 * possibility that things fail midcourse and we need to
2881	 * undo memory gracefully
2882	 */
2883	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2884		/* Set up some basics */
2885		txr = &adapter->tx_rings[i];
2886		txr->adapter = adapter;
2887		txr->me = i;
2888
2889		/* Initialize the TX lock */
2890		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2891		    device_get_nameunit(dev), txr->me);
2892		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2893
2894		if (igb_dma_malloc(adapter, tsize,
2895			&txr->txdma, BUS_DMA_NOWAIT)) {
2896			device_printf(dev,
2897			    "Unable to allocate TX Descriptor memory\n");
2898			error = ENOMEM;
2899			goto err_tx_desc;
2900		}
2901		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2902		bzero((void *)txr->tx_base, tsize);
2903
2904		/* Now allocate transmit buffers for the ring */
2905		if (igb_allocate_transmit_buffers(txr)) {
2906			device_printf(dev,
2907			    "Critical Failure setting up transmit buffers\n");
2908			error = ENOMEM;
2909			goto err_tx_desc;
2910		}
2911#if __FreeBSD_version >= 800000
2912		/* Allocate a buf ring */
2913		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2914		    M_WAITOK, &txr->tx_mtx);
2915#endif
2916	}
2917
2918	/*
2919	 * Next the RX queues...
2920	 */
2921	rsize = roundup2(adapter->num_rx_desc *
2922	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2923	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2924		rxr = &adapter->rx_rings[i];
2925		rxr->adapter = adapter;
2926		rxr->me = i;
2927
2928		/* Initialize the RX lock */
2929		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2930		    device_get_nameunit(dev), rxr->me);
2931		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2932
2933		if (igb_dma_malloc(adapter, rsize,
2934			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2935			device_printf(dev,
2936			    "Unable to allocate RX descriptor memory\n");
2937			error = ENOMEM;
2938			goto err_rx_desc;
2939		}
2940		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2941		bzero((void *)rxr->rx_base, rsize);
2942
2943		/* Allocate receive buffers for the ring */
2944		if (igb_allocate_receive_buffers(rxr)) {
2945			device_printf(dev,
2946			    "Critical Failure setting up receive buffers\n");
2947			error = ENOMEM;
2948			goto err_rx_desc;
2949		}
2950	}
2951
2952	/*
2953	** Finally set up the queue holding structs
2954	*/
2955	for (int i = 0; i < adapter->num_queues; i++) {
2956		que = &adapter->queues[i];
2957		que->adapter = adapter;
2958		que->txr = &adapter->tx_rings[i];
2959		que->rxr = &adapter->rx_rings[i];
2960	}
2961
2962	return (0);
2963
2964err_rx_desc:
2965	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2966		igb_dma_free(adapter, &rxr->rxdma);
2967err_tx_desc:
2968	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2969		igb_dma_free(adapter, &txr->txdma);
2970	free(adapter->rx_rings, M_DEVBUF);
2971rx_fail:
2972#if __FreeBSD_version >= 800000
2973	buf_ring_free(txr->br, M_DEVBUF);
2974#endif
2975	free(adapter->tx_rings, M_DEVBUF);
2976tx_fail:
2977	free(adapter->queues, M_DEVBUF);
2978fail:
2979	return (error);
2980}
2981
2982/*********************************************************************
2983 *
2984 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2985 *  the information needed to transmit a packet on the wire. This is
2986 *  called only once at attach, setup is done every reset.
2987 *
2988 **********************************************************************/
2989static int
2990igb_allocate_transmit_buffers(struct tx_ring *txr)
2991{
2992	struct adapter *adapter = txr->adapter;
2993	device_t dev = adapter->dev;
2994	struct igb_tx_buffer *txbuf;
2995	int error, i;
2996
2997	/*
2998	 * Setup DMA descriptor areas.
2999	 */
3000	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3001			       1, 0,			/* alignment, bounds */
3002			       BUS_SPACE_MAXADDR,	/* lowaddr */
3003			       BUS_SPACE_MAXADDR,	/* highaddr */
3004			       NULL, NULL,		/* filter, filterarg */
3005			       IGB_TSO_SIZE,		/* maxsize */
3006			       IGB_MAX_SCATTER,		/* nsegments */
3007			       PAGE_SIZE,		/* maxsegsize */
3008			       0,			/* flags */
3009			       NULL,			/* lockfunc */
3010			       NULL,			/* lockfuncarg */
3011			       &txr->txtag))) {
3012		device_printf(dev, "Unable to allocate TX DMA tag\n");
3013		goto fail;
3014	}
3015
3016	if (!(txr->tx_buffers =
3017	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3018	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3019		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3020		error = ENOMEM;
3021		goto fail;
3022	}
3023
3024	/* Create the descriptor buffer dma maps */
3025	txbuf = txr->tx_buffers;
3026	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3027		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3028		if (error != 0) {
3029			device_printf(dev, "Unable to create TX DMA map\n");
3030			goto fail;
3031		}
3032	}
3033
3034	return (0);
3035fail:
3036	/* We free all, it handles case where we are in the middle */
3037	igb_free_transmit_structures(adapter);
3038	return (error);
3039}
3040
3041/*********************************************************************
3042 *
3043 *  Initialize a transmit ring.
3044 *
3045 **********************************************************************/
3046static void
3047igb_setup_transmit_ring(struct tx_ring *txr)
3048{
3049	struct adapter *adapter = txr->adapter;
3050	struct igb_tx_buffer *txbuf;
3051	int i;
3052
3053	/* Clear the old descriptor contents */
3054	IGB_TX_LOCK(txr);
3055	bzero((void *)txr->tx_base,
3056	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3057	/* Reset indices */
3058	txr->next_avail_desc = 0;
3059	txr->next_to_clean = 0;
3060
3061	/* Free any existing tx buffers. */
3062	txbuf = txr->tx_buffers;
3063	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3064		if (txbuf->m_head != NULL) {
3065			bus_dmamap_sync(txr->txtag, txbuf->map,
3066			    BUS_DMASYNC_POSTWRITE);
3067			bus_dmamap_unload(txr->txtag, txbuf->map);
3068			m_freem(txbuf->m_head);
3069			txbuf->m_head = NULL;
3070		}
3071		/* clear the watch index */
3072		txbuf->next_eop = -1;
3073	}
3074
3075	/* Set number of descriptors available */
3076	txr->tx_avail = adapter->num_tx_desc;
3077
3078	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3079	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3080	IGB_TX_UNLOCK(txr);
3081}
3082
3083/*********************************************************************
3084 *
3085 *  Initialize all transmit rings.
3086 *
3087 **********************************************************************/
3088static void
3089igb_setup_transmit_structures(struct adapter *adapter)
3090{
3091	struct tx_ring *txr = adapter->tx_rings;
3092
3093	for (int i = 0; i < adapter->num_queues; i++, txr++)
3094		igb_setup_transmit_ring(txr);
3095
3096	return;
3097}
3098
3099/*********************************************************************
3100 *
3101 *  Enable transmit unit.
3102 *
3103 **********************************************************************/
3104static void
3105igb_initialize_transmit_units(struct adapter *adapter)
3106{
3107	struct tx_ring	*txr = adapter->tx_rings;
3108	struct e1000_hw *hw = &adapter->hw;
3109	u32		tctl, txdctl;
3110
3111	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3112
3113	/* Setup the Tx Descriptor Rings */
3114	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3115		u64 bus_addr = txr->txdma.dma_paddr;
3116
3117		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3118		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3119		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3120		    (uint32_t)(bus_addr >> 32));
3121		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3122		    (uint32_t)bus_addr);
3123
3124		/* Setup the HW Tx Head and Tail descriptor pointers */
3125		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3126		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3127
3128		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3129		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3130		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3131
3132		txr->watchdog_check = FALSE;
3133
3134		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3135		txdctl |= IGB_TX_PTHRESH;
3136		txdctl |= IGB_TX_HTHRESH << 8;
3137		txdctl |= IGB_TX_WTHRESH << 16;
3138		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3139		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3140	}
3141
3142	/* Program the Transmit Control Register */
3143	tctl = E1000_READ_REG(hw, E1000_TCTL);
3144	tctl &= ~E1000_TCTL_CT;
3145	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3146		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3147
3148	e1000_config_collision_dist(hw);
3149
3150	/* This write will effectively turn on the transmit unit. */
3151	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3152}
3153
3154/*********************************************************************
3155 *
3156 *  Free all transmit rings.
3157 *
3158 **********************************************************************/
3159static void
3160igb_free_transmit_structures(struct adapter *adapter)
3161{
3162	struct tx_ring *txr = adapter->tx_rings;
3163
3164	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3165		IGB_TX_LOCK(txr);
3166		igb_free_transmit_buffers(txr);
3167		igb_dma_free(adapter, &txr->txdma);
3168		IGB_TX_UNLOCK(txr);
3169		IGB_TX_LOCK_DESTROY(txr);
3170	}
3171	free(adapter->tx_rings, M_DEVBUF);
3172}
3173
3174/*********************************************************************
3175 *
3176 *  Free transmit ring related data structures.
3177 *
3178 **********************************************************************/
3179static void
3180igb_free_transmit_buffers(struct tx_ring *txr)
3181{
3182	struct adapter *adapter = txr->adapter;
3183	struct igb_tx_buffer *tx_buffer;
3184	int             i;
3185
3186	INIT_DEBUGOUT("free_transmit_ring: begin");
3187
3188	if (txr->tx_buffers == NULL)
3189		return;
3190
3191	tx_buffer = txr->tx_buffers;
3192	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3193		if (tx_buffer->m_head != NULL) {
3194			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3195			    BUS_DMASYNC_POSTWRITE);
3196			bus_dmamap_unload(txr->txtag,
3197			    tx_buffer->map);
3198			m_freem(tx_buffer->m_head);
3199			tx_buffer->m_head = NULL;
3200			if (tx_buffer->map != NULL) {
3201				bus_dmamap_destroy(txr->txtag,
3202				    tx_buffer->map);
3203				tx_buffer->map = NULL;
3204			}
3205		} else if (tx_buffer->map != NULL) {
3206			bus_dmamap_unload(txr->txtag,
3207			    tx_buffer->map);
3208			bus_dmamap_destroy(txr->txtag,
3209			    tx_buffer->map);
3210			tx_buffer->map = NULL;
3211		}
3212	}
3213#if __FreeBSD_version >= 800000
3214	if (txr->br != NULL)
3215		buf_ring_free(txr->br, M_DEVBUF);
3216#endif
3217	if (txr->tx_buffers != NULL) {
3218		free(txr->tx_buffers, M_DEVBUF);
3219		txr->tx_buffers = NULL;
3220	}
3221	if (txr->txtag != NULL) {
3222		bus_dma_tag_destroy(txr->txtag);
3223		txr->txtag = NULL;
3224	}
3225	return;
3226}
3227
3228/**********************************************************************
3229 *
3230 *  Setup work for hardware segmentation offload (TSO)
3231 *
3232 **********************************************************************/
3233static boolean_t
3234igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3235{
3236	struct adapter *adapter = txr->adapter;
3237	struct e1000_adv_tx_context_desc *TXD;
3238	struct igb_tx_buffer        *tx_buffer;
3239	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3240	u32 mss_l4len_idx = 0;
3241	u16 vtag = 0;
3242	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3243	struct ether_vlan_header *eh;
3244	struct ip *ip;
3245	struct tcphdr *th;
3246
3247
3248	/*
3249	 * Determine where frame payload starts.
3250	 * Jump over vlan headers if already present
3251	 */
3252	eh = mtod(mp, struct ether_vlan_header *);
3253	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3254		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3255	else
3256		ehdrlen = ETHER_HDR_LEN;
3257
3258	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3259	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3260		return FALSE;
3261
3262	/* Only supports IPV4 for now */
3263	ctxd = txr->next_avail_desc;
3264	tx_buffer = &txr->tx_buffers[ctxd];
3265	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3266
3267	ip = (struct ip *)(mp->m_data + ehdrlen);
3268	if (ip->ip_p != IPPROTO_TCP)
3269		return FALSE;
3270	ip->ip_sum = 0;
3271	ip_hlen = ip->ip_hl << 2;
3272	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3273	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3274	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
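	/*
	 * The IP checksum is zeroed and the TCP checksum is seeded
	 * with the pseudo-header sum (addresses plus protocol, with
	 * the length omitted) so the hardware can finalize both for
	 * each segment it generates.
	 */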
3275	tcp_hlen = th->th_off << 2;
3276	/*
3277	 * Calculate header length, this is used
3278	 * in the transmit desc in igb_xmit
3279	 */
3280	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3281
3282	/* VLAN MACLEN IPLEN */
3283	if (mp->m_flags & M_VLANTAG) {
3284		vtag = htole16(mp->m_pkthdr.ether_vtag);
3285		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3286	}
3287
3288	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3289	vlan_macip_lens |= ip_hlen;
3290	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3291
3292	/* ADV DTYPE TUCMD */
3293	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3294	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3295	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3296	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3297
3298	/* MSS L4LEN IDX */
3299	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3300	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3301	/* 82575 needs the queue index added */
3302	if (adapter->hw.mac.type == e1000_82575)
3303		mss_l4len_idx |= txr->me << 4;
3304	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3305
3306	TXD->seqnum_seed = htole32(0);
3307	tx_buffer->m_head = NULL;
3308	tx_buffer->next_eop = -1;
3309
3310	if (++ctxd == adapter->num_tx_desc)
3311		ctxd = 0;
3312
3313	txr->tx_avail--;
3314	txr->next_avail_desc = ctxd;
3315	return TRUE;
3316}
3317
3318
3319/*********************************************************************
3320 *
3321 *  Context Descriptor setup for VLAN or CSUM
3322 *
3323 **********************************************************************/
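/*
 * Unlike the TSO path above, this descriptor only describes the header
 * layout (MAC/IP lengths and which L4 protocol to checksum); the hardware
 * then inserts checksums into the data descriptors that follow it.  Note
 * that it still consumes one slot in the TX ring.
 */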
3324
3325static bool
3326igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3327{
3328	struct adapter *adapter = txr->adapter;
3329	struct e1000_adv_tx_context_desc *TXD;
3330	struct igb_tx_buffer        *tx_buffer;
3331	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3332	struct ether_vlan_header *eh;
3333	struct ip *ip = NULL;
3334	struct ip6_hdr *ip6;
3335	int  ehdrlen, ctxd, ip_hlen = 0;
3336	u16	etype, vtag = 0;
3337	u8	ipproto = 0;
3338	bool	offload = TRUE;
3339
3340	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3341		offload = FALSE;
3342
3343	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3344	ctxd = txr->next_avail_desc;
3345	tx_buffer = &txr->tx_buffers[ctxd];
3346	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3347
3348	/*
3349	** In advanced descriptors the vlan tag must be
3350	** placed into the context descriptor, so this
3351	** setup is needed even with no checksum offload.
3352	*/
3353	if (mp->m_flags & M_VLANTAG) {
3354		vtag = htole16(mp->m_pkthdr.ether_vtag);
3355		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3356	} else if (offload == FALSE)
3357		return FALSE;
3358
3359	/*
3360	 * Determine where frame payload starts.
3361	 * Jump over vlan headers if already present,
3362	 * helpful for QinQ too.
3363	 */
3364	eh = mtod(mp, struct ether_vlan_header *);
3365	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3366		etype = ntohs(eh->evl_proto);
3367		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3368	} else {
3369		etype = ntohs(eh->evl_encap_proto);
3370		ehdrlen = ETHER_HDR_LEN;
3371	}
3372
3373	/* Set the ether header length */
3374	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3375
3376	switch (etype) {
3377		case ETHERTYPE_IP:
3378			ip = (struct ip *)(mp->m_data + ehdrlen);
3379			ip_hlen = ip->ip_hl << 2;
3380			if (mp->m_len < ehdrlen + ip_hlen) {
3381				offload = FALSE;
3382				break;
3383			}
3384			ipproto = ip->ip_p;
3385			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3386			break;
3387		case ETHERTYPE_IPV6:
3388			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3389			ip_hlen = sizeof(struct ip6_hdr);
3390			if (mp->m_len < ehdrlen + ip_hlen)
3391				return (FALSE);
3392			ipproto = ip6->ip6_nxt;
3393			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3394			break;
3395		default:
3396			offload = FALSE;
3397			break;
3398	}
3399
3400	vlan_macip_lens |= ip_hlen;
3401	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3402
3403	switch (ipproto) {
3404		case IPPROTO_TCP:
3405			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3406				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3407			break;
3408		case IPPROTO_UDP:
3409			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3410				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3411			break;
3412#if __FreeBSD_version >= 800000
3413		case IPPROTO_SCTP:
3414			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3415				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3416			break;
3417#endif
3418		default:
3419			offload = FALSE;
3420			break;
3421	}
3422
3423	/* 82575 needs the queue index added */
3424	if (adapter->hw.mac.type == e1000_82575)
3425		mss_l4len_idx = txr->me << 4;
3426
3427	/* Now copy bits into descriptor */
3428	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3429	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3430	TXD->seqnum_seed = htole32(0);
3431	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3432
3433	tx_buffer->m_head = NULL;
3434	tx_buffer->next_eop = -1;
3435
3436	/* We've consumed the first desc, adjust counters */
3437	if (++ctxd == adapter->num_tx_desc)
3438		ctxd = 0;
3439	txr->next_avail_desc = ctxd;
3440	--txr->tx_avail;
3441
3442	return (offload);
3443}
3444
3445
3446/**********************************************************************
3447 *
3448 *  Examine each tx_buffer in the used queue. If the hardware is done
3449 *  processing the packet then free associated resources. The
3450 *  tx_buffer is put back on the free queue.
3451 *
3452 *  A TRUE return means there is work left to clean; FALSE means the ring is empty.
3453 **********************************************************************/
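/*
 * A sketch of the cleanup scheme: 'first' trails the hardware starting
 * at next_to_clean, while 'last'/'done' mark the slot just past the EOP
 * of the packet being reclaimed.  Once the EOP descriptor reports DD
 * (descriptor done) in its status, every slot of that packet is recycled
 * and the walk hops to the next packet via next_eop.
 */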
3454static bool
3455igb_txeof(struct tx_ring *txr)
3456{
3457	struct adapter	*adapter = txr->adapter;
3458	int first, last, done;
3459	struct igb_tx_buffer *tx_buffer;
3460	struct e1000_tx_desc *tx_desc, *eop_desc;
3461	struct ifnet   *ifp = adapter->ifp;
3462
3463	IGB_TX_LOCK_ASSERT(txr);
3464
3465	if (txr->tx_avail == adapter->num_tx_desc)
3466		return FALSE;
3467
3468	first = txr->next_to_clean;
3469	tx_desc = &txr->tx_base[first];
3470	tx_buffer = &txr->tx_buffers[first];
3471	last = tx_buffer->next_eop;
3472	eop_desc = &txr->tx_base[last];
3473
3474	/*
3475	 * Advance 'last' to the index of the first
3476	 * descriptor AFTER the EOP of the first packet,
3477	 * so the inner while loop below can use a
3478	 * simple inequality test.
3479	 */
3480	if (++last == adapter->num_tx_desc)
3481		last = 0;
3482	done = last;
3483
3484	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3485	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3486
3487	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3488		/* We clean the range of the packet */
3489		while (first != done) {
3490			tx_desc->upper.data = 0;
3491			tx_desc->lower.data = 0;
3492			tx_desc->buffer_addr = 0;
3493			++txr->tx_avail;
3494
3495			if (tx_buffer->m_head) {
3496				txr->bytes +=
3497				    tx_buffer->m_head->m_pkthdr.len;
3498				bus_dmamap_sync(txr->txtag,
3499				    tx_buffer->map,
3500				    BUS_DMASYNC_POSTWRITE);
3501				bus_dmamap_unload(txr->txtag,
3502				    tx_buffer->map);
3503
3504				m_freem(tx_buffer->m_head);
3505				tx_buffer->m_head = NULL;
3506			}
3507			tx_buffer->next_eop = -1;
3508			txr->watchdog_time = ticks;
3509
3510			if (++first == adapter->num_tx_desc)
3511				first = 0;
3512
3513			tx_buffer = &txr->tx_buffers[first];
3514			tx_desc = &txr->tx_base[first];
3515		}
3516		++txr->packets;
3517		++ifp->if_opackets;
3518		/* See if we can continue to the next packet */
3519		last = tx_buffer->next_eop;
3520		if (last != -1) {
3521			eop_desc = &txr->tx_base[last];
3522			/* Get new done point */
3523			if (++last == adapter->num_tx_desc) last = 0;
3524			done = last;
3525		} else
3526			break;
3527	}
3528	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3529	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3530
3531	txr->next_to_clean = first;
3532
3533	/*
3534	 * If we have enough room, clear IFF_DRV_OACTIVE
3535	 * to tell the stack that it is OK to send packets.
3536	 */
3537	if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3538		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3539		/* All clean, turn off the watchdog */
3540		if (txr->tx_avail == adapter->num_tx_desc) {
3541			txr->watchdog_check = FALSE;
3542			return FALSE;
3543		}
3544	}
3545
3546	return (TRUE);
3547}
3548
3549
3550/*********************************************************************
3551 *
3552 *  Refresh mbuf buffers for the RX descriptor rings
3553 *   - the routine keeps its own state, so discards due to resource
3554 *     exhaustion are unnecessary: if an mbuf cannot be obtained it
3555 *     just returns, keeping its placeholder, and can simply be
3556 *     called again later to retry.
3557 *
3558 **********************************************************************/
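/*
 * Each RX slot owns up to two mbufs: a small header mbuf mapped through
 * htag/hmap into read.hdr_addr, and a page-sized payload cluster mapped
 * through ptag/pmap into read.pkt_addr.  After the loop, RDT is bumped
 * to the last slot refreshed, handing those descriptors back to the
 * hardware.
 */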
3559static void
3560igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3561{
3562	struct adapter		*adapter = rxr->adapter;
3563	bus_dma_segment_t	hseg[1];
3564	bus_dma_segment_t	pseg[1];
3565	struct igb_rx_buf	*rxbuf;
3566	struct mbuf		*mh, *mp;
3567	int			i, nsegs, error, cleaned;
3568
3569	i = rxr->next_to_refresh;
3570	cleaned = -1; /* Signify no completions */
3571	while (i != limit) {
3572		rxbuf = &rxr->rx_buffers[i];
3573		if (rxbuf->m_head == NULL) {
3574			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3575			if (mh == NULL)
3576				goto update;
3577			mh->m_pkthdr.len = mh->m_len = MHLEN;
3579			mh->m_flags |= M_PKTHDR;
3580			m_adj(mh, ETHER_ALIGN);
3581			/* Get the memory mapping */
3582			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3583			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3584			if (error != 0) {
3585				printf("GET BUF: dmamap load"
3586				    " failure - %d\n", error);
3587				m_free(mh);
3588				goto update;
3589			}
3590			rxbuf->m_head = mh;
3591			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3592			    BUS_DMASYNC_PREREAD);
3593			rxr->rx_base[i].read.hdr_addr =
3594			    htole64(hseg[0].ds_addr);
3595		}
3596
3597		if (rxbuf->m_pack == NULL) {
3598			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3599			    M_PKTHDR, adapter->rx_mbuf_sz);
3600			if (mp == NULL)
3601				goto update;
3602			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3603			/* Get the memory mapping */
3604			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3605			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3606			if (error != 0) {
3607				printf("GET BUF: dmamap load"
3608				    " failure - %d\n", error);
3609				m_free(mp);
3610				goto update;
3611			}
3612			rxbuf->m_pack = mp;
3613			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3614			    BUS_DMASYNC_PREREAD);
3615			rxr->rx_base[i].read.pkt_addr =
3616			    htole64(pseg[0].ds_addr);
3617		}
3618
3619		cleaned = i;
3620		/* Calculate next index */
3621		if (++i == adapter->num_rx_desc)
3622			i = 0;
3623		/* This is the work marker for refresh */
3624		rxr->next_to_refresh = i;
3625	}
3626update:
3627	if (cleaned != -1) /* If we refreshed some, bump tail */
3628		E1000_WRITE_REG(&adapter->hw,
3629		    E1000_RDT(rxr->me), cleaned);
3630	return;
3631}
3632
3633
3634/*********************************************************************
3635 *
3636 *  Allocate memory for rx_buffer structures. Since we use one
3637 *  rx_buffer per received packet, the maximum number of rx_buffer's
3638 *  that we'll need is equal to the number of receive descriptors
3639 *  that we've allocated.
3640 *
3641 **********************************************************************/
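/*
 * Two DMA tags are created below: one bounded by MSIZE for the header
 * mbufs and one bounded by MJUMPAGESIZE for the payload clusters; every
 * descriptor then gets one map from each tag (hmap and pmap).
 */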
3642static int
3643igb_allocate_receive_buffers(struct rx_ring *rxr)
3644{
3645	struct	adapter 	*adapter = rxr->adapter;
3646	device_t 		dev = adapter->dev;
3647	struct igb_rx_buf	*rxbuf;
3648	int             	i, bsize, error;
3649
3650	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3651	if (!(rxr->rx_buffers =
3652	    (struct igb_rx_buf *) malloc(bsize,
3653	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3654		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3655		error = ENOMEM;
3656		goto fail;
3657	}
3658
3659	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3660				   1, 0,		/* alignment, bounds */
3661				   BUS_SPACE_MAXADDR,	/* lowaddr */
3662				   BUS_SPACE_MAXADDR,	/* highaddr */
3663				   NULL, NULL,		/* filter, filterarg */
3664				   MSIZE,		/* maxsize */
3665				   1,			/* nsegments */
3666				   MSIZE,		/* maxsegsize */
3667				   0,			/* flags */
3668				   NULL,		/* lockfunc */
3669				   NULL,		/* lockfuncarg */
3670				   &rxr->htag))) {
3671		device_printf(dev, "Unable to create RX DMA tag\n");
3672		goto fail;
3673	}
3674
3675	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3676				   1, 0,		/* alignment, bounds */
3677				   BUS_SPACE_MAXADDR,	/* lowaddr */
3678				   BUS_SPACE_MAXADDR,	/* highaddr */
3679				   NULL, NULL,		/* filter, filterarg */
3680				   MJUMPAGESIZE,	/* maxsize */
3681				   1,			/* nsegments */
3682				   MJUMPAGESIZE,	/* maxsegsize */
3683				   0,			/* flags */
3684				   NULL,		/* lockfunc */
3685				   NULL,		/* lockfuncarg */
3686				   &rxr->ptag))) {
3687		device_printf(dev, "Unable to create RX payload DMA tag\n");
3688		goto fail;
3689	}
3690
3691	for (i = 0; i < adapter->num_rx_desc; i++) {
3692		rxbuf = &rxr->rx_buffers[i];
3693		error = bus_dmamap_create(rxr->htag,
3694		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3695		if (error) {
3696			device_printf(dev,
3697			    "Unable to create RX head DMA maps\n");
3698			goto fail;
3699		}
3700		error = bus_dmamap_create(rxr->ptag,
3701		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3702		if (error) {
3703			device_printf(dev,
3704			    "Unable to create RX packet DMA maps\n");
3705			goto fail;
3706		}
3707	}
3708
3709	return (0);
3710
3711fail:
3712	/* Frees all, but can handle partial completion */
3713	igb_free_receive_structures(adapter);
3714	return (error);
3715}
3716
3717
3718static void
3719igb_free_receive_ring(struct rx_ring *rxr)
3720{
3721	struct	adapter		*adapter;
3722	struct igb_rx_buf	*rxbuf;
3723	int i;
3724
3725	adapter = rxr->adapter;
3726	for (i = 0; i < adapter->num_rx_desc; i++) {
3727		rxbuf = &rxr->rx_buffers[i];
3728		if (rxbuf->m_head != NULL) {
3729			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3730			    BUS_DMASYNC_POSTREAD);
3731			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3732			rxbuf->m_head->m_flags |= M_PKTHDR;
3733			m_freem(rxbuf->m_head);
3734		}
3735		if (rxbuf->m_pack != NULL) {
3736			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3737			    BUS_DMASYNC_POSTREAD);
3738			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3739			rxbuf->m_pack->m_flags |= M_PKTHDR;
3740			m_freem(rxbuf->m_pack);
3741		}
3742		rxbuf->m_head = NULL;
3743		rxbuf->m_pack = NULL;
3744	}
3745}
3746
3747
3748/*********************************************************************
3749 *
3750 *  Initialize a receive ring and its buffers.
3751 *
3752 **********************************************************************/
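/*
 * The ring is rebuilt from scratch here: zero the descriptor memory,
 * release any old mbufs, then load a fresh header/payload mbuf pair
 * into every slot so each union e1000_adv_rx_desc carries both a
 * hdr_addr and a pkt_addr.
 */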
3753static int
3754igb_setup_receive_ring(struct rx_ring *rxr)
3755{
3756	struct	adapter		*adapter;
3757	struct  ifnet		*ifp;
3758	device_t		dev;
3759	struct igb_rx_buf	*rxbuf;
3760	bus_dma_segment_t	pseg[1], hseg[1];
3761	struct lro_ctrl		*lro = &rxr->lro;
3762	int			rsize, nsegs, error = 0;
3763
3764	adapter = rxr->adapter;
3765	dev = adapter->dev;
3766	ifp = adapter->ifp;
3767
3768	/* Clear the ring contents */
3769	IGB_RX_LOCK(rxr);
3770	rsize = roundup2(adapter->num_rx_desc *
3771	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3772	bzero((void *)rxr->rx_base, rsize);
3773
3774	/*
3775	** Free current RX buffer structures and their mbufs
3776	*/
3777	igb_free_receive_ring(rxr);
3778
3779	/* Now replenish the ring mbufs */
3780	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3781		struct mbuf	*mh, *mp;
3782
3783		rxbuf = &rxr->rx_buffers[j];
3784
3785		/* First the header */
3786		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3787		if (rxbuf->m_head == NULL)
3788			goto fail;
3789		m_adj(rxbuf->m_head, ETHER_ALIGN);
3790		mh = rxbuf->m_head;
3791		mh->m_len = mh->m_pkthdr.len = MHLEN;
3792		mh->m_flags |= M_PKTHDR;
3793		/* Get the memory mapping */
3794		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3795		    rxbuf->hmap, rxbuf->m_head, hseg,
3796		    &nsegs, BUS_DMA_NOWAIT);
3797		if (error != 0) /* Nothing elegant to do here */
3798			goto fail;
3799		bus_dmamap_sync(rxr->htag,
3800		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3801		/* Update descriptor */
3802		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3803
3804		/* Now the payload cluster */
3805		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3806		    M_PKTHDR, adapter->rx_mbuf_sz);
3807		if (rxbuf->m_pack == NULL)
3808			goto fail;
3809		mp = rxbuf->m_pack;
3810		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3811		/* Get the memory mapping */
3812		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3813		    rxbuf->pmap, mp, pseg,
3814		    &nsegs, BUS_DMA_NOWAIT);
3815		if (error != 0)
3816			goto fail;
3817		bus_dmamap_sync(rxr->ptag,
3818		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3819		/* Update descriptor */
3820		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3821	}
3822
3823	/* Setup our descriptor indices */
3824	rxr->next_to_check = 0;
3825	rxr->next_to_refresh = 0;
3826	rxr->lro_enabled = FALSE;
3827
3828	if (igb_header_split)
3829		rxr->hdr_split = TRUE;
3830	else
3831		ifp->if_capabilities &= ~IFCAP_LRO;
3832
3833	rxr->fmp = NULL;
3834	rxr->lmp = NULL;
3835	rxr->discard = FALSE;
3836
3837	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3838	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3839
3840	/*
3841	** Now set up the LRO interface, we
3842	** also only do head split when LRO
3843	** is enabled, since so often they
3844	** are undesireable in similar setups.
3845	*/
3846	if (ifp->if_capenable & IFCAP_LRO) {
3847		int err = tcp_lro_init(lro);
3848		if (err) {
3849			device_printf(dev, "LRO Initialization failed!\n");
3850			goto fail;
3851		}
3852		INIT_DEBUGOUT("RX LRO Initialized\n");
3853		rxr->lro_enabled = TRUE;
3854		lro->ifp = adapter->ifp;
3855	}
3856
3857	IGB_RX_UNLOCK(rxr);
3858	return (0);
3859
3860fail:
3861	igb_free_receive_ring(rxr);
3862	IGB_RX_UNLOCK(rxr);
3863	return (error);
3864}
3865
3866/*********************************************************************
3867 *
3868 *  Initialize all receive rings.
3869 *
3870 **********************************************************************/
3871static int
3872igb_setup_receive_structures(struct adapter *adapter)
3873{
3874	struct rx_ring *rxr = adapter->rx_rings;
3875	int i, j;
3876
3877	for (i = 0; i < adapter->num_queues; i++, rxr++)
3878		if (igb_setup_receive_ring(rxr))
3879			goto fail;
3880
3881	return (0);
3882fail:
3883	/*
3884	 * Free RX buffers allocated so far; only the rings that
3885	 * completed need handling here, since the failing ring
3886	 * has already cleaned up after itself. 'i' is the index
3887	 * of the ring that failed, so release rings 0 .. i-1.
3888	 */
3889	rxr = adapter->rx_rings;
3890	for (j = 0; j < i; j++, rxr++)
3891		igb_free_receive_ring(rxr);
3894
3895	return (ENOBUFS);
3896}
3897
3898/*********************************************************************
3899 *
3900 *  Enable receive unit.
3901 *
3902 **********************************************************************/
3903static void
3904igb_initialize_receive_units(struct adapter *adapter)
3905{
3906	struct rx_ring	*rxr = adapter->rx_rings;
3907	struct ifnet	*ifp = adapter->ifp;
3908	struct e1000_hw *hw = &adapter->hw;
3909	u32		rctl, rxcsum, psize, srrctl = 0;
3910
3911	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3912
3913	/*
3914	 * Make sure receives are disabled while setting
3915	 * up the descriptor ring
3916	 */
3917	rctl = E1000_READ_REG(hw, E1000_RCTL);
3918	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3919
3920	/*
3921	** Set up for header split
3922	*/
3923	if (rxr->hdr_split) {
3924		/* Use a standard mbuf for the header */
3925		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3926		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3927	} else
3928		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3929
3930	/*
3931	** Set up for jumbo frames
3932	*/
3933	if (ifp->if_mtu > ETHERMTU) {
3934		rctl |= E1000_RCTL_LPE;
3935		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3936		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3937
3938		/* Set maximum packet len */
3939		psize = adapter->max_frame_size;
3940		/* are we on a vlan? */
3941		if (adapter->ifp->if_vlantrunk != NULL)
3942			psize += VLAN_TAG_SIZE;
3943		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3944	} else {
3945		rctl &= ~E1000_RCTL_LPE;
3946		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3947		rctl |= E1000_RCTL_SZ_2048;
3948	}
3949
3950	/* Setup the Base and Length of the Rx Descriptor Rings */
3951	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3952		u64 bus_addr = rxr->rxdma.dma_paddr;
3953		u32 rxdctl;
3954
3955		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3956		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3957		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3958		    (uint32_t)(bus_addr >> 32));
3959		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3960		    (uint32_t)bus_addr);
3961		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3962		/* Enable this Queue */
3963		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3964		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3965		rxdctl &= 0xFFF00000;
3966		rxdctl |= IGB_RX_PTHRESH;
3967		rxdctl |= IGB_RX_HTHRESH << 8;
3968		rxdctl |= IGB_RX_WTHRESH << 16;
3969		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3970	}
3971
3972	/*
3973	** Setup for RX MultiQueue
3974	*/
3975	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3976	if (adapter->num_queues > 1) {
3977		u32 random[10], mrqc, shift = 0;
3978		union igb_reta {
3979			u32 dword;
3980			u8  bytes[4];
3981		} reta;
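		/*
		 * The RETA is 128 one-byte entries packed four per 32-bit
		 * register, so 32 register writes fill it.  Entry i steers
		 * traffic to queue (i % num_queues); with two queues the
		 * bytes simply alternate 0,1,0,1,...  On the 82575 the
		 * queue index lives in the upper bits, hence the shift.
		 */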
3982
3983		arc4rand(random, sizeof(random), 0);
3984		if (adapter->hw.mac.type == e1000_82575)
3985			shift = 6;
3986		/* Fill the redirection table, 4 entries per register */
3987		for (int i = 0; i < 128; i++) {
3988			reta.bytes[i & 3] =
3989			    (i % adapter->num_queues) << shift;
3990			if ((i & 3) == 3)
3991				E1000_WRITE_REG(hw,
3992				    E1000_RETA(i >> 2), reta.dword);
3993		}
3994		/* Now fill in hash table */
3995		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3996		for (int i = 0; i < 10; i++)
3997			E1000_WRITE_REG_ARRAY(hw,
3998			    E1000_RSSRK(0), i, random[i]);
3999
4000		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4001		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4002		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4003		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4004		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4005		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4006		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4007		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4008
4009		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4010
4011		/*
4012		** NOTE: Receive Full-Packet Checksum Offload
4013		** is mutually exclusive with Multiqueue. However
4014		** this is not the same as TCP/IP checksums which
4015		** still work.
4016		*/
4017		rxcsum |= E1000_RXCSUM_PCSD;
4018#if __FreeBSD_version >= 800000
4019		/* For SCTP Offload */
4020		if ((hw->mac.type == e1000_82576)
4021		    && (ifp->if_capenable & IFCAP_RXCSUM))
4022			rxcsum |= E1000_RXCSUM_CRCOFL;
4023#endif
4024	} else {
4025		/* Non RSS setup */
4026		if (ifp->if_capenable & IFCAP_RXCSUM) {
4027			rxcsum |= E1000_RXCSUM_IPPCSE;
4028#if __FreeBSD_version >= 800000
4029			if (adapter->hw.mac.type == e1000_82576)
4030				rxcsum |= E1000_RXCSUM_CRCOFL;
4031#endif
4032		} else
4033			rxcsum &= ~E1000_RXCSUM_TUOFL;
4034	}
4035	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4036
4037	/* Setup the Receive Control Register */
4038	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4040		   E1000_RCTL_RDMTS_HALF |
4041		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4042	/* Strip CRC bytes. */
4043	rctl |= E1000_RCTL_SECRC;
4044	/* Make sure VLAN Filters are off */
4045	rctl &= ~E1000_RCTL_VFE;
4046	/* Don't store bad packets */
4047	rctl &= ~E1000_RCTL_SBP;
4048
4049	/* Enable Receives */
4050	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4051
4052	/*
4053	 * Setup the HW Rx Head and Tail Descriptor Pointers
4054	 *   - needs to be after enable
4055	 */
4056	for (int i = 0; i < adapter->num_queues; i++) {
4057		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4058		E1000_WRITE_REG(hw, E1000_RDT(i),
4059		     adapter->num_rx_desc - 1);
4060	}
4061	return;
4062}
4063
4064/*********************************************************************
4065 *
4066 *  Free receive rings.
4067 *
4068 **********************************************************************/
4069static void
4070igb_free_receive_structures(struct adapter *adapter)
4071{
4072	struct rx_ring *rxr = adapter->rx_rings;
4073
4074	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4075		struct lro_ctrl	*lro = &rxr->lro;
4076		igb_free_receive_buffers(rxr);
4077		tcp_lro_free(lro);
4078		igb_dma_free(adapter, &rxr->rxdma);
4079	}
4080
4081	free(adapter->rx_rings, M_DEVBUF);
4082}
4083
4084/*********************************************************************
4085 *
4086 *  Free receive ring data structures.
4087 *
4088 **********************************************************************/
4089static void
4090igb_free_receive_buffers(struct rx_ring *rxr)
4091{
4092	struct adapter		*adapter = rxr->adapter;
4093	struct igb_rx_buf	*rxbuf;
4094	int i;
4095
4096	INIT_DEBUGOUT("free_receive_structures: begin");
4097
4098	/* Cleanup any existing buffers */
4099	if (rxr->rx_buffers != NULL) {
4100		for (i = 0; i < adapter->num_rx_desc; i++) {
4101			rxbuf = &rxr->rx_buffers[i];
4102			if (rxbuf->m_head != NULL) {
4103				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4104				    BUS_DMASYNC_POSTREAD);
4105				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4106				rxbuf->m_head->m_flags |= M_PKTHDR;
4107				m_freem(rxbuf->m_head);
4108			}
4109			if (rxbuf->m_pack != NULL) {
4110				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4111				    BUS_DMASYNC_POSTREAD);
4112				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4113				rxbuf->m_pack->m_flags |= M_PKTHDR;
4114				m_freem(rxbuf->m_pack);
4115			}
4116			rxbuf->m_head = NULL;
4117			rxbuf->m_pack = NULL;
4118			if (rxbuf->hmap != NULL) {
4119				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4120				rxbuf->hmap = NULL;
4121			}
4122			if (rxbuf->pmap != NULL) {
4123				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4124				rxbuf->pmap = NULL;
4125			}
4126		}
4127		if (rxr->rx_buffers != NULL) {
4128			free(rxr->rx_buffers, M_DEVBUF);
4129			rxr->rx_buffers = NULL;
4130		}
4131	}
4132
4133	if (rxr->htag != NULL) {
4134		bus_dma_tag_destroy(rxr->htag);
4135		rxr->htag = NULL;
4136	}
4137	if (rxr->ptag != NULL) {
4138		bus_dma_tag_destroy(rxr->ptag);
4139		rxr->ptag = NULL;
4140	}
4141}
4142
4143static __inline void
4144igb_rx_discard(struct rx_ring *rxr, int i)
4145{
4146	struct adapter		*adapter = rxr->adapter;
4147	struct igb_rx_buf	*rbuf;
4148	struct mbuf             *mh, *mp;
4149
4150	rbuf = &rxr->rx_buffers[i];
4151	if (rxr->fmp != NULL) {
4152		rxr->fmp->m_flags |= M_PKTHDR;
4153		m_freem(rxr->fmp);
4154		rxr->fmp = NULL;
4155		rxr->lmp = NULL;
4156	}
4157
4158	mh = rbuf->m_head;
4159	mp = rbuf->m_pack;
4160
4161	/* Reuse loaded DMA map and just update mbuf chain */
4162	mh->m_len = MHLEN;
4163	mh->m_flags |= M_PKTHDR;
4164	mh->m_next = NULL;
4165
4166	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4167	mp->m_data = mp->m_ext.ext_buf;
4168	mp->m_next = NULL;
4169	return;
4170}
4171
4172static __inline void
4173igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4174{
4175
4176	/*
4177	 * At the moment LRO is applied only to IPv4/TCP packets whose TCP
4178	 * checksum has been verified by the hardware, and which carry no
4179	 * VLAN tag in the ethernet header.
4180	 */
4181	if (rxr->lro_enabled &&
4182	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4183	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4184	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4185	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4186	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4187	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4188		/*
4189		 * Send to the stack if:
4190		 *  - LRO is not enabled, or
4191		 *  - there are no LRO resources, or
4192		 *  - the LRO enqueue fails
4193		 */
4194		if (rxr->lro.lro_cnt != 0)
4195			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4196				return;
4197	}
4198	(*ifp->if_input)(ifp, m);
4199}
4200
4201/*********************************************************************
4202 *
4203 *  This routine executes in interrupt context. It replenishes
4204 *  the mbufs in the descriptor and sends data which has been
4205 *  dma'ed into host memory to upper layer.
4206 *
4207 *  We loop at most count times if count is > 0, or until done if
4208 *  count < 0.
4209 *
4210 *  Return TRUE if more to clean, FALSE otherwise
4211 *********************************************************************/
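/*
 * Flow of the loop below: read the writeback status of each descriptor,
 * discard all segments of a bad frame, otherwise chain the header and
 * payload mbufs onto fmp/lmp until an EOP descriptor completes the
 * frame, which is then passed to LRO or if_input.  Every 8 descriptors
 * processed, the ring is replenished in place via igb_refresh_mbufs.
 */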
4212static bool
4213igb_rxeof(struct igb_queue *que, int count)
4214{
4215	struct adapter		*adapter = que->adapter;
4216	struct rx_ring		*rxr = que->rxr;
4217	struct ifnet		*ifp = adapter->ifp;
4218	struct lro_ctrl		*lro = &rxr->lro;
4219	struct lro_entry	*queued;
4220	int			i, processed = 0;
4221	u32			ptype, staterr = 0;
4222	union e1000_adv_rx_desc	*cur;
4223
4224	IGB_RX_LOCK(rxr);
4225	/* Sync the ring. */
4226	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4227	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4228
4229	/* Main clean loop */
4230	for (i = rxr->next_to_check; count != 0;) {
4231		struct mbuf		*sendmp, *mh, *mp;
4232		struct igb_rx_buf	*rxbuf;
4233		u16			hlen, plen, hdr, vtag;
4234		bool			eop = FALSE;
4235
4236		cur = &rxr->rx_base[i];
4237		staterr = le32toh(cur->wb.upper.status_error);
4238		if ((staterr & E1000_RXD_STAT_DD) == 0)
4239			break;
4240		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4241			break;
4242		count--;
4243		sendmp = mh = mp = NULL;
4244		cur->wb.upper.status_error = 0;
4245		rxbuf = &rxr->rx_buffers[i];
4246		plen = le16toh(cur->wb.upper.length);
4247		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4248		vtag = le16toh(cur->wb.upper.vlan);
4249		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4250		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4251
4252		/* Make sure all segments of a bad packet are discarded */
4253		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4254		    (rxr->discard)) {
4255			ifp->if_ierrors++;
4256			++rxr->rx_discarded;
4257			if (!eop) /* Catch subsequent segs */
4258				rxr->discard = TRUE;
4259			else
4260				rxr->discard = FALSE;
4261			igb_rx_discard(rxr, i);
4262			goto next_desc;
4263		}
4264
4265		/*
4266		** The way the hardware is configured to
4267		** split, it will ONLY use the header buffer
4268		** when header split is enabled, otherwise we
4269		** get normal behavior, i.e., both header and
4270		** payload are DMA'd into the payload buffer.
4271		**
4272		** The fmp test is to catch the case where a
4273		** packet spans multiple descriptors, in that
4274		** case only the first header is valid.
4275		*/
4276		if (rxr->hdr_split && rxr->fmp == NULL) {
4277			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4278			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4279			if (hlen > IGB_HDR_BUF)
4280				hlen = IGB_HDR_BUF;
4281			/* Handle the header mbuf */
4282			mh = rxr->rx_buffers[i].m_head;
4283			mh->m_len = hlen;
4284			/* clear buf info for refresh */
4285			rxbuf->m_head = NULL;
4286			/*
4287			** Get the payload length, this
4288			** could be zero if it's a small
4289			** packet.
4290			*/
4291			if (plen > 0) {
4292				mp = rxr->rx_buffers[i].m_pack;
4293				mp->m_len = plen;
4294				mh->m_next = mp;
4295				/* clear buf info for refresh */
4296				rxbuf->m_pack = NULL;
4297				rxr->rx_split_packets++;
4298			}
4299		} else {
4300			/*
4301			** Either no header split, or a
4302			** secondary piece of a fragmented
4303			** split packet.
4304			*/
4305			mh = rxr->rx_buffers[i].m_pack;
4306			mh->m_len = plen;
4307			/* clear buf info for refresh */
4308			rxbuf->m_pack = NULL;
4309		}
4310
4311		++processed; /* So we know when to refresh */
4312
4313		/* Initial frame - setup */
4314		if (rxr->fmp == NULL) {
4315			mh->m_pkthdr.len = mh->m_len;
4316			/* Store the first mbuf */
4317			rxr->fmp = mh;
4318			rxr->lmp = mh;
4319			if (mp != NULL) {
4320				/* Add payload if split */
4321				mh->m_pkthdr.len += mp->m_len;
4322				rxr->lmp = mh->m_next;
4323			}
4324		} else {
4325			/* Chain mbuf's together */
4326			rxr->lmp->m_next = mh;
4327			rxr->lmp = rxr->lmp->m_next;
4328			rxr->fmp->m_pkthdr.len += mh->m_len;
4329		}
4330
4331		if (eop) {
4332			rxr->fmp->m_pkthdr.rcvif = ifp;
4333			ifp->if_ipackets++;
4334			rxr->rx_packets++;
4335			/* capture data for AIM */
4336			rxr->packets++;
4337			rxr->bytes += rxr->fmp->m_pkthdr.len;
4338			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4339
4340			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4341				igb_rx_checksum(staterr, rxr->fmp, ptype);
4342
4343			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4344			    (staterr & E1000_RXD_STAT_VP) != 0) {
4345				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4346				rxr->fmp->m_flags |= M_VLANTAG;
4347			}
4348#if __FreeBSD_version >= 800000
4349			rxr->fmp->m_pkthdr.flowid = que->msix;
4350			rxr->fmp->m_flags |= M_FLOWID;
4351#endif
4352			sendmp = rxr->fmp;
4353			/* Make sure to set M_PKTHDR. */
4354			sendmp->m_flags |= M_PKTHDR;
4355			rxr->fmp = NULL;
4356			rxr->lmp = NULL;
4357		}
4358
4359next_desc:
4360		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4361		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4362
4363		/* Advance our pointers to the next descriptor. */
4364		if (++i == adapter->num_rx_desc)
4365			i = 0;
4366		/*
4367		** Send to the stack or LRO
4368		*/
4369		if (sendmp != NULL)
4370			igb_rx_input(rxr, ifp, sendmp, ptype);
4371
4372		/* Every 8 descriptors we go to refresh mbufs */
4373		if (processed == 8) {
4374			igb_refresh_mbufs(rxr, i);
4375			processed = 0;
4376		}
4377	}
4378
4379	/* Catch any remainders */
4380	if (processed != 0) {
4381		igb_refresh_mbufs(rxr, i);
4382		processed = 0;
4383	}
4384
4385	rxr->next_to_check = i;
4386
4387	/*
4388	 * Flush any outstanding LRO work
4389	 */
4390	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4391		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4392		tcp_lro_flush(lro, queued);
4393	}
4394
4395	IGB_RX_UNLOCK(rxr);
4396
4397	/*
4398	** We still have cleaning to do?
4399	** Schedule another interrupt if so.
4400	*/
4401	if ((staterr & E1000_RXD_STAT_DD) != 0)
4402		return (TRUE);
4403
4404	return (FALSE);
4405}
4406
4407/*********************************************************************
4408 *
4409 *  Verify that the hardware indicated that the checksum is valid.
4410 *  Inform the stack about the status of checksum so that stack
4411 *  doesn't spend time verifying the checksum.
4412 *
4413 *********************************************************************/
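/*
 * Layout of 'staterr' as consumed here: bits 15:0 hold the status word
 * (IPCS/TCPCS/UDPCS meaning "hardware checked this layer") and bits
 * 31:24 the error word (IPE/TCPE meaning "and it failed").  For example,
 * IPCS set with IPE clear yields CSUM_IP_CHECKED | CSUM_IP_VALID.
 */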
4414static void
4415igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4416{
4417	u16 status = (u16)staterr;
4418	u8  errors = (u8) (staterr >> 24);
4419	int sctp;
4420
4421	/* Ignore Checksum bit set: report nothing to the stack */
4422	if (status & E1000_RXD_STAT_IXSM) {
4423		mp->m_pkthdr.csum_flags = 0;
4424		return;
4425	}
4426
4427	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4428	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4429		sctp = 1;
4430	else
4431		sctp = 0;
4432	if (status & E1000_RXD_STAT_IPCS) {
4433		/* Did it pass? */
4434		if (!(errors & E1000_RXD_ERR_IPE)) {
4435			/* IP Checksum Good */
4436			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4437			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4438		} else
4439			mp->m_pkthdr.csum_flags = 0;
4440	}
4441
4442	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4443		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4444#if __FreeBSD_version >= 800000
4445		if (sctp) /* reassign */
4446			type = CSUM_SCTP_VALID;
4447#endif
4448		/* Did it pass? */
4449		if (!(errors & E1000_RXD_ERR_TCPE)) {
4450			mp->m_pkthdr.csum_flags |= type;
4451			if (sctp == 0)
4452				mp->m_pkthdr.csum_data = htons(0xffff);
4453		}
4454	}
4455	return;
4456}
4457
4458/*
4459 * This routine is run via a vlan
4460 * config EVENT
4461 */
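/*
 * The shadow VFTA is a 128-word bitmap covering all 4096 possible VLAN
 * ids: word index = (vtag >> 5) & 0x7F, bit = vtag & 0x1F.  For example,
 * vtag 100 lands in word 3, bit 4.
 */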
4462static void
4463igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4464{
4465	struct adapter	*adapter = ifp->if_softc;
4466	u32		index, bit;
4467
4468	if (ifp->if_softc !=  arg)   /* Not our event */
4469		return;
4470
4471	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4472		return;
4473
4474	index = (vtag >> 5) & 0x7F;
4475	bit = vtag & 0x1F;
4476	igb_shadow_vfta[index] |= (1 << bit);
4477	++adapter->num_vlans;
4478	/* Re-init to load the changes */
4479	igb_init(adapter);
4480}
4481
4482/*
4483 * This routine is run via a vlan
4484 * unconfig EVENT
4485 */
4486static void
4487igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4488{
4489	struct adapter	*adapter = ifp->if_softc;
4490	u32		index, bit;
4491
4492	if (ifp->if_softc !=  arg)
4493		return;
4494
4495	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4496		return;
4497
4498	index = (vtag >> 5) & 0x7F;
4499	bit = vtag & 0x1F;
4500	igb_shadow_vfta[index] &= ~(1 << bit);
4501	--adapter->num_vlans;
4502	/* Re-init to load the changes */
4503	igb_init(adapter);
4504}
4505
4506static void
4507igb_setup_vlan_hw_support(struct adapter *adapter)
4508{
4509	struct e1000_hw *hw = &adapter->hw;
4510	u32             reg;
4511
4512	/*
4513	** We get here through init_locked, meaning a
4514	** soft reset has occurred; that has already
4515	** cleared the VFTA and other state, so if no
4516	** vlans have been registered there is nothing to do.
4517	*/
4518	if (adapter->num_vlans == 0)
4519		return;
4520
4521	/*
4522	** A soft reset zeroes out the VFTA, so
4523	** we need to repopulate it now.
4524	*/
4525	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4526		if (igb_shadow_vfta[i] != 0)
4527			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4528			    i, igb_shadow_vfta[i]);
4529
4530	reg = E1000_READ_REG(hw, E1000_CTRL);
4531	reg |= E1000_CTRL_VME;
4532	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4533
4534	/* Enable the Filter Table */
4535	reg = E1000_READ_REG(hw, E1000_RCTL);
4536	reg &= ~E1000_RCTL_CFIEN;
4537	reg |= E1000_RCTL_VFE;
4538	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4539
4540	/* Update the frame size */
4541	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4542	    adapter->max_frame_size + VLAN_TAG_SIZE);
4543}
4544
4545static void
4546igb_enable_intr(struct adapter *adapter)
4547{
4548	/* With RSS set up what to auto clear */
4549	if (adapter->msix_mem) {
4550		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4551		    adapter->eims_mask);
4552		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4553		    adapter->eims_mask);
4554		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4555		    adapter->eims_mask);
4556		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4557		    E1000_IMS_LSC);
4558	} else {
4559		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4560		    IMS_ENABLE_MASK);
4561	}
4562	E1000_WRITE_FLUSH(&adapter->hw);
4563
4564	return;
4565}
4566
4567static void
4568igb_disable_intr(struct adapter *adapter)
4569{
4570	if (adapter->msix_mem) {
4571		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4572		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4573	}
4574	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4575	E1000_WRITE_FLUSH(&adapter->hw);
4576	return;
4577}
4578
4579/*
4580 * Something of a misnomer: what this really does is
4581 * enable OS management of the system, i.e. it disables
4582 * the special hardware management features.
4583 */
4584static void
4585igb_init_manageability(struct adapter *adapter)
4586{
4587	if (adapter->has_manage) {
4588		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4589		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4590
4591		/* disable hardware interception of ARP */
4592		manc &= ~(E1000_MANC_ARP_EN);
4593
4594                /* enable receiving management packets to the host */
4595		manc |= E1000_MANC_EN_MNG2HOST;
4596		manc2h |= 1 << 5;  /* Mng Port 623 */
4597		manc2h |= 1 << 6;  /* Mng Port 664 */
4598		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4599		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4600	}
4601}
4602
4603/*
4604 * Give control back to hardware management
4605 * controller if there is one.
4606 */
4607static void
4608igb_release_manageability(struct adapter *adapter)
4609{
4610	if (adapter->has_manage) {
4611		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4612
4613		/* re-enable hardware interception of ARP */
4614		manc |= E1000_MANC_ARP_EN;
4615		manc &= ~E1000_MANC_EN_MNG2HOST;
4616
4617		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4618	}
4619}
4620
4621/*
4622 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4623 * For ASF and Pass Through versions of f/w this means that
4624 * the driver is loaded.
4625 *
4626 */
4627static void
4628igb_get_hw_control(struct adapter *adapter)
4629{
4630	u32 ctrl_ext;
4631
4632	/* Let firmware know the driver has taken over */
4633	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4634	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4635	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4636}
4637
4638/*
4639 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4640 * For ASF and Pass Through versions of f/w this means that the
4641 * driver is no longer loaded.
4642 *
4643 */
4644static void
4645igb_release_hw_control(struct adapter *adapter)
4646{
4647	u32 ctrl_ext;
4648
4649	/* Let the firmware take over control of the h/w */
4650	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4651	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4652	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4653}
4654
4655static int
4656igb_is_valid_ether_addr(uint8_t *addr)
4657{
4658	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
4659
4660	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4661		return (FALSE);
4662	}
4663
4664	return (TRUE);
4665}
4666
4667
4668/*
4669 * Enable PCI Wake On Lan capability
4670 */
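/*
 * Note that this reads only the first entry in the PCI capability list;
 * it assumes the power-management capability (PCIY_PMG) sits there and
 * simply bails out otherwise rather than walking the whole list.
 */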
4671static void
4672igb_enable_wakeup(device_t dev)
4673{
4674	u16     cap, status;
4675	u8      id;
4676
4677	/* First find the capabilities pointer */
4678	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4679	/* Read the PM Capabilities */
4680	id = pci_read_config(dev, cap, 1);
4681	if (id != PCIY_PMG)     /* Something wrong */
4682		return;
4683	/* OK, we have the power capabilities, so
4684	   now get the status register */
4685	cap += PCIR_POWER_STATUS;
4686	status = pci_read_config(dev, cap, 2);
4687	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4688	pci_write_config(dev, cap, status, 2);
4689	return;
4690}
4691
4692static void
4693igb_led_func(void *arg, int onoff)
4694{
4695	struct adapter	*adapter = arg;
4696
4697	IGB_CORE_LOCK(adapter);
4698	if (onoff) {
4699		e1000_setup_led(&adapter->hw);
4700		e1000_led_on(&adapter->hw);
4701	} else {
4702		e1000_led_off(&adapter->hw);
4703		e1000_cleanup_led(&adapter->hw);
4704	}
4705	IGB_CORE_UNLOCK(adapter);
4706}
4707
4708/**********************************************************************
4709 *
4710 *  Update the board statistics counters.
4711 *
4712 **********************************************************************/
4713static void
4714igb_update_stats_counters(struct adapter *adapter)
4715{
4716	struct ifnet   *ifp;
4717
4718	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4719	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4720		adapter->stats.symerrs +=
4721		    E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4722		adapter->stats.sec +=
4723		    E1000_READ_REG(&adapter->hw, E1000_SEC);
4724	}
4725	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4726	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4727	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4728	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4729
4730	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4731	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4732	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4733	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4734	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4735	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4736	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4737	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4738	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4739	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4740	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4741	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4742	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4743	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4744	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4745	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4746	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4747	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4748	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4749	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4750
4751	/* For the 64-bit byte counters the low dword must be read first. */
4752	/* Both registers clear on the read of the high dword */
4753
4754	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4755	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4756
4757	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4758	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4759	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4760	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4761	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4762
4763	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4764	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4765
4766	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4767	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4768	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4769	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4770	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4771	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4772	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4773	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4774	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4775	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4776
4777	adapter->stats.algnerrc +=
4778		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4779	adapter->stats.rxerrc +=
4780		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4781	adapter->stats.tncrs +=
4782		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4783	adapter->stats.cexterr +=
4784		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4785	adapter->stats.tsctc +=
4786		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4787	adapter->stats.tsctfc +=
4788		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4789	ifp = adapter->ifp;
4790
4791	ifp->if_collisions = adapter->stats.colc;
4792
4793	/* Rx Errors */
4794	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4795	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4796	    adapter->stats.ruc + adapter->stats.roc +
4797	    adapter->stats.mpc + adapter->stats.cexterr;
4798
4799	/* Tx Errors */
4800	ifp->if_oerrors = adapter->stats.ecol +
4801	    adapter->stats.latecol + adapter->watchdog_events;
4802}
4803
4804
4805/**********************************************************************
4806 *
4807 *  This routine is called only when igb_display_debug_stats is enabled.
4808 *  This routine provides a way to take a look at important statistics
4809 *  maintained by the driver and hardware.
4810 *
4811 **********************************************************************/
4812static void
4813igb_print_debug_info(struct adapter *adapter)
4814{
4815	device_t dev = adapter->dev;
4816	struct igb_queue *que = adapter->queues;
4817	struct rx_ring *rxr = adapter->rx_rings;
4818	struct tx_ring *txr = adapter->tx_rings;
4819	uint8_t *hw_addr = adapter->hw.hw_addr;
4820
4821	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4822	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4823	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4824	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4825
4826#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4827	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4828	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4829	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4830#endif
4831
4832	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4833	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4834	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4835	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4836	    adapter->hw.fc.high_water,
4837	    adapter->hw.fc.low_water);
4838
4839	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4840		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4841		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4842		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4843		device_printf(dev, "rdh = %d, rdt = %d\n",
4844		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4845		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4846		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4847		    txr->me, (long long)txr->no_desc_avail);
4848		device_printf(dev, "TX(%d) Packets sent = %lld\n",
4849		    txr->me, (long long)txr->tx_packets);
4850		device_printf(dev, "RX(%d) Packets received = %lld  ",
4851		    rxr->me, (long long)rxr->rx_packets);
4852	}
4853	rxr = adapter->rx_rings;	/* rewind; the loop above advanced it */
4854	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4855		struct lro_ctrl *lro = &rxr->lro;
4856		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4857		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4858		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4859		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4860		    (long long)rxr->rx_packets);
4861		device_printf(dev, " Split Packets = %lld ",
4862		    (long long)rxr->rx_split_packets);
4863		device_printf(dev, " Byte count = %lld\n",
4864		    (long long)rxr->rx_bytes);
4865		device_printf(dev,"RX(%d) LRO Queued= %d  ",
4866		    i, lro->lro_queued);
4867		device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4868	}
4869
4870	for (int i = 0; i < adapter->num_queues; i++, que++)
4871		device_printf(dev,"QUE(%d) IRQs = %llx\n",
4872		    i, (long long)que->irqs);
4873
4874	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4875	device_printf(dev, "Mbuf defrag failed = %ld\n",
4876	    adapter->mbuf_defrag_failed);
4877	device_printf(dev, "Std mbuf header failed = %ld\n",
4878	    adapter->mbuf_header_failed);
4879	device_printf(dev, "Std mbuf packet failed = %ld\n",
4880	    adapter->mbuf_packet_failed);
4881	device_printf(dev, "Driver dropped packets = %ld\n",
4882	    adapter->dropped_pkts);
4883	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4884		adapter->no_tx_dma_setup);
4885}
4886
4887static void
4888igb_print_hw_stats(struct adapter *adapter)
4889{
4890	device_t dev = adapter->dev;
4891
4892	device_printf(dev, "Excessive collisions = %lld\n",
4893	    (long long)adapter->stats.ecol);
4894#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4895	device_printf(dev, "Symbol errors = %lld\n",
4896	    (long long)adapter->stats.symerrs);
4897#endif
4898	device_printf(dev, "Sequence errors = %lld\n",
4899	    (long long)adapter->stats.sec);
4900	device_printf(dev, "Defer count = %lld\n",
4901	    (long long)adapter->stats.dc);
4902	device_printf(dev, "Missed Packets = %lld\n",
4903	    (long long)adapter->stats.mpc);
4904	device_printf(dev, "Receive No Buffers = %lld\n",
4905	    (long long)adapter->stats.rnbc);
4906	/* RLEC is inaccurate on some hardware, so calculate our own. */
4907	device_printf(dev, "Receive Length Errors = %lld\n",
4908	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4909	device_printf(dev, "Receive errors = %lld\n",
4910	    (long long)adapter->stats.rxerrc);
4911	device_printf(dev, "Crc errors = %lld\n",
4912	    (long long)adapter->stats.crcerrs);
4913	device_printf(dev, "Alignment errors = %lld\n",
4914	    (long long)adapter->stats.algnerrc);
4915	/* On 82575 these are collision counts */
4916	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4917	    (long long)adapter->stats.cexterr);
4918	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4919	device_printf(dev, "watchdog timeouts = %ld\n",
4920	    adapter->watchdog_events);
4921	device_printf(dev, "XON Rcvd = %lld\n",
4922	    (long long)adapter->stats.xonrxc);
4923	device_printf(dev, "XON Xmtd = %lld\n",
4924	    (long long)adapter->stats.xontxc);
4925	device_printf(dev, "XOFF Rcvd = %lld\n",
4926	    (long long)adapter->stats.xoffrxc);
4927	device_printf(dev, "XOFF Xmtd = %lld\n",
4928	    (long long)adapter->stats.xofftxc);
4929	device_printf(dev, "Good Packets Rcvd = %lld\n",
4930	    (long long)adapter->stats.gprc);
4931	device_printf(dev, "Good Packets Xmtd = %lld\n",
4932	    (long long)adapter->stats.gptc);
4933	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4934	    (long long)adapter->stats.tsctc);
4935	device_printf(dev, "TSO Contexts Failed = %lld\n",
4936	    (long long)adapter->stats.tsctfc);
4937}
4938
4939/**********************************************************************
4940 *
4941 *  This routine provides a way to dump out the adapter eeprom,
4942 *  often a useful debug/service tool. This only dumps the first
4943 *  32 words, stuff that matters is in that extent.
4944 *
4945 **********************************************************************/
4946static void
4947igb_print_nvm_info(struct adapter *adapter)
4948{
4949	u16	eeprom_data;
4950	int	i, j, row = 0;
4951
4952	/* It's a bit crude, but it gets the job done */
4953	printf("\nInterface EEPROM Dump:\n");
4954	printf("Offset\n0x0000  ");
4955	for (i = 0, j = 0; i < 32; i++, j++) {
4956		if (j == 8) { /* Make the offset block */
4957			j = 0; ++row;
4958			printf("\n0x00%x0  ", row);
4959		}
4960		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4961		printf("%04x ", eeprom_data);
4962	}
4963	printf("\n");
4964}
4965
4966static int
4967igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4968{
4969	struct adapter *adapter;
4970	int error;
4971	int result;
4972
4973	result = -1;
4974	error = sysctl_handle_int(oidp, &result, 0, req);
4975
4976	if (error || !req->newptr)
4977		return (error);
4978
4979	if (result == 1) {
4980		adapter = (struct adapter *)arg1;
4981		igb_print_debug_info(adapter);
4982	}
4983	/*
4984	 * This value will cause a hex dump of the
4985	 * first 32 16-bit words of the EEPROM to
4986	 * the screen.
4987	 */
4988	if (result == 2) {
4989		adapter = (struct adapter *)arg1;
4990		igb_print_nvm_info(adapter);
4991	}
4992
4993	return (error);
4994}
4995
4996
4997static int
4998igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4999{
5000	struct adapter *adapter;
5001	int error;
5002	int result;
5003
5004	result = -1;
5005	error = sysctl_handle_int(oidp, &result, 0, req);
5006
5007	if (error || !req->newptr)
5008		return (error);
5009
5010	if (result == 1) {
5011		adapter = (struct adapter *)arg1;
5012		igb_print_hw_stats(adapter);
5013	}
5014
5015	return (error);
5016}
5017
5018static void
5019igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5020	const char *description, int *limit, int value)
5021{
5022	*limit = value;
5023	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5024	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5025	    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW, limit, value, description);
5026}
5027