/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 217556 2011-01-18 21:14:23Z mdf $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.0.7";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
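/*
** When AIM is enabled, the per-queue EITR value is recalculated in
** igb_msix_que() on each MSI-X interrupt, using the average packet
** size seen since the last interrupt (see the AIM block there).
*/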

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
** Tuneable Interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);

/*
** Header split causes the packet header to
** be DMA'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** another plus is that small packets often fit
** into the header and thus use no cluster. It's
** a very workload-dependent feature.
*/
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
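
/*
** Note: the hw.igb.* knobs above are boot-time tunables; they are
** read from the kernel environment (typically set in /boot/loader.conf)
** when the driver is loaded.
*/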

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->hw.mac.type == e1000_vfadapt) {
		adapter->stats =
		    (struct e1000_vf_stats *)malloc(sizeof \
		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats =
		    (struct e1000_hw_stats *)malloc(sizeof \
		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, so call it again;
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	igb_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

        igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status = IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int 			i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
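	/*
	** When the mbuf carries a flow id (e.g. the RSS hash assigned
	** on receive), the modulo above pins that flow to one tx ring;
	** otherwise queue 0 is used.
	*/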

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &que->que_task);
	}

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we
			 * initialize the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
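	/*
	** MCLBYTES (2k), MJUMPAGESIZE (one page) and MJUM9BYTES (9k)
	** are the standard mbuf cluster sizes; the smallest one that
	** can hold a full frame is selected above.
	*/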

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

        /* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			igb_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
	igb_enable_intr(adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, -1, NULL);

		IGB_TX_LOCK(txr);
		if (igb_txeof(txr))
			more = TRUE;
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine : if using this code you MUST be sure that
 *  multiqueue is not defined, ie, set igb_num_queues to 1.
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link(adapter, 0);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	igb_rxeof(que, count, &rx_done);

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
        **  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
        if (que->eitr_setting)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(que->msix), que->eitr_setting);

        que->eitr_setting = 0;

        /* Idle, do nothing */
        if ((txr->bytes == 0) && (rxr->bytes == 0))
                goto no_calc;

        /* Use half of the default if sub-gig */
        if (adapter->link_speed != 1000)
                newitr = IGB_DEFAULT_ITR / 2;
        else {
		if ((txr->bytes) && (txr->packets))
                	newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
                newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
                if ((newitr > 300) && (newitr < 1200))
                        newitr = (newitr / 3);
                else
                        newitr = (newitr / 2);
        }
        newitr &= 0x7FFC;  /* Mask invalid bits */
        if (adapter->hw.mac.type == e1000_82575)
                newitr |= newitr << 16;
        else
                newitr |= E1000_EITR_CNT_IGNR;

        /* save for next interrupt */
        que->eitr_setting = newitr;

        /* Reset state */
        txr->bytes = 0;
        txr->packets = 0;
        rxr->bytes = 0;
        rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed*/
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
1591 *  media/mediaopt options with ifconfig.
1592 *
1593 **********************************************************************/
1594static int
1595igb_media_change(struct ifnet *ifp)
1596{
1597	struct adapter *adapter = ifp->if_softc;
1598	struct ifmedia  *ifm = &adapter->media;
1599
1600	INIT_DEBUGOUT("igb_media_change: begin");
1601
1602	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1603		return (EINVAL);
1604
1605	IGB_CORE_LOCK(adapter);
1606	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1607	case IFM_AUTO:
1608		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1609		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1610		break;
1611	case IFM_1000_LX:
1612	case IFM_1000_SX:
1613	case IFM_1000_T:
1614		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1615		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1616		break;
1617	case IFM_100_TX:
1618		adapter->hw.mac.autoneg = FALSE;
1619		adapter->hw.phy.autoneg_advertised = 0;
1620		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1621			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1622		else
1623			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1624		break;
1625	case IFM_10_T:
1626		adapter->hw.mac.autoneg = FALSE;
1627		adapter->hw.phy.autoneg_advertised = 0;
1628		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1629			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1630		else
1631			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1632		break;
1633	default:
1634		device_printf(adapter->dev, "Unsupported media type\n");
1635	}
1636
1637	igb_init_locked(adapter);
1638	IGB_CORE_UNLOCK(adapter);
1639
1640	return (0);
1641}
1642
1643
1644/*********************************************************************
1645 *
1646 *  This routine maps mbufs onto the Advanced TX descriptor
1647 *  format used by the 82575 and later adapters.
1648 *
1649 **********************************************************************/
1650
1651static int
1652igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1653{
1654	struct adapter		*adapter = txr->adapter;
1655	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1656	bus_dmamap_t		map;
1657	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1658	union e1000_adv_tx_desc	*txd = NULL;
1659	struct mbuf		*m_head;
1660	u32			olinfo_status = 0, cmd_type_len = 0;
1661	int			nsegs, i, j, error, first, last = 0;
1662	u32			hdrlen = 0;
1663
1664	m_head = *m_headp;
1665
1666
1667	/* Set basic descriptor constants */
1668	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1669	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1670	if (m_head->m_flags & M_VLANTAG)
1671		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1672
1673        /*
1674         * Force a cleanup if number of TX descriptors
1675         * available hits the threshold
1676         */
1677	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1678		igb_txeof(txr);
1679		/* Do we at least have the minimal number required? */
1680		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1681			txr->no_desc_avail++;
1682			return (ENOBUFS);
1683		}
1684	}
1685
1686	/*
1687         * Map the packet for DMA.
1688	 *
1689	 * Capture the first descriptor index,
1690	 * this descriptor will have the index
1691	 * of the EOP which is the only one that
1692	 * now gets a DONE bit writeback.
1693	 */
1694	first = txr->next_avail_desc;
1695	tx_buffer = &txr->tx_buffers[first];
1696	tx_buffer_mapped = tx_buffer;
1697	map = tx_buffer->map;
1698
1699	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1700	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1701
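	/*
	** Note on the error handling below (added commentary): EFBIG from
	** bus_dmamap_load_mbuf_sg() means the chain needed more segments
	** than the map allows, so m_defrag() is tried once to compact the
	** chain before reloading; ENOMEM is treated as transient and the
	** mbuf is kept, while any other error frees the mbuf and gives up.
	*/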
1702	if (error == EFBIG) {
1703		struct mbuf *m;
1704
1705		m = m_defrag(*m_headp, M_DONTWAIT);
1706		if (m == NULL) {
1707			adapter->mbuf_defrag_failed++;
1708			m_freem(*m_headp);
1709			*m_headp = NULL;
1710			return (ENOBUFS);
1711		}
1712		*m_headp = m;
1713
1714		/* Try it again */
1715		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1716		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1717
1718		if (error == ENOMEM) {
1719			adapter->no_tx_dma_setup++;
1720			return (error);
1721		} else if (error != 0) {
1722			adapter->no_tx_dma_setup++;
1723			m_freem(*m_headp);
1724			*m_headp = NULL;
1725			return (error);
1726		}
1727	} else if (error == ENOMEM) {
1728		adapter->no_tx_dma_setup++;
1729		return (error);
1730	} else if (error != 0) {
1731		adapter->no_tx_dma_setup++;
1732		m_freem(*m_headp);
1733		*m_headp = NULL;
1734		return (error);
1735	}
1736
1737	/* Check again to be sure we have enough descriptors */
1738        if (nsegs > (txr->tx_avail - 2)) {
1739                txr->no_desc_avail++;
1740		bus_dmamap_unload(txr->txtag, map);
1741		return (ENOBUFS);
1742        }
1743	m_head = *m_headp;
1744
1745        /*
1746         * Set up the context descriptor:
1747         * used when any hardware offload is done.
1748	 * This includes CSUM, VLAN, and TSO. It
1749	 * will use the first descriptor.
1750         */
1751        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1752		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1753			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1754			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1755			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1756		} else
1757			return (ENXIO);
1758	} else if (igb_tx_ctx_setup(txr, m_head))
1759		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1760
1761	/* Calculate payload length */
1762	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1763	    << E1000_ADVTXD_PAYLEN_SHIFT);
1764
1765	/* 82575 needs the queue index added */
1766	if (adapter->hw.mac.type == e1000_82575)
1767		olinfo_status |= txr->me << 4;
1768
1769	/* Set up our transmit descriptors */
1770	i = txr->next_avail_desc;
1771	for (j = 0; j < nsegs; j++) {
1772		bus_size_t seg_len;
1773		bus_addr_t seg_addr;
1774
1775		tx_buffer = &txr->tx_buffers[i];
1776		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1777		seg_addr = segs[j].ds_addr;
1778		seg_len  = segs[j].ds_len;
1779
1780		txd->read.buffer_addr = htole64(seg_addr);
1781		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1782		txd->read.olinfo_status = htole32(olinfo_status);
1783		last = i;
1784		if (++i == adapter->num_tx_desc)
1785			i = 0;
1786		tx_buffer->m_head = NULL;
1787		tx_buffer->next_eop = -1;
1788	}
1789
1790	txr->next_avail_desc = i;
1791	txr->tx_avail -= nsegs;
1792
1793        tx_buffer->m_head = m_head;
1794	tx_buffer_mapped->map = tx_buffer->map;
1795	tx_buffer->map = map;
1796        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1797
1798        /*
1799         * Last Descriptor of Packet
1800	 * needs End Of Packet (EOP)
1801	 * and Report Status (RS)
1802         */
1803        txd->read.cmd_type_len |=
1804	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1805	/*
1806	 * Keep track in the first buffer which
1807	 * descriptor will be written back
1808	 */
1809	tx_buffer = &txr->tx_buffers[first];
1810	tx_buffer->next_eop = last;
1811	txr->watchdog_time = ticks;
1812
1813	/*
1814	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1815	 * that this frame is available to transmit.
1816	 */
1817	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1818	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1819	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1820	++txr->tx_packets;
1821
1822	return (0);
1823
1824}
1825
1826static void
1827igb_set_promisc(struct adapter *adapter)
1828{
1829	struct ifnet	*ifp = adapter->ifp;
1830	struct e1000_hw *hw = &adapter->hw;
1831	u32		reg;
1832
1833	if (hw->mac.type == e1000_vfadapt) {
1834		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1835		return;
1836	}
1837
1838	reg = E1000_READ_REG(hw, E1000_RCTL);
1839	if (ifp->if_flags & IFF_PROMISC) {
1840		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1841		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1842	} else if (ifp->if_flags & IFF_ALLMULTI) {
1843		reg |= E1000_RCTL_MPE;
1844		reg &= ~E1000_RCTL_UPE;
1845		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1846	}
1847}
1848
1849static void
1850igb_disable_promisc(struct adapter *adapter)
1851{
1852	struct e1000_hw *hw = &adapter->hw;
1853	u32		reg;
1854
1855	if (hw->mac.type == e1000_vfadapt) {
1856		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1857		return;
1858	}
1859	reg = E1000_READ_REG(hw, E1000_RCTL);
1860	reg &=  (~E1000_RCTL_UPE);
1861	reg &=  (~E1000_RCTL_MPE);
1862	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1863}
1864
1865
1866/*********************************************************************
1867 *  Multicast Update
1868 *
1869 *  This routine is called whenever multicast address list is updated.
1870 *
1871 **********************************************************************/
1872
1873static void
1874igb_set_multi(struct adapter *adapter)
1875{
1876	struct ifnet	*ifp = adapter->ifp;
1877	struct ifmultiaddr *ifma;
1878	u32 reg_rctl = 0;
1879	u8  *mta;
1880
1881	int mcnt = 0;
1882
1883	IOCTL_DEBUGOUT("igb_set_multi: begin");
1884
1885	mta = adapter->mta;
1886	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1887	    MAX_NUM_MULTICAST_ADDRESSES);
1888
1889#if __FreeBSD_version < 800000
1890	IF_ADDR_LOCK(ifp);
1891#else
1892	if_maddr_rlock(ifp);
1893#endif
1894	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1895		if (ifma->ifma_addr->sa_family != AF_LINK)
1896			continue;
1897
1898		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1899			break;
1900
1901		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1902		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1903		mcnt++;
1904	}
1905#if __FreeBSD_version < 800000
1906	IF_ADDR_UNLOCK(ifp);
1907#else
1908	if_maddr_runlock(ifp);
1909#endif
1910
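	/*
	** Added note: if the list exceeds the hardware filter table
	** (MAX_NUM_MULTICAST_ADDRESSES entries) we stop filtering and
	** simply accept all multicast traffic by setting MPE; otherwise
	** the exact list is programmed into the hardware.
	*/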
1911	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1912		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1913		reg_rctl |= E1000_RCTL_MPE;
1914		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1915	} else
1916		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1917}
1918
1919
1920/*********************************************************************
1921 *  Timer routine:
1922 *  	This routine checks for link status,
1923 *	updates statistics, and does the watchdog.
1924 *
1925 **********************************************************************/
1926
1927static void
1928igb_local_timer(void *arg)
1929{
1930	struct adapter		*adapter = arg;
1931	device_t		dev = adapter->dev;
1932	struct tx_ring		*txr = adapter->tx_rings;
1933
1934
1935	IGB_CORE_LOCK_ASSERT(adapter);
1936
1937	igb_update_link_status(adapter);
1938	igb_update_stats_counters(adapter);
1939
1940	/*
1941	** If flow control has paused us since we last checked,
1942	** the watchdog timing is invalid, so don't run it.
1943	*/
1944	if (adapter->pause_frames) {
1945		adapter->pause_frames = 0;
1946		goto out;
1947	}
1948
1949        /*
1950        ** Watchdog: check for time since any descriptor was cleaned
1951        */
1952	for (int i = 0; i < adapter->num_queues; i++, txr++)
1953		if (txr->queue_status == IGB_QUEUE_HUNG)
1954			goto timeout;
1955out:
1956	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1957	return;
1958
1959timeout:
1960	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1961	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1962            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1963            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1964	device_printf(dev, "TX(%d) desc avail = %d,"
1965            " Next TX to Clean = %d\n",
1966            txr->me, txr->tx_avail, txr->next_to_clean);
1967	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1968	adapter->watchdog_events++;
1969	igb_init_locked(adapter);
1970}
1971
1972static void
1973igb_update_link_status(struct adapter *adapter)
1974{
1975	struct e1000_hw *hw = &adapter->hw;
1976	struct ifnet *ifp = adapter->ifp;
1977	device_t dev = adapter->dev;
1978	struct tx_ring *txr = adapter->tx_rings;
1979	u32 link_check = 0;
1980
1981	/* Get the cached link value or read for real */
1982        switch (hw->phy.media_type) {
1983        case e1000_media_type_copper:
1984                if (hw->mac.get_link_status) {
1985			/* Do the work to read phy */
1986                        e1000_check_for_link(hw);
1987                        link_check = !hw->mac.get_link_status;
1988                } else
1989                        link_check = TRUE;
1990                break;
1991        case e1000_media_type_fiber:
1992                e1000_check_for_link(hw);
1993                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1994                                 E1000_STATUS_LU);
1995                break;
1996        case e1000_media_type_internal_serdes:
1997                e1000_check_for_link(hw);
1998                link_check = adapter->hw.mac.serdes_has_link;
1999                break;
2000	/* VF device is type_unknown */
2001        case e1000_media_type_unknown:
2002                e1000_check_for_link(hw);
2003		link_check = !hw->mac.get_link_status;
2004		/* Fall thru */
2005        default:
2006                break;
2007        }
2008
2009	/* Now we check if a transition has happened */
2010	if (link_check && (adapter->link_active == 0)) {
2011		e1000_get_speed_and_duplex(&adapter->hw,
2012		    &adapter->link_speed, &adapter->link_duplex);
2013		if (bootverbose)
2014			device_printf(dev, "Link is up %d Mbps %s\n",
2015			    adapter->link_speed,
2016			    ((adapter->link_duplex == FULL_DUPLEX) ?
2017			    "Full Duplex" : "Half Duplex"));
2018		adapter->link_active = 1;
2019		ifp->if_baudrate = adapter->link_speed * 1000000;
2020		/* This can sleep */
2021		if_link_state_change(ifp, LINK_STATE_UP);
2022	} else if (!link_check && (adapter->link_active == 1)) {
2023		ifp->if_baudrate = adapter->link_speed = 0;
2024		adapter->link_duplex = 0;
2025		if (bootverbose)
2026			device_printf(dev, "Link is Down\n");
2027		adapter->link_active = 0;
2028		/* This can sleep */
2029		if_link_state_change(ifp, LINK_STATE_DOWN);
2030		/* Turn off watchdogs */
2031		for (int i = 0; i < adapter->num_queues; i++, txr++)
2032			txr->queue_status = IGB_QUEUE_IDLE;
2033	}
2034}
2035
2036/*********************************************************************
2037 *
2038 *  This routine disables all traffic on the adapter by issuing a
2039 *  global reset on the MAC and deallocates TX/RX buffers.
2040 *
2041 **********************************************************************/
2042
2043static void
2044igb_stop(void *arg)
2045{
2046	struct adapter	*adapter = arg;
2047	struct ifnet	*ifp = adapter->ifp;
2048	struct tx_ring *txr = adapter->tx_rings;
2049
2050	IGB_CORE_LOCK_ASSERT(adapter);
2051
2052	INIT_DEBUGOUT("igb_stop: begin");
2053
2054	igb_disable_intr(adapter);
2055
2056	callout_stop(&adapter->timer);
2057
2058	/* Tell the stack that the interface is no longer active */
2059	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2060
2061	/* Unarm watchdog timer. */
2062	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2063		IGB_TX_LOCK(txr);
2064		txr->queue_status = IGB_QUEUE_IDLE;
2065		IGB_TX_UNLOCK(txr);
2066	}
2067
2068	e1000_reset_hw(&adapter->hw);
2069	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2070
2071	e1000_led_off(&adapter->hw);
2072	e1000_cleanup_led(&adapter->hw);
2073}
2074
2075
2076/*********************************************************************
2077 *
2078 *  Determine hardware revision.
2079 *
2080 **********************************************************************/
2081static void
2082igb_identify_hardware(struct adapter *adapter)
2083{
2084	device_t dev = adapter->dev;
2085
2086	/* Make sure our PCI config space has the necessary stuff set */
2087	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2088	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2089	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2090		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2091		    "bits were not set!\n");
2092		adapter->hw.bus.pci_cmd_word |=
2093		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2094		pci_write_config(dev, PCIR_COMMAND,
2095		    adapter->hw.bus.pci_cmd_word, 2);
2096	}
2097
2098	/* Save off the information about this board */
2099	adapter->hw.vendor_id = pci_get_vendor(dev);
2100	adapter->hw.device_id = pci_get_device(dev);
2101	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2102	adapter->hw.subsystem_vendor_id =
2103	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2104	adapter->hw.subsystem_device_id =
2105	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2106
2107	/* Set MAC type early for PCI setup */
2108	e1000_set_mac_type(&adapter->hw);
2109}
2110
2111static int
2112igb_allocate_pci_resources(struct adapter *adapter)
2113{
2114	device_t	dev = adapter->dev;
2115	int		rid;
2116
2117	rid = PCIR_BAR(0);
2118	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2119	    &rid, RF_ACTIVE);
2120	if (adapter->pci_mem == NULL) {
2121		device_printf(dev, "Unable to allocate bus resource: memory\n");
2122		return (ENXIO);
2123	}
2124	adapter->osdep.mem_bus_space_tag =
2125	    rman_get_bustag(adapter->pci_mem);
2126	adapter->osdep.mem_bus_space_handle =
2127	    rman_get_bushandle(adapter->pci_mem);
2128	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2129
2130	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2131
2132	/* This will setup either MSI/X or MSI */
2133	adapter->msix = igb_setup_msix(adapter);
2134	adapter->hw.back = &adapter->osdep;
2135
2136	return (0);
2137}
2138
2139/*********************************************************************
2140 *
2141 *  Setup the Legacy or MSI Interrupt handler
2142 *
2143 **********************************************************************/
2144static int
2145igb_allocate_legacy(struct adapter *adapter)
2146{
2147	device_t		dev = adapter->dev;
2148	struct igb_queue	*que = adapter->queues;
2149	int			error, rid = 0;
2150
2151	/* Turn off all interrupts */
2152	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2153
2154	/* MSI RID is 1 */
2155	if (adapter->msix == 1)
2156		rid = 1;
2157
2158	/* We allocate a single interrupt resource */
2159	adapter->res = bus_alloc_resource_any(dev,
2160	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2161	if (adapter->res == NULL) {
2162		device_printf(dev, "Unable to allocate bus resource: "
2163		    "interrupt\n");
2164		return (ENXIO);
2165	}
2166
2167	/*
2168	 * Try allocating a fast interrupt and the associated deferred
2169	 * processing contexts.
2170	 */
2171	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2172	/* Make tasklet for deferred link handling */
2173	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2174	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2175	    taskqueue_thread_enqueue, &que->tq);
2176	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2177	    device_get_nameunit(adapter->dev));
2178	if ((error = bus_setup_intr(dev, adapter->res,
2179	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2180	    adapter, &adapter->tag)) != 0) {
2181		device_printf(dev, "Failed to register fast interrupt "
2182			    "handler: %d\n", error);
2183		taskqueue_free(que->tq);
2184		que->tq = NULL;
2185		return (error);
2186	}
2187
2188	return (0);
2189}
2190
2191
2192/*********************************************************************
2193 *
2194 *  Setup the MSIX Queue Interrupt handlers:
2195 *
2196 **********************************************************************/
2197static int
2198igb_allocate_msix(struct adapter *adapter)
2199{
2200	device_t		dev = adapter->dev;
2201	struct igb_queue	*que = adapter->queues;
2202	int			error, rid, vector = 0;
2203
2204
2205	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2206		rid = vector + 1;
2207		que->res = bus_alloc_resource_any(dev,
2208		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2209		if (que->res == NULL) {
2210			device_printf(dev,
2211			    "Unable to allocate bus resource: "
2212			    "MSIX Queue Interrupt\n");
2213			return (ENXIO);
2214		}
2215		error = bus_setup_intr(dev, que->res,
2216	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2217		    igb_msix_que, que, &que->tag);
2218		if (error) {
2219			que->res = NULL;
2220			device_printf(dev, "Failed to register Queue handler");
2221			return (error);
2222		}
2223#if __FreeBSD_version >= 800504
2224		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2225#endif
2226		que->msix = vector;
2227		if (adapter->hw.mac.type == e1000_82575)
2228			que->eims = E1000_EICR_TX_QUEUE0 << i;
2229		else
2230			que->eims = 1 << vector;
2231		/*
2232		** Bind the msix vector, and thus the
2233		** rings to the corresponding cpu.
2234		*/
2235		if (adapter->num_queues > 1)
2236			bus_bind_intr(dev, que->res, i);
2237		/* Make tasklet for deferred handling */
2238		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2239		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2240		    taskqueue_thread_enqueue, &que->tq);
2241		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2242		    device_get_nameunit(adapter->dev));
2243	}
2244
2245	/* And Link */
2246	rid = vector + 1;
2247	adapter->res = bus_alloc_resource_any(dev,
2248	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2249	if (adapter->res == NULL) {
2250		device_printf(dev,
2251		    "Unable to allocate bus resource: "
2252		    "MSIX Link Interrupt\n");
2253		return (ENXIO);
2254	}
2255	if ((error = bus_setup_intr(dev, adapter->res,
2256	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2257	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2258		device_printf(dev, "Failed to register Link handler");
2259		return (error);
2260	}
2261#if __FreeBSD_version >= 800504
2262	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2263#endif
2264	adapter->linkvec = vector;
2265
2266	return (0);
2267}
2268
2269
2270static void
2271igb_configure_queues(struct adapter *adapter)
2272{
2273	struct	e1000_hw	*hw = &adapter->hw;
2274	struct	igb_queue	*que;
2275	u32			tmp, ivar = 0, newitr = 0;
2276
2277	/* First turn on RSS capability */
2278	if (adapter->hw.mac.type > e1000_82575)
2279		E1000_WRITE_REG(hw, E1000_GPIE,
2280		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2281		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2282
2283	/* Turn on MSIX */
2284	switch (adapter->hw.mac.type) {
2285	case e1000_82580:
2286	case e1000_vfadapt:
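		/*
		** Added note: as the masks below show, each 32-bit IVAR
		** entry carries the vectors for two queues -- the RX vector
		** of the even queue in byte 0 and of the odd queue in
		** byte 2, with the matching TX vectors in bytes 1 and 3.
		*/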
2287		/* RX entries */
2288		for (int i = 0; i < adapter->num_queues; i++) {
2289			u32 index = i >> 1;
2290			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2291			que = &adapter->queues[i];
2292			if (i & 1) {
2293				ivar &= 0xFF00FFFF;
2294				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2295			} else {
2296				ivar &= 0xFFFFFF00;
2297				ivar |= que->msix | E1000_IVAR_VALID;
2298			}
2299			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2300		}
2301		/* TX entries */
2302		for (int i = 0; i < adapter->num_queues; i++) {
2303			u32 index = i >> 1;
2304			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2305			que = &adapter->queues[i];
2306			if (i & 1) {
2307				ivar &= 0x00FFFFFF;
2308				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2309			} else {
2310				ivar &= 0xFFFF00FF;
2311				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2312			}
2313			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2314			adapter->eims_mask |= que->eims;
2315		}
2316
2317		/* And for the link interrupt */
2318		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2319		adapter->link_mask = 1 << adapter->linkvec;
2320		adapter->eims_mask |= adapter->link_mask;
2321		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2322		break;
2323	case e1000_82576:
2324		/* RX entries */
2325		for (int i = 0; i < adapter->num_queues; i++) {
2326			u32 index = i & 0x7; /* Each IVAR has two entries */
2327			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2328			que = &adapter->queues[i];
2329			if (i < 8) {
2330				ivar &= 0xFFFFFF00;
2331				ivar |= que->msix | E1000_IVAR_VALID;
2332			} else {
2333				ivar &= 0xFF00FFFF;
2334				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2335			}
2336			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2337			adapter->eims_mask |= que->eims;
2338		}
2339		/* TX entries */
2340		for (int i = 0; i < adapter->num_queues; i++) {
2341			u32 index = i & 0x7; /* Each IVAR has two entries */
2342			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2343			que = &adapter->queues[i];
2344			if (i < 8) {
2345				ivar &= 0xFFFF00FF;
2346				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2347			} else {
2348				ivar &= 0x00FFFFFF;
2349				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2350			}
2351			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2352			adapter->eims_mask |= que->eims;
2353		}
2354
2355		/* And for the link interrupt */
2356		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2357		adapter->link_mask = 1 << adapter->linkvec;
2358		adapter->eims_mask |= adapter->link_mask;
2359		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2360		break;
2361
2362	case e1000_82575:
2363                /* Enable MSI-X support */
2364		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2365                tmp |= E1000_CTRL_EXT_PBA_CLR;
2366                /* Auto-Mask interrupts upon ICR read. */
2367                tmp |= E1000_CTRL_EXT_EIAME;
2368                tmp |= E1000_CTRL_EXT_IRCA;
2369                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2370
2371		/* Queues */
2372		for (int i = 0; i < adapter->num_queues; i++) {
2373			que = &adapter->queues[i];
2374			tmp = E1000_EICR_RX_QUEUE0 << i;
2375			tmp |= E1000_EICR_TX_QUEUE0 << i;
2376			que->eims = tmp;
2377			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2378			    i, que->eims);
2379			adapter->eims_mask |= que->eims;
2380		}
2381
2382		/* Link */
2383		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2384		    E1000_EIMS_OTHER);
2385		adapter->link_mask |= E1000_EIMS_OTHER;
2386		adapter->eims_mask |= adapter->link_mask;
2387	default:
2388		break;
2389	}
2390
2391	/* Set the starting interrupt rate */
2392	if (igb_max_interrupt_rate > 0)
2393		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2394
2395        if (hw->mac.type == e1000_82575)
2396                newitr |= newitr << 16;
2397        else
2398                newitr |= E1000_EITR_CNT_IGNR;
2399
2400	for (int i = 0; i < adapter->num_queues; i++) {
2401		que = &adapter->queues[i];
2402		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2403	}
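	/*
	** Illustrative example (added commentary): with
	** igb_max_interrupt_rate set to 8000 interrupts per second, the
	** calculation above yields 4000000 / 8000 = 500, which survives
	** the 0x7FFC mask unchanged and is written to every queue's EITR
	** register as the initial throttle value.
	*/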
2404
2405	return;
2406}
2407
2408
2409static void
2410igb_free_pci_resources(struct adapter *adapter)
2411{
2412	struct		igb_queue *que = adapter->queues;
2413	device_t	dev = adapter->dev;
2414	int		rid;
2415
2416	/*
2417	** There is a slight possibility of a failure mode in
2418	** attach that results in entering this function before
2419	** the interrupt resources have been initialized; in that
2420	** case we do not want to execute the loops below.  We can
2421	** detect this reliably by the state of the adapter
2422	** res pointer.
2423	*/
2424	if (adapter->res == NULL)
2425		goto mem;
2426
2427	/*
2428	 * First release all the interrupt resources:
2429	 */
2430	for (int i = 0; i < adapter->num_queues; i++, que++) {
2431		rid = que->msix + 1;
2432		if (que->tag != NULL) {
2433			bus_teardown_intr(dev, que->res, que->tag);
2434			que->tag = NULL;
2435		}
2436		if (que->res != NULL)
2437			bus_release_resource(dev,
2438			    SYS_RES_IRQ, rid, que->res);
2439	}
2440
2441	/* Clean the Legacy or Link interrupt last */
2442	if (adapter->linkvec) /* we are doing MSIX */
2443		rid = adapter->linkvec + 1;
2444	else
2445		rid = (adapter->msix != 0) ? 1 : 0;
2446
2447	if (adapter->tag != NULL) {
2448		bus_teardown_intr(dev, adapter->res, adapter->tag);
2449		adapter->tag = NULL;
2450	}
2451	if (adapter->res != NULL)
2452		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2453
2454mem:
2455	if (adapter->msix)
2456		pci_release_msi(dev);
2457
2458	if (adapter->msix_mem != NULL)
2459		bus_release_resource(dev, SYS_RES_MEMORY,
2460		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2461
2462	if (adapter->pci_mem != NULL)
2463		bus_release_resource(dev, SYS_RES_MEMORY,
2464		    PCIR_BAR(0), adapter->pci_mem);
2465
2466}
2467
2468/*
2469 * Setup Either MSI/X or MSI
2470 */
2471static int
2472igb_setup_msix(struct adapter *adapter)
2473{
2474	device_t dev = adapter->dev;
2475	int rid, want, queues, msgs;
2476
2477	/* tuneable override */
2478	if (igb_enable_msix == 0)
2479		goto msi;
2480
2481	/* First try MSI/X */
2482	rid = PCIR_BAR(IGB_MSIX_BAR);
2483	adapter->msix_mem = bus_alloc_resource_any(dev,
2484	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2485       	if (!adapter->msix_mem) {
2486		/* May not be enabled */
2487		device_printf(adapter->dev,
2488		    "Unable to map MSIX table\n");
2489		goto msi;
2490	}
2491
2492	msgs = pci_msix_count(dev);
2493	if (msgs == 0) { /* system has msix disabled */
2494		bus_release_resource(dev, SYS_RES_MEMORY,
2495		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2496		adapter->msix_mem = NULL;
2497		goto msi;
2498	}
2499
2500	/* Figure out a reasonable auto config value */
2501	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2502
2503	/* Manual override */
2504	if (igb_num_queues != 0)
2505		queues = igb_num_queues;
2506	if (queues > 8)  /* max queues */
2507		queues = 8;
2508
2509	/* Can have max of 4 queues on 82575 */
2510	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2511		queues = 4;
2512
2513	/* Limit the VF adapter to one queue */
2514	if (adapter->hw.mac.type == e1000_vfadapt)
2515		queues = 1;
2516
2517	/*
2518	** One vector (an RX/TX pair) per queue,
2519	** plus one additional vector for the link interrupt.
2520	*/
2521	want = queues + 1;
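	/* Illustration (added, not in the original): 4 queues => want = 5 vectors. */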
2522	if (msgs >= want)
2523		msgs = want;
2524	else {
2525               	device_printf(adapter->dev,
2526		    "MSIX Configuration Problem, "
2527		    "%d vectors configured, but %d queues wanted!\n",
2528		    msgs, want);
2529		return (ENXIO);
2530	}
2531	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2532               	device_printf(adapter->dev,
2533		    "Using MSIX interrupts with %d vectors\n", msgs);
2534		adapter->num_queues = queues;
2535		return (msgs);
2536	}
2537msi:
2538       	msgs = pci_msi_count(dev);
2539       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2540               	device_printf(adapter->dev,"Using MSI interrupt\n");
2541	return (msgs);
2542}
2543
2544/*********************************************************************
2545 *
2546 *  Set up a fresh starting state
2547 *
2548 **********************************************************************/
2549static void
2550igb_reset(struct adapter *adapter)
2551{
2552	device_t	dev = adapter->dev;
2553	struct e1000_hw *hw = &adapter->hw;
2554	struct e1000_fc_info *fc = &hw->fc;
2555	struct ifnet	*ifp = adapter->ifp;
2556	u32		pba = 0;
2557	u16		hwm;
2558
2559	INIT_DEBUGOUT("igb_reset: begin");
2560
2561	/* Let the firmware know the OS is in control */
2562	igb_get_hw_control(adapter);
2563
2564	/*
2565	 * Packet Buffer Allocation (PBA)
2566	 * Writing PBA sets the receive portion of the buffer
2567	 * the remainder is used for the transmit buffer.
2568	 */
2569	switch (hw->mac.type) {
2570	case e1000_82575:
2571		pba = E1000_PBA_32K;
2572		break;
2573	case e1000_82576:
2574	case e1000_vfadapt:
2575		pba = E1000_PBA_64K;
2576		break;
2577	case e1000_82580:
2578		pba = E1000_PBA_35K;
2579	default:
2580		break;
2581	}
2582
2583	/* Special needs in case of Jumbo frames */
2584	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2585		u32 tx_space, min_tx, min_rx;
2586		pba = E1000_READ_REG(hw, E1000_PBA);
2587		tx_space = pba >> 16;
2588		pba &= 0xffff;
2589		min_tx = (adapter->max_frame_size +
2590		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2591		min_tx = roundup2(min_tx, 1024);
2592		min_tx >>= 10;
2593                min_rx = adapter->max_frame_size;
2594                min_rx = roundup2(min_rx, 1024);
2595                min_rx >>= 10;
2596		if (tx_space < min_tx &&
2597		    ((min_tx - tx_space) < pba)) {
2598			pba = pba - (min_tx - tx_space);
2599			/*
2600			 * If short on rx space, rx wins
2601			 * and must trump the tx adjustment.
2602			 */
2603                        if (pba < min_rx)
2604                                pba = min_rx;
2605		}
2606		E1000_WRITE_REG(hw, E1000_PBA, pba);
2607	}
2608
2609	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2610
2611	/*
2612	 * These parameters control the automatic generation (Tx) and
2613	 * response (Rx) to Ethernet PAUSE frames.
2614	 * - High water mark should allow for at least two frames to be
2615	 *   received after sending an XOFF.
2616	 * - Low water mark works best when it is very near the high water mark.
2617	 *   This allows the receiver to restart by sending XON when it has
2618	 *   drained a bit.
2619	 */
2620	hwm = min(((pba << 10) * 9 / 10),
2621	    ((pba << 10) - 2 * adapter->max_frame_size));
2622
2623	if (hw->mac.type < e1000_82576) {
2624		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2625		fc->low_water = fc->high_water - 8;
2626	} else {
2627		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2628		fc->low_water = fc->high_water - 16;
2629	}
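	/*
	** Worked example (illustrative, assuming a 64KB packet buffer on an
	** 82576 and a max frame of ~1522 bytes): pba << 10 = 65536 bytes,
	** so hwm = min(58982, 65536 - 3044) = 58982; rounding down to the
	** 16-byte granularity gives high_water = 58976 and low_water = 58960.
	*/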
2630
2631	fc->pause_time = IGB_FC_PAUSE_TIME;
2632	fc->send_xon = TRUE;
2633
2634	/* Set Flow control, use the tunable location if sane */
2635	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2636		fc->requested_mode = igb_fc_setting;
2637	else
2638		fc->requested_mode = e1000_fc_none;
2639
2640	fc->current_mode = fc->requested_mode;
2641
2642	/* Issue a global reset */
2643	e1000_reset_hw(hw);
2644	E1000_WRITE_REG(hw, E1000_WUC, 0);
2645
2646	if (e1000_init_hw(hw) < 0)
2647		device_printf(dev, "Hardware Initialization Failed\n");
2648
2649	if (hw->mac.type == e1000_82580) {
2650		u32 reg;
2651
2652		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2653		/*
2654		 * 0x80000000 - enable DMA COAL
2655		 * 0x10000000 - use L0s as low power
2656		 * 0x20000000 - use L1 as low power
2657		 * X << 16 - exit dma coal when rx data exceeds X kB
2658		 * Y - upper limit to stay in dma coal in units of 32usecs
2659		 */
2660		E1000_WRITE_REG(hw, E1000_DMACR,
2661		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2662
2663		/* Set hwm to PBA - 2 * max frame size */
2664		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2665		/*
2666		 * This sets the time to wait before requesting transition to
2667		 * low power state to number of usecs needed to receive 1 512
2668		 * byte frame at gigabit line rate
2669		 */
2670		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2671
2672		/* free space in tx packet buffer to wake from DMA coal */
2673		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2674		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2675
2676		/* make low power state decision controlled by DMA coal */
2677		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2678		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2679		    reg | E1000_PCIEMISC_LX_DECISION);
2680	}
2681
2682	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2683	e1000_get_phy_info(hw);
2684	e1000_check_for_link(hw);
2685	return;
2686}
2687
2688/*********************************************************************
2689 *
2690 *  Setup networking device structure and register an interface.
2691 *
2692 **********************************************************************/
2693static int
2694igb_setup_interface(device_t dev, struct adapter *adapter)
2695{
2696	struct ifnet   *ifp;
2697
2698	INIT_DEBUGOUT("igb_setup_interface: begin");
2699
2700	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2701	if (ifp == NULL) {
2702		device_printf(dev, "can not allocate ifnet structure\n");
2703		return (-1);
2704	}
2705	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2706	ifp->if_mtu = ETHERMTU;
2707	ifp->if_init =  igb_init;
2708	ifp->if_softc = adapter;
2709	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2710	ifp->if_ioctl = igb_ioctl;
2711	ifp->if_start = igb_start;
2712#if __FreeBSD_version >= 800000
2713	ifp->if_transmit = igb_mq_start;
2714	ifp->if_qflush = igb_qflush;
2715#endif
2716	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2717	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2718	IFQ_SET_READY(&ifp->if_snd);
2719
2720	ether_ifattach(ifp, adapter->hw.mac.addr);
2721
2722	ifp->if_capabilities = ifp->if_capenable = 0;
2723
2724	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2725	ifp->if_capabilities |= IFCAP_TSO4;
2726	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2727	ifp->if_capenable = ifp->if_capabilities;
2728
2729	/* Advertise LRO capability, but don't enable it by default */
2730	ifp->if_capabilities |= IFCAP_LRO;
2731
2732#ifdef DEVICE_POLLING
2733	ifp->if_capabilities |= IFCAP_POLLING;
2734#endif
2735
2736	/*
2737	 * Tell the upper layer(s) we
2738	 * support full VLAN capability.
2739	 */
2740	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2741	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2742	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2743
2744	/*
2745	** Don't turn this on by default: if vlans are
2746	** created on another pseudo device (e.g. lagg),
2747	** vlan events are not passed through, breaking
2748	** operation, but with HW FILTER off it works. If
2749	** using vlans directly on the igb driver you can
2750	** enable this and get full hardware tag filtering.
2751	*/
2752	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2753
2754	/*
2755	 * Specify the media types supported by this adapter and register
2756	 * callbacks to update media and link information
2757	 */
2758	ifmedia_init(&adapter->media, IFM_IMASK,
2759	    igb_media_change, igb_media_status);
2760	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2761	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2762		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2763			    0, NULL);
2764		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2765	} else {
2766		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2767		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2768			    0, NULL);
2769		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2770			    0, NULL);
2771		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2772			    0, NULL);
2773		if (adapter->hw.phy.type != e1000_phy_ife) {
2774			ifmedia_add(&adapter->media,
2775				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2776			ifmedia_add(&adapter->media,
2777				IFM_ETHER | IFM_1000_T, 0, NULL);
2778		}
2779	}
2780	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2781	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2782	return (0);
2783}
2784
2785
2786/*
2787 * Manage DMA'able memory.
2788 */
2789static void
2790igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2791{
2792	if (error)
2793		return;
2794	*(bus_addr_t *) arg = segs[0].ds_addr;
2795}
2796
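/*
** Added note: igb_dma_malloc() follows the usual three-step busdma
** pattern: create a tag describing the allocation constraints, allocate
** DMA-able memory against that tag, then load the map so the callback
** above can capture the bus (physical) address of the region.
*/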
2797static int
2798igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2799        struct igb_dma_alloc *dma, int mapflags)
2800{
2801	int error;
2802
2803	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2804				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2805				BUS_SPACE_MAXADDR,	/* lowaddr */
2806				BUS_SPACE_MAXADDR,	/* highaddr */
2807				NULL, NULL,		/* filter, filterarg */
2808				size,			/* maxsize */
2809				1,			/* nsegments */
2810				size,			/* maxsegsize */
2811				0,			/* flags */
2812				NULL,			/* lockfunc */
2813				NULL,			/* lockarg */
2814				&dma->dma_tag);
2815	if (error) {
2816		device_printf(adapter->dev,
2817		    "%s: bus_dma_tag_create failed: %d\n",
2818		    __func__, error);
2819		goto fail_0;
2820	}
2821
2822	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2823	    BUS_DMA_NOWAIT, &dma->dma_map);
2824	if (error) {
2825		device_printf(adapter->dev,
2826		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2827		    __func__, (uintmax_t)size, error);
2828		goto fail_2;
2829	}
2830
2831	dma->dma_paddr = 0;
2832	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2833	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2834	if (error || dma->dma_paddr == 0) {
2835		device_printf(adapter->dev,
2836		    "%s: bus_dmamap_load failed: %d\n",
2837		    __func__, error);
2838		goto fail_3;
2839	}
2840
2841	return (0);
2842
2843fail_3:
2844	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2845fail_2:
2846	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2847	bus_dma_tag_destroy(dma->dma_tag);
2848fail_0:
2849	dma->dma_map = NULL;
2850	dma->dma_tag = NULL;
2851
2852	return (error);
2853}
2854
2855static void
2856igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2857{
2858	if (dma->dma_tag == NULL)
2859		return;
2860	if (dma->dma_map != NULL) {
2861		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2862		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2863		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2864		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2865		dma->dma_map = NULL;
2866	}
2867	bus_dma_tag_destroy(dma->dma_tag);
2868	dma->dma_tag = NULL;
2869}
2870
2871
2872/*********************************************************************
2873 *
2874 *  Allocate memory for the transmit and receive rings, and then
2875 *  the descriptors associated with each, called only once at attach.
2876 *
2877 **********************************************************************/
2878static int
2879igb_allocate_queues(struct adapter *adapter)
2880{
2881	device_t dev = adapter->dev;
2882	struct igb_queue	*que = NULL;
2883	struct tx_ring		*txr = NULL;
2884	struct rx_ring		*rxr = NULL;
2885	int rsize, tsize, error = E1000_SUCCESS;
2886	int txconf = 0, rxconf = 0;
2887
2888	/* First allocate the top level queue structs */
2889	if (!(adapter->queues =
2890	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2891	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2892		device_printf(dev, "Unable to allocate queue memory\n");
2893		error = ENOMEM;
2894		goto fail;
2895	}
2896
2897	/* Next allocate the TX ring struct memory */
2898	if (!(adapter->tx_rings =
2899	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2900	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2901		device_printf(dev, "Unable to allocate TX ring memory\n");
2902		error = ENOMEM;
2903		goto tx_fail;
2904	}
2905
2906	/* Now allocate the RX */
2907	if (!(adapter->rx_rings =
2908	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2909	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2910		device_printf(dev, "Unable to allocate RX ring memory\n");
2911		error = ENOMEM;
2912		goto rx_fail;
2913	}
2914
2915	tsize = roundup2(adapter->num_tx_desc *
2916	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2917	/*
2918	 * Now set up the TX queues, txconf is needed to handle the
2919	 * possibility that things fail midcourse and we need to
2920	 * undo memory gracefully
2921	 */
2922	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2923		/* Set up some basics */
2924		txr = &adapter->tx_rings[i];
2925		txr->adapter = adapter;
2926		txr->me = i;
2927
2928		/* Initialize the TX lock */
2929		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2930		    device_get_nameunit(dev), txr->me);
2931		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2932
2933		if (igb_dma_malloc(adapter, tsize,
2934			&txr->txdma, BUS_DMA_NOWAIT)) {
2935			device_printf(dev,
2936			    "Unable to allocate TX Descriptor memory\n");
2937			error = ENOMEM;
2938			goto err_tx_desc;
2939		}
2940		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2941		bzero((void *)txr->tx_base, tsize);
2942
2943        	/* Now allocate transmit buffers for the ring */
2944        	if (igb_allocate_transmit_buffers(txr)) {
2945			device_printf(dev,
2946			    "Critical Failure setting up transmit buffers\n");
2947			error = ENOMEM;
2948			goto err_tx_desc;
2949        	}
2950#if __FreeBSD_version >= 800000
2951		/* Allocate a buf ring */
2952		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2953		    M_WAITOK, &txr->tx_mtx);
2954#endif
2955	}
2956
2957	/*
2958	 * Next the RX queues...
2959	 */
2960	rsize = roundup2(adapter->num_rx_desc *
2961	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2962	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2963		rxr = &adapter->rx_rings[i];
2964		rxr->adapter = adapter;
2965		rxr->me = i;
2966
2967		/* Initialize the RX lock */
2968		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2969		    device_get_nameunit(dev), rxr->me);
2970		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2971
2972		if (igb_dma_malloc(adapter, rsize,
2973			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2974			device_printf(dev,
2975			    "Unable to allocate RxDescriptor memory\n");
2976			error = ENOMEM;
2977			goto err_rx_desc;
2978		}
2979		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2980		bzero((void *)rxr->rx_base, rsize);
2981
2982        	/* Allocate receive buffers for the ring */
2983		if (igb_allocate_receive_buffers(rxr)) {
2984			device_printf(dev,
2985			    "Critical Failure setting up receive buffers\n");
2986			error = ENOMEM;
2987			goto err_rx_desc;
2988		}
2989	}
2990
2991	/*
2992	** Finally set up the queue holding structs
2993	*/
2994	for (int i = 0; i < adapter->num_queues; i++) {
2995		que = &adapter->queues[i];
2996		que->adapter = adapter;
2997		que->txr = &adapter->tx_rings[i];
2998		que->rxr = &adapter->rx_rings[i];
2999	}
3000
3001	return (0);
3002
3003err_rx_desc:
3004	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3005		igb_dma_free(adapter, &rxr->rxdma);
3006err_tx_desc:
3007	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3008		igb_dma_free(adapter, &txr->txdma);
3009	free(adapter->rx_rings, M_DEVBUF);
3010rx_fail:
3011#if __FreeBSD_version >= 800000
3012	buf_ring_free(txr->br, M_DEVBUF);
3013#endif
3014	free(adapter->tx_rings, M_DEVBUF);
3015tx_fail:
3016	free(adapter->queues, M_DEVBUF);
3017fail:
3018	return (error);
3019}
3020
3021/*********************************************************************
3022 *
3023 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3024 *  the information needed to transmit a packet on the wire. This is
3025 *  called only once at attach, setup is done every reset.
3026 *
3027 **********************************************************************/
3028static int
3029igb_allocate_transmit_buffers(struct tx_ring *txr)
3030{
3031	struct adapter *adapter = txr->adapter;
3032	device_t dev = adapter->dev;
3033	struct igb_tx_buffer *txbuf;
3034	int error, i;
3035
3036	/*
3037	 * Setup DMA descriptor areas.
3038	 */
3039	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3040			       1, 0,			/* alignment, bounds */
3041			       BUS_SPACE_MAXADDR,	/* lowaddr */
3042			       BUS_SPACE_MAXADDR,	/* highaddr */
3043			       NULL, NULL,		/* filter, filterarg */
3044			       IGB_TSO_SIZE,		/* maxsize */
3045			       IGB_MAX_SCATTER,		/* nsegments */
3046			       PAGE_SIZE,		/* maxsegsize */
3047			       0,			/* flags */
3048			       NULL,			/* lockfunc */
3049			       NULL,			/* lockfuncarg */
3050			       &txr->txtag))) {
3051		device_printf(dev,"Unable to allocate TX DMA tag\n");
3052		goto fail;
3053	}
3054
3055	if (!(txr->tx_buffers =
3056	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3057	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3058		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3059		error = ENOMEM;
3060		goto fail;
3061	}
3062
3063        /* Create the descriptor buffer dma maps */
3064	txbuf = txr->tx_buffers;
3065	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3066		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3067		if (error != 0) {
3068			device_printf(dev, "Unable to create TX DMA map\n");
3069			goto fail;
3070		}
3071	}
3072
3073	return 0;
3074fail:
3075	/* Free everything; this handles the case where we failed partway through */
3076	igb_free_transmit_structures(adapter);
3077	return (error);
3078}
3079
3080/*********************************************************************
3081 *
3082 *  Initialize a transmit ring.
3083 *
3084 **********************************************************************/
3085static void
3086igb_setup_transmit_ring(struct tx_ring *txr)
3087{
3088	struct adapter *adapter = txr->adapter;
3089	struct igb_tx_buffer *txbuf;
3090	int i;
3091
3092	/* Clear the old descriptor contents */
3093	IGB_TX_LOCK(txr);
3094	bzero((void *)txr->tx_base,
3095	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3096	/* Reset indices */
3097	txr->next_avail_desc = 0;
3098	txr->next_to_clean = 0;
3099
3100	/* Free any existing tx buffers. */
3101        txbuf = txr->tx_buffers;
3102	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3103		if (txbuf->m_head != NULL) {
3104			bus_dmamap_sync(txr->txtag, txbuf->map,
3105			    BUS_DMASYNC_POSTWRITE);
3106			bus_dmamap_unload(txr->txtag, txbuf->map);
3107			m_freem(txbuf->m_head);
3108			txbuf->m_head = NULL;
3109		}
3110		/* clear the watch index */
3111		txbuf->next_eop = -1;
3112        }
3113
3114	/* Set number of descriptors available */
3115	txr->tx_avail = adapter->num_tx_desc;
3116
3117	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3118	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3119	IGB_TX_UNLOCK(txr);
3120}
3121
3122/*********************************************************************
3123 *
3124 *  Initialize all transmit rings.
3125 *
3126 **********************************************************************/
3127static void
3128igb_setup_transmit_structures(struct adapter *adapter)
3129{
3130	struct tx_ring *txr = adapter->tx_rings;
3131
3132	for (int i = 0; i < adapter->num_queues; i++, txr++)
3133		igb_setup_transmit_ring(txr);
3134
3135	return;
3136}
3137
3138/*********************************************************************
3139 *
3140 *  Enable transmit unit.
3141 *
3142 **********************************************************************/
3143static void
3144igb_initialize_transmit_units(struct adapter *adapter)
3145{
3146	struct tx_ring	*txr = adapter->tx_rings;
3147	struct e1000_hw *hw = &adapter->hw;
3148	u32		tctl, txdctl;
3149
3150	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3151	tctl = txdctl = 0;
3152
3153	/* Setup the Tx Descriptor Rings */
3154	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3155		u64 bus_addr = txr->txdma.dma_paddr;
3156
3157		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3158		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3159		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3160		    (uint32_t)(bus_addr >> 32));
3161		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3162		    (uint32_t)bus_addr);
3163
3164		/* Setup the HW Tx Head and Tail descriptor pointers */
3165		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3166		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3167
3168		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3169		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3170		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3171
3172		txr->queue_status = IGB_QUEUE_IDLE;
3173
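		/*
		** Added note: the shifts below pack the prefetch (PTHRESH),
		** host (HTHRESH) and write-back (WTHRESH) thresholds into
		** the low three byte fields of TXDCTL, and the final OR
		** enables the queue.
		*/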
3174		txdctl |= IGB_TX_PTHRESH;
3175		txdctl |= IGB_TX_HTHRESH << 8;
3176		txdctl |= IGB_TX_WTHRESH << 16;
3177		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3178		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3179	}
3180
3181	if (adapter->hw.mac.type == e1000_vfadapt)
3182		return;
3183
3184	e1000_config_collision_dist(hw);
3185
3186	/* Program the Transmit Control Register */
3187	tctl = E1000_READ_REG(hw, E1000_TCTL);
3188	tctl &= ~E1000_TCTL_CT;
3189	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3190		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3191
3192	/* This write will effectively turn on the transmit unit. */
3193	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3194}
3195
3196/*********************************************************************
3197 *
3198 *  Free all transmit rings.
3199 *
3200 **********************************************************************/
3201static void
3202igb_free_transmit_structures(struct adapter *adapter)
3203{
3204	struct tx_ring *txr = adapter->tx_rings;
3205
3206	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3207		IGB_TX_LOCK(txr);
3208		igb_free_transmit_buffers(txr);
3209		igb_dma_free(adapter, &txr->txdma);
3210		IGB_TX_UNLOCK(txr);
3211		IGB_TX_LOCK_DESTROY(txr);
3212	}
3213	free(adapter->tx_rings, M_DEVBUF);
3214}
3215
3216/*********************************************************************
3217 *
3218 *  Free transmit ring related data structures.
3219 *
3220 **********************************************************************/
3221static void
3222igb_free_transmit_buffers(struct tx_ring *txr)
3223{
3224	struct adapter *adapter = txr->adapter;
3225	struct igb_tx_buffer *tx_buffer;
3226	int             i;
3227
3228	INIT_DEBUGOUT("free_transmit_ring: begin");
3229
3230	if (txr->tx_buffers == NULL)
3231		return;
3232
3233	tx_buffer = txr->tx_buffers;
3234	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3235		if (tx_buffer->m_head != NULL) {
3236			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3237			    BUS_DMASYNC_POSTWRITE);
3238			bus_dmamap_unload(txr->txtag,
3239			    tx_buffer->map);
3240			m_freem(tx_buffer->m_head);
3241			tx_buffer->m_head = NULL;
3242			if (tx_buffer->map != NULL) {
3243				bus_dmamap_destroy(txr->txtag,
3244				    tx_buffer->map);
3245				tx_buffer->map = NULL;
3246			}
3247		} else if (tx_buffer->map != NULL) {
3248			bus_dmamap_unload(txr->txtag,
3249			    tx_buffer->map);
3250			bus_dmamap_destroy(txr->txtag,
3251			    tx_buffer->map);
3252			tx_buffer->map = NULL;
3253		}
3254	}
3255#if __FreeBSD_version >= 800000
3256	if (txr->br != NULL)
3257		buf_ring_free(txr->br, M_DEVBUF);
3258#endif
3259	if (txr->tx_buffers != NULL) {
3260		free(txr->tx_buffers, M_DEVBUF);
3261		txr->tx_buffers = NULL;
3262	}
3263	if (txr->txtag != NULL) {
3264		bus_dma_tag_destroy(txr->txtag);
3265		txr->txtag = NULL;
3266	}
3267	return;
3268}
3269
3270/**********************************************************************
3271 *
3272 *  Setup work for hardware segmentation offload (TSO)
3273 *
3274 **********************************************************************/
3275static boolean_t
3276igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3277{
3278	struct adapter *adapter = txr->adapter;
3279	struct e1000_adv_tx_context_desc *TXD;
3280	struct igb_tx_buffer        *tx_buffer;
3281	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3282	u32 mss_l4len_idx = 0;
3283	u16 vtag = 0;
3284	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3285	struct ether_vlan_header *eh;
3286	struct ip *ip;
3287	struct tcphdr *th;
3288
3289
3290	/*
3291	 * Determine where frame payload starts.
3292	 * Jump over vlan headers if already present
3293	 */
3294	eh = mtod(mp, struct ether_vlan_header *);
3295	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3296		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3297	else
3298		ehdrlen = ETHER_HDR_LEN;
3299
3300	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3301	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3302		return FALSE;
3303
3304	/* Only IPv4 is supported for now */
3305	ctxd = txr->next_avail_desc;
3306	tx_buffer = &txr->tx_buffers[ctxd];
3307	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3308
3309	ip = (struct ip *)(mp->m_data + ehdrlen);
3310	if (ip->ip_p != IPPROTO_TCP)
3311                return FALSE;   /* not TCP, cannot do TSO */
3312	ip->ip_sum = 0;
3313	ip_hlen = ip->ip_hl << 2;
3314	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3315	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3316	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3317	tcp_hlen = th->th_off << 2;
3318	/*
3319	 * Calculate header length, this is used
3320	 * in the transmit desc in igb_xmit
3321	 */
3322	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
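	/*
	** Illustrative example: for an untagged frame with no IP or TCP
	** options this is 14 (Ethernet) + 20 (IP) + 20 (TCP) = 54 bytes,
	** which igb_xmit() subtracts from the packet length when filling
	** in the descriptor payload length.
	*/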
3323
3324	/* VLAN MACLEN IPLEN */
3325	if (mp->m_flags & M_VLANTAG) {
3326		vtag = htole16(mp->m_pkthdr.ether_vtag);
3327		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3328	}
3329
3330	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3331	vlan_macip_lens |= ip_hlen;
3332	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3333
3334	/* ADV DTYPE TUCMD */
3335	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3336	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3337	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3338	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3339
3340	/* MSS L4LEN IDX */
3341	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3342	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3343	/* 82575 needs the queue index added */
3344	if (adapter->hw.mac.type == e1000_82575)
3345		mss_l4len_idx |= txr->me << 4;
3346	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3347
3348	TXD->seqnum_seed = htole32(0);
3349	tx_buffer->m_head = NULL;
3350	tx_buffer->next_eop = -1;
3351
3352	if (++ctxd == adapter->num_tx_desc)
3353		ctxd = 0;
3354
3355	txr->tx_avail--;
3356	txr->next_avail_desc = ctxd;
3357	return TRUE;
3358}
3359
3360
3361/*********************************************************************
3362 *
3363 *  Context Descriptor setup for VLAN or CSUM
3364 *
3365 **********************************************************************/
3366
3367static bool
3368igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3369{
3370	struct adapter *adapter = txr->adapter;
3371	struct e1000_adv_tx_context_desc *TXD;
3372	struct igb_tx_buffer        *tx_buffer;
3373	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3374	struct ether_vlan_header *eh;
3375	struct ip *ip = NULL;
3376	struct ip6_hdr *ip6;
3377	int  ehdrlen, ctxd, ip_hlen = 0;
3378	u16	etype, vtag = 0;
3379	u8	ipproto = 0;
3380	bool	offload = TRUE;
3381
3382	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3383		offload = FALSE;
3384
3385	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3386	ctxd = txr->next_avail_desc;
3387	tx_buffer = &txr->tx_buffers[ctxd];
3388	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3389
3390	/*
3391	** In advanced descriptors the vlan tag must
3392	** be placed into the context descriptor, thus
3393	** we need to be here just for that setup.
3394	*/
3395	if (mp->m_flags & M_VLANTAG) {
3396		vtag = htole16(mp->m_pkthdr.ether_vtag);
3397		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3398	} else if (offload == FALSE)
3399		return FALSE;
3400
3401	/*
3402	 * Determine where frame payload starts.
3403	 * Jump over vlan headers if already present,
3404	 * helpful for QinQ too.
3405	 */
3406	eh = mtod(mp, struct ether_vlan_header *);
3407	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3408		etype = ntohs(eh->evl_proto);
3409		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3410	} else {
3411		etype = ntohs(eh->evl_encap_proto);
3412		ehdrlen = ETHER_HDR_LEN;
3413	}
3414
3415	/* Set the ether header length */
3416	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3417
3418	switch (etype) {
3419		case ETHERTYPE_IP:
3420			ip = (struct ip *)(mp->m_data + ehdrlen);
3421			ip_hlen = ip->ip_hl << 2;
3422			if (mp->m_len < ehdrlen + ip_hlen) {
3423				offload = FALSE;
3424				break;
3425			}
3426			ipproto = ip->ip_p;
3427			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3428			break;
3429		case ETHERTYPE_IPV6:
3430			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3431			ip_hlen = sizeof(struct ip6_hdr);
3432			ipproto = ip6->ip6_nxt;
3433			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3434			break;
3435		default:
3436			offload = FALSE;
3437			break;
3438	}
3439
3440	vlan_macip_lens |= ip_hlen;
3441	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3442
3443	switch (ipproto) {
3444		case IPPROTO_TCP:
3445			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3446				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3447			break;
3448		case IPPROTO_UDP:
3449			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3450				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3451			break;
3452#if __FreeBSD_version >= 800000
3453		case IPPROTO_SCTP:
3454			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3455				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3456			break;
3457#endif
3458		default:
3459			offload = FALSE;
3460			break;
3461	}
3462
3463	/* 82575 needs the queue index added */
3464	if (adapter->hw.mac.type == e1000_82575)
3465		mss_l4len_idx = txr->me << 4;
3466
3467	/* Now copy bits into descriptor */
3468	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3469	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3470	TXD->seqnum_seed = htole32(0);
3471	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3472
3473	tx_buffer->m_head = NULL;
3474	tx_buffer->next_eop = -1;
3475
3476	/* We've consumed the first desc, adjust counters */
3477	if (++ctxd == adapter->num_tx_desc)
3478		ctxd = 0;
3479	txr->next_avail_desc = ctxd;
3480	--txr->tx_avail;
3481
3482        return (offload);
3483}
3484
3485
3486/**********************************************************************
3487 *
3488 *  Examine each tx_buffer in the used queue. If the hardware is done
3489 *  processing the packet then free associated resources. The
3490 *  tx_buffer is put back on the free queue.
3491 *
3492 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3493 **********************************************************************/
3494static bool
3495igb_txeof(struct tx_ring *txr)
3496{
3497	struct adapter	*adapter = txr->adapter;
3498        int first, last, done, processed;
3499        struct igb_tx_buffer *tx_buffer;
3500        struct e1000_tx_desc   *tx_desc, *eop_desc;
3501	struct ifnet   *ifp = adapter->ifp;
3502
3503	IGB_TX_LOCK_ASSERT(txr);
3504
3505        if (txr->tx_avail == adapter->num_tx_desc) {
3506		txr->queue_status = IGB_QUEUE_IDLE;
3507                return FALSE;
3508	}
3509
3510	processed = 0;
3511        first = txr->next_to_clean;
3512        tx_desc = &txr->tx_base[first];
3513        tx_buffer = &txr->tx_buffers[first];
3514	last = tx_buffer->next_eop;
3515        eop_desc = &txr->tx_base[last];
3516
3517	/*
3518	 * Get the index of the first descriptor AFTER the EOP of
3519	 * the first packet, so that the inner while loop can use a
3520	 * simple comparison.
3521	 */
3523	if (++last == adapter->num_tx_desc)
3524 		last = 0;
3525	done = last;
3526
3527        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3528            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3529
3530        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3531		/* We clean the range of the packet */
3532		while (first != done) {
3533                	tx_desc->upper.data = 0;
3534                	tx_desc->lower.data = 0;
3535                	tx_desc->buffer_addr = 0;
3536                	++txr->tx_avail;
3537			++processed;
3538
3539			if (tx_buffer->m_head) {
3540				txr->bytes +=
3541				    tx_buffer->m_head->m_pkthdr.len;
3542				bus_dmamap_sync(txr->txtag,
3543				    tx_buffer->map,
3544				    BUS_DMASYNC_POSTWRITE);
3545				bus_dmamap_unload(txr->txtag,
3546				    tx_buffer->map);
3547
3548                        	m_freem(tx_buffer->m_head);
3549                        	tx_buffer->m_head = NULL;
3550                	}
3551			tx_buffer->next_eop = -1;
3552			txr->watchdog_time = ticks;
3553
3554	                if (++first == adapter->num_tx_desc)
3555				first = 0;
3556
3557	                tx_buffer = &txr->tx_buffers[first];
3558			tx_desc = &txr->tx_base[first];
3559		}
3560		++txr->packets;
3561		++ifp->if_opackets;
3562		/* See if we can continue to the next packet */
3563		last = tx_buffer->next_eop;
3564		if (last != -1) {
3565        		eop_desc = &txr->tx_base[last];
3566			/* Get new done point */
3567			if (++last == adapter->num_tx_desc) last = 0;
3568			done = last;
3569		} else
3570			break;
3571        }
3572        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3573            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3574
3575        txr->next_to_clean = first;
3576
3577	/*
3578	** Watchdog calculation: we know there's work
3579	** outstanding or the first return would have
3580	** been taken, so nothing processed for too
3581	** long indicates a hang.
3582	*/
3583	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3584		txr->queue_status = IGB_QUEUE_HUNG;
3585
3586        /*
3587         * If we have enough room, clear IFF_DRV_OACTIVE
3588         * to tell the stack that it is OK to send packets.
3589         */
3590        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3591                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3592		/* All clean, turn off the watchdog */
3593                if (txr->tx_avail == adapter->num_tx_desc) {
3594			txr->queue_status = IGB_QUEUE_IDLE;
3595			return (FALSE);
3596		}
3597        }
3598
3599	return (TRUE);
3600}
3601
3602
3603/*********************************************************************
3604 *
3605 *  Refresh mbuf buffers for RX descriptor rings
3606 *   - now keeps its own state so discards due to resource
3607 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3608 *     it just returns, keeping its placeholder, and can simply
3609 *     be called again to retry.
3610 *
3611 **********************************************************************/
3612static void
3613igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3614{
3615	struct adapter		*adapter = rxr->adapter;
3616	bus_dma_segment_t	hseg[1];
3617	bus_dma_segment_t	pseg[1];
3618	struct igb_rx_buf	*rxbuf;
3619	struct mbuf		*mh, *mp;
3620	int			i, nsegs, error, cleaned;
3621
3622	i = rxr->next_to_refresh;
3623	cleaned = -1; /* Signify no completions */
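	/*
	 * Walk from next_to_refresh up to (but not including) 'limit',
	 * reattaching mbufs where needed; the refreshed descriptors are
	 * handed back to the hardware by the RDT (tail) write at the end.
	 */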
3624	while (i != limit) {
3625		rxbuf = &rxr->rx_buffers[i];
3626		/* No hdr mbuf used with header split off */
3627		if (rxr->hdr_split == FALSE)
3628			goto no_split;
3629		if (rxbuf->m_head == NULL) {
3630			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3631			if (mh == NULL)
3632				goto update;
3633		} else
3634			mh = rxbuf->m_head;
3635
3636		mh->m_pkthdr.len = mh->m_len = MHLEN;
3638		mh->m_flags |= M_PKTHDR;
3639		/* Get the memory mapping */
3640		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3641		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3642		if (error != 0) {
3643			printf("Refresh mbufs: hdr dmamap load"
3644			    " failure - %d\n", error);
3645			m_free(mh);
3646			rxbuf->m_head = NULL;
3647			goto update;
3648		}
3649		rxbuf->m_head = mh;
3650		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3651		    BUS_DMASYNC_PREREAD);
3652		rxr->rx_base[i].read.hdr_addr =
3653		    htole64(hseg[0].ds_addr);
3654no_split:
3655		if (rxbuf->m_pack == NULL) {
3656			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3657			    M_PKTHDR, adapter->rx_mbuf_sz);
3658			if (mp == NULL)
3659				goto update;
3660		} else
3661			mp = rxbuf->m_pack;
3662
3663		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3664		/* Get the memory mapping */
3665		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3666		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3667		if (error != 0) {
3668			printf("Refresh mbufs: payload dmamap load"
3669			    " failure - %d\n", error);
3670			m_free(mp);
3671			rxbuf->m_pack = NULL;
3672			goto update;
3673		}
3674		rxbuf->m_pack = mp;
3675		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3676		    BUS_DMASYNC_PREREAD);
3677		rxr->rx_base[i].read.pkt_addr =
3678		    htole64(pseg[0].ds_addr);
3679
3680		cleaned = i;
3681		/* Calculate next index */
3682		if (++i == adapter->num_rx_desc)
3683			i = 0;
3684		/* This is the work marker for refresh */
3685		rxr->next_to_refresh = i;
3686	}
3687update:
3688	if (cleaned != -1) /* If we refreshed some, bump tail */
3689		E1000_WRITE_REG(&adapter->hw,
3690		    E1000_RDT(rxr->me), cleaned);
3691	return;
3692}
3693
3694
3695/*********************************************************************
3696 *
3697 *  Allocate memory for rx_buffer structures. Since we use one
3698 *  rx_buffer per received packet, the maximum number of rx_buffer's
3699 *  that we'll need is equal to the number of receive descriptors
3700 *  that we've allocated.
3701 *
3702 **********************************************************************/
3703static int
3704igb_allocate_receive_buffers(struct rx_ring *rxr)
3705{
3706	struct	adapter 	*adapter = rxr->adapter;
3707	device_t 		dev = adapter->dev;
3708	struct igb_rx_buf	*rxbuf;
3709	int             	i, bsize, error;
3710
3711	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3712	if (!(rxr->rx_buffers =
3713	    (struct igb_rx_buf *) malloc(bsize,
3714	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3715		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3716		error = ENOMEM;
3717		goto fail;
3718	}
3719
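	/*
	 * Two DMA tags are used per ring: a small one (MSIZE) for the
	 * header buffers used by header split, and one sized for up to
	 * 9K jumbo clusters for the payload buffers.
	 */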
3720	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3721				   1, 0,		/* alignment, bounds */
3722				   BUS_SPACE_MAXADDR,	/* lowaddr */
3723				   BUS_SPACE_MAXADDR,	/* highaddr */
3724				   NULL, NULL,		/* filter, filterarg */
3725				   MSIZE,		/* maxsize */
3726				   1,			/* nsegments */
3727				   MSIZE,		/* maxsegsize */
3728				   0,			/* flags */
3729				   NULL,		/* lockfunc */
3730				   NULL,		/* lockfuncarg */
3731				   &rxr->htag))) {
3732		device_printf(dev, "Unable to create RX DMA tag\n");
3733		goto fail;
3734	}
3735
3736	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3737				   1, 0,		/* alignment, bounds */
3738				   BUS_SPACE_MAXADDR,	/* lowaddr */
3739				   BUS_SPACE_MAXADDR,	/* highaddr */
3740				   NULL, NULL,		/* filter, filterarg */
3741				   MJUM9BYTES,		/* maxsize */
3742				   1,			/* nsegments */
3743				   MJUM9BYTES,		/* maxsegsize */
3744				   0,			/* flags */
3745				   NULL,		/* lockfunc */
3746				   NULL,		/* lockfuncarg */
3747				   &rxr->ptag))) {
3748		device_printf(dev, "Unable to create RX payload DMA tag\n");
3749		goto fail;
3750	}
3751
3752	for (i = 0; i < adapter->num_rx_desc; i++) {
3753		rxbuf = &rxr->rx_buffers[i];
3754		error = bus_dmamap_create(rxr->htag,
3755		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3756		if (error) {
3757			device_printf(dev,
3758			    "Unable to create RX head DMA maps\n");
3759			goto fail;
3760		}
3761		error = bus_dmamap_create(rxr->ptag,
3762		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3763		if (error) {
3764			device_printf(dev,
3765			    "Unable to create RX packet DMA maps\n");
3766			goto fail;
3767		}
3768	}
3769
3770	return (0);
3771
3772fail:
3773	/* Frees all, but can handle partial completion */
3774	igb_free_receive_structures(adapter);
3775	return (error);
3776}
3777
3778
3779static void
3780igb_free_receive_ring(struct rx_ring *rxr)
3781{
3782	struct	adapter		*adapter;
3783	struct igb_rx_buf	*rxbuf;
3784	int i;
3785
3786	adapter = rxr->adapter;
3787	for (i = 0; i < adapter->num_rx_desc; i++) {
3788		rxbuf = &rxr->rx_buffers[i];
3789		if (rxbuf->m_head != NULL) {
3790			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3791			    BUS_DMASYNC_POSTREAD);
3792			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3793			rxbuf->m_head->m_flags |= M_PKTHDR;
3794			m_freem(rxbuf->m_head);
3795		}
3796		if (rxbuf->m_pack != NULL) {
3797			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3798			    BUS_DMASYNC_POSTREAD);
3799			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3800			rxbuf->m_pack->m_flags |= M_PKTHDR;
3801			m_freem(rxbuf->m_pack);
3802		}
3803		rxbuf->m_head = NULL;
3804		rxbuf->m_pack = NULL;
3805	}
3806}
3807
3808
3809/*********************************************************************
3810 *
3811 *  Initialize a receive ring and its buffers.
3812 *
3813 **********************************************************************/
3814static int
3815igb_setup_receive_ring(struct rx_ring *rxr)
3816{
3817	struct	adapter		*adapter;
3818	struct  ifnet		*ifp;
3819	device_t		dev;
3820	struct igb_rx_buf	*rxbuf;
3821	bus_dma_segment_t	pseg[1], hseg[1];
3822	struct lro_ctrl		*lro = &rxr->lro;
3823	int			rsize, nsegs, error = 0;
3824
3825	adapter = rxr->adapter;
3826	dev = adapter->dev;
3827	ifp = adapter->ifp;
3828
3829	/* Clear the ring contents */
3830	IGB_RX_LOCK(rxr);
3831	rsize = roundup2(adapter->num_rx_desc *
3832	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3833	bzero((void *)rxr->rx_base, rsize);
3834
3835	/*
3836	** Free current RX buffer structures and their mbufs
3837	*/
3838	igb_free_receive_ring(rxr);
3839
3840	/* Configure for header split? */
3841	if (igb_header_split)
3842		rxr->hdr_split = TRUE;
3843
3844        /* Now replenish the ring mbufs */
3845	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3846		struct mbuf	*mh, *mp;
3847
3848		rxbuf = &rxr->rx_buffers[j];
3849		if (rxr->hdr_split == FALSE)
3850			goto skip_head;
3851
3852		/* First the header */
3853		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3854		if (rxbuf->m_head == NULL) {
3855			error = ENOBUFS;
3856                        goto fail;
3857		}
3858		m_adj(rxbuf->m_head, ETHER_ALIGN);
3859		mh = rxbuf->m_head;
3860		mh->m_len = mh->m_pkthdr.len = MHLEN;
3861		mh->m_flags |= M_PKTHDR;
3862		/* Get the memory mapping */
3863		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3864		    rxbuf->hmap, rxbuf->m_head, hseg,
3865		    &nsegs, BUS_DMA_NOWAIT);
3866		if (error != 0) /* Nothing elegant to do here */
3867                        goto fail;
3868		bus_dmamap_sync(rxr->htag,
3869		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3870		/* Update descriptor */
3871		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3872
3873skip_head:
3874		/* Now the payload cluster */
3875		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3876		    M_PKTHDR, adapter->rx_mbuf_sz);
3877		if (rxbuf->m_pack == NULL) {
3878			error = ENOBUFS;
3879                        goto fail;
3880		}
3881		mp = rxbuf->m_pack;
3882		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3883		/* Get the memory mapping */
3884		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3885		    rxbuf->pmap, mp, pseg,
3886		    &nsegs, BUS_DMA_NOWAIT);
3887		if (error != 0)
3888                        goto fail;
3889		bus_dmamap_sync(rxr->ptag,
3890		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3891		/* Update descriptor */
3892		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3893        }
3894
3895	/* Setup our descriptor indices */
3896	rxr->next_to_check = 0;
3897	rxr->next_to_refresh = 0;
3898	rxr->lro_enabled = FALSE;
3899	rxr->rx_split_packets = 0;
3900	rxr->rx_bytes = 0;
3901
3902	rxr->fmp = NULL;
3903	rxr->lmp = NULL;
3904	rxr->discard = FALSE;
3905
3906	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3907	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3908
3909	/*
3910	** Now set up the LRO interface; we
3911	** also only do header split when LRO
3912	** is enabled, since it is so often
3913	** undesirable in other setups.
3914	*/
3915	if (ifp->if_capenable & IFCAP_LRO) {
3916		error = tcp_lro_init(lro);
3917		if (error) {
3918			device_printf(dev, "LRO Initialization failed!\n");
3919			goto fail;
3920		}
3921		INIT_DEBUGOUT("RX LRO Initialized\n");
3922		rxr->lro_enabled = TRUE;
3923		lro->ifp = adapter->ifp;
3924	}
3925
3926	IGB_RX_UNLOCK(rxr);
3927	return (0);
3928
3929fail:
3930	igb_free_receive_ring(rxr);
3931	IGB_RX_UNLOCK(rxr);
3932	return (error);
3933}
3934
3935/*********************************************************************
3936 *
3937 *  Initialize all receive rings.
3938 *
3939 **********************************************************************/
3940static int
3941igb_setup_receive_structures(struct adapter *adapter)
3942{
3943	struct rx_ring *rxr = adapter->rx_rings;
3944	int i;
3945
3946	for (i = 0; i < adapter->num_queues; i++, rxr++)
3947		if (igb_setup_receive_ring(rxr))
3948			goto fail;
3949
3950	return (0);
3951fail:
3952	/*
3953	 * Free the RX buffers allocated so far; we only handle
3954	 * the rings that completed, since the failing case will
3955	 * have cleaned up after itself. 'i' is the endpoint.
3956	 */
3957	for (int j = 0; j < i; ++j) {
3958		rxr = &adapter->rx_rings[j];
3959		IGB_RX_LOCK(rxr);
3960		igb_free_receive_ring(rxr);
3961		IGB_RX_UNLOCK(rxr);
3962	}
3963
3964	return (ENOBUFS);
3965}
3966
3967/*********************************************************************
3968 *
3969 *  Enable receive unit.
3970 *
3971 **********************************************************************/
3972static void
3973igb_initialize_receive_units(struct adapter *adapter)
3974{
3975	struct rx_ring	*rxr = adapter->rx_rings;
3976	struct ifnet	*ifp = adapter->ifp;
3977	struct e1000_hw *hw = &adapter->hw;
3978	u32		rctl, rxcsum, psize, srrctl = 0;
3979
3980	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3981
3982	/*
3983	 * Make sure receives are disabled while setting
3984	 * up the descriptor ring
3985	 */
3986	rctl = E1000_READ_REG(hw, E1000_RCTL);
3987	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3988
3989	/*
3990	** Set up for header split
3991	*/
3992	if (rxr->hdr_split) {
3993		/* Use a standard mbuf for the header */
3994		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3995		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3996	} else
3997		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3998
3999	/*
4000	** Set up for jumbo frames
4001	*/
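	/*
	** The SRRCTL packet buffer size field is expressed in 1KB
	** units (hence the BSIZEPKT shift); the RCTL_SZ_*/BSEX bits
	** must agree with the cluster size chosen for rx_mbuf_sz.
	*/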
4002	if (ifp->if_mtu > ETHERMTU) {
4003		rctl |= E1000_RCTL_LPE;
4004		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4005			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4006			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4007		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4008			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4009			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4010		}
4011		/* Set maximum packet len */
4012		psize = adapter->max_frame_size;
4013		/* are we on a vlan? */
4014		if (adapter->ifp->if_vlantrunk != NULL)
4015			psize += VLAN_TAG_SIZE;
4016		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4017	} else {
4018		rctl &= ~E1000_RCTL_LPE;
4019		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4020		rctl |= E1000_RCTL_SZ_2048;
4021	}
4022
4023	/* Setup the Base and Length of the Rx Descriptor Rings */
4024	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4025		u64 bus_addr = rxr->rxdma.dma_paddr;
4026		u32 rxdctl;
4027
4028		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4029		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4030		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4031		    (uint32_t)(bus_addr >> 32));
4032		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4033		    (uint32_t)bus_addr);
4034		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4035		/* Enable this Queue */
4036		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4037		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4038		rxdctl &= 0xFFF00000;
4039		rxdctl |= IGB_RX_PTHRESH;
4040		rxdctl |= IGB_RX_HTHRESH << 8;
4041		rxdctl |= IGB_RX_WTHRESH << 16;
4042		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4043	}
4044
4045	/*
4046	** Setup for RX MultiQueue
4047	*/
4048	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4049	if (adapter->num_queues > 1) {
4050		u32 random[10], mrqc, shift = 0;
4051		union igb_reta {
4052			u32 dword;
4053			u8  bytes[4];
4054		} reta;
4055
4056		arc4rand(&random, sizeof(random), 0);
4057		if (adapter->hw.mac.type == e1000_82575)
4058			shift = 6;
4059		/* Warning FM follows */
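		/*
		** The redirection table (RETA) has 128 one-byte entries,
		** written four at a time; each entry maps an RSS hash
		** bucket to a queue, assigned round-robin here. The
		** 82575 expects the queue number in the upper bits of
		** each entry, hence the shift.
		*/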
4060		for (int i = 0; i < 128; i++) {
4061			reta.bytes[i & 3] =
4062			    (i % adapter->num_queues) << shift;
4063			if ((i & 3) == 3)
4064				E1000_WRITE_REG(hw,
4065				    E1000_RETA(i >> 2), reta.dword);
4066		}
4067		/* Now fill in hash table */
4068		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4069		for (int i = 0; i < 10; i++)
4070			E1000_WRITE_REG_ARRAY(hw,
4071			    E1000_RSSRK(0), i, random[i]);
4072
4073		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4074		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4075		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4076		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4077		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4078		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4079		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4080		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4081
4082		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4083
4084		/*
4085		** NOTE: Receive Full-Packet Checksum Offload
4086		** is mutually exclusive with Multiqueue. However
4087		** this is not the same as TCP/IP checksums which
4088		** still work.
4089		*/
4090		rxcsum |= E1000_RXCSUM_PCSD;
4091#if __FreeBSD_version >= 800000
4092		/* For SCTP Offload */
4093		if ((hw->mac.type == e1000_82576)
4094		    && (ifp->if_capenable & IFCAP_RXCSUM))
4095			rxcsum |= E1000_RXCSUM_CRCOFL;
4096#endif
4097	} else {
4098		/* Non RSS setup */
4099		if (ifp->if_capenable & IFCAP_RXCSUM) {
4100			rxcsum |= E1000_RXCSUM_IPPCSE;
4101#if __FreeBSD_version >= 800000
4102			if (adapter->hw.mac.type == e1000_82576)
4103				rxcsum |= E1000_RXCSUM_CRCOFL;
4104#endif
4105		} else
4106			rxcsum &= ~E1000_RXCSUM_TUOFL;
4107	}
4108	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4109
4110	/* Setup the Receive Control Register */
4111	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4112	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4113		   E1000_RCTL_RDMTS_HALF |
4114		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4115	/* Strip CRC bytes. */
4116	rctl |= E1000_RCTL_SECRC;
4117	/* Make sure VLAN Filters are off */
4118	rctl &= ~E1000_RCTL_VFE;
4119	/* Don't store bad packets */
4120	rctl &= ~E1000_RCTL_SBP;
4121
4122	/* Enable Receives */
4123	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4124
4125	/*
4126	 * Setup the HW Rx Head and Tail Descriptor Pointers
4127	 *   - needs to be after enable
4128	 */
4129	for (int i = 0; i < adapter->num_queues; i++) {
4130		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4131		E1000_WRITE_REG(hw, E1000_RDT(i),
4132		     adapter->num_rx_desc - 1);
4133	}
4134	return;
4135}
4136
4137/*********************************************************************
4138 *
4139 *  Free receive rings.
4140 *
4141 **********************************************************************/
4142static void
4143igb_free_receive_structures(struct adapter *adapter)
4144{
4145	struct rx_ring *rxr = adapter->rx_rings;
4146
4147	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4148		struct lro_ctrl	*lro = &rxr->lro;
4149		igb_free_receive_buffers(rxr);
4150		tcp_lro_free(lro);
4151		igb_dma_free(adapter, &rxr->rxdma);
4152	}
4153
4154	free(adapter->rx_rings, M_DEVBUF);
4155}
4156
4157/*********************************************************************
4158 *
4159 *  Free receive ring data structures.
4160 *
4161 **********************************************************************/
4162static void
4163igb_free_receive_buffers(struct rx_ring *rxr)
4164{
4165	struct adapter		*adapter = rxr->adapter;
4166	struct igb_rx_buf	*rxbuf;
4167	int i;
4168
4169	INIT_DEBUGOUT("free_receive_structures: begin");
4170
4171	/* Cleanup any existing buffers */
4172	if (rxr->rx_buffers != NULL) {
4173		for (i = 0; i < adapter->num_rx_desc; i++) {
4174			rxbuf = &rxr->rx_buffers[i];
4175			if (rxbuf->m_head != NULL) {
4176				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4177				    BUS_DMASYNC_POSTREAD);
4178				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4179				rxbuf->m_head->m_flags |= M_PKTHDR;
4180				m_freem(rxbuf->m_head);
4181			}
4182			if (rxbuf->m_pack != NULL) {
4183				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4184				    BUS_DMASYNC_POSTREAD);
4185				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4186				rxbuf->m_pack->m_flags |= M_PKTHDR;
4187				m_freem(rxbuf->m_pack);
4188			}
4189			rxbuf->m_head = NULL;
4190			rxbuf->m_pack = NULL;
4191			if (rxbuf->hmap != NULL) {
4192				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4193				rxbuf->hmap = NULL;
4194			}
4195			if (rxbuf->pmap != NULL) {
4196				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4197				rxbuf->pmap = NULL;
4198			}
4199		}
4200		if (rxr->rx_buffers != NULL) {
4201			free(rxr->rx_buffers, M_DEVBUF);
4202			rxr->rx_buffers = NULL;
4203		}
4204	}
4205
4206	if (rxr->htag != NULL) {
4207		bus_dma_tag_destroy(rxr->htag);
4208		rxr->htag = NULL;
4209	}
4210	if (rxr->ptag != NULL) {
4211		bus_dma_tag_destroy(rxr->ptag);
4212		rxr->ptag = NULL;
4213	}
4214}
4215
4216static __inline void
4217igb_rx_discard(struct rx_ring *rxr, int i)
4218{
4219	struct igb_rx_buf	*rbuf;
4220
4221	rbuf = &rxr->rx_buffers[i];
4222
4223	/* Partially received? Free the chain */
4224	if (rxr->fmp != NULL) {
4225		rxr->fmp->m_flags |= M_PKTHDR;
4226		m_freem(rxr->fmp);
4227		rxr->fmp = NULL;
4228		rxr->lmp = NULL;
4229	}
4230
4231	/*
4232	** With advanced descriptors the writeback
4233	** clobbers the buffer addrs, so it's easier
4234	** to just free the existing mbufs and take
4235	** the normal refresh path to get new buffers
4236	** and mapping.
4237	*/
4238	if (rbuf->m_head) {
4239		m_free(rbuf->m_head);
4240		rbuf->m_head = NULL;
4241	}
4242
4243	if (rbuf->m_pack) {
4244		m_free(rbuf->m_pack);
4245		rbuf->m_pack = NULL;
4246	}
4247
4248	return;
4249}
4250
4251static __inline void
4252igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4253{
4254
4255	/*
4256	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4257	 * checksum has been verified by hardware, and which carry no VLAN
4258	 * tag in the ethernet header.
4259	 */
4260	if (rxr->lro_enabled &&
4261	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4262	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4263	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4264	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4265	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4266	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4267		/*
4268		 * Send to the stack if:
4269		 *  - LRO not enabled, or
4270		 *  - no LRO resources, or
4271		 *  - lro enqueue fails
4272		 */
4273		if (rxr->lro.lro_cnt != 0)
4274			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4275				return;
4276	}
4277	IGB_RX_UNLOCK(rxr);
4278	(*ifp->if_input)(ifp, m);
4279	IGB_RX_LOCK(rxr);
4280}
4281
4282/*********************************************************************
4283 *
4284 *  This routine executes in interrupt context. It replenishes
4285 *  the mbufs in the descriptor and sends data which has been
4286 *  dma'ed into host memory to upper layer.
4287 *
4288 *  We loop at most count times if count is > 0, or until done if
4289 *  count < 0.
4290 *
4291 *  Return TRUE if more to clean, FALSE otherwise
4292 *********************************************************************/
4293static bool
4294igb_rxeof(struct igb_queue *que, int count, int *done)
4295{
4296	struct adapter		*adapter = que->adapter;
4297	struct rx_ring		*rxr = que->rxr;
4298	struct ifnet		*ifp = adapter->ifp;
4299	struct lro_ctrl		*lro = &rxr->lro;
4300	struct lro_entry	*queued;
4301	int			i, processed = 0, rxdone = 0;
4302	u32			ptype, staterr = 0;
4303	union e1000_adv_rx_desc	*cur;
4304
4305	IGB_RX_LOCK(rxr);
4306	/* Sync the ring. */
4307	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4308	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4309
4310	/* Main clean loop */
4311	for (i = rxr->next_to_check; count != 0;) {
4312		struct mbuf		*sendmp, *mh, *mp;
4313		struct igb_rx_buf	*rxbuf;
4314		u16			hlen, plen, hdr, vtag;
4315		bool			eop = FALSE;
4316
4317		cur = &rxr->rx_base[i];
4318		staterr = le32toh(cur->wb.upper.status_error);
4319		if ((staterr & E1000_RXD_STAT_DD) == 0)
4320			break;
4321		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4322			break;
4323		count--;
4324		sendmp = mh = mp = NULL;
4325		cur->wb.upper.status_error = 0;
4326		rxbuf = &rxr->rx_buffers[i];
4327		plen = le16toh(cur->wb.upper.length);
4328		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4329		vtag = le16toh(cur->wb.upper.vlan);
4330		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4331		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4332
4333		/* Make sure all segments of a bad packet are discarded */
4334		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4335		    (rxr->discard)) {
4336			ifp->if_ierrors++;
4337			++rxr->rx_discarded;
4338			if (!eop) /* Catch subsequent segs */
4339				rxr->discard = TRUE;
4340			else
4341				rxr->discard = FALSE;
4342			igb_rx_discard(rxr, i);
4343			goto next_desc;
4344		}
4345
4346		/*
4347		** The way the hardware is configured to
4348		** split, it will ONLY use the header buffer
4349		** when header split is enabled; otherwise we
4350		** get normal behavior, i.e., both header and
4351		** payload are DMA'd into the payload buffer.
4352		**
4353		** The fmp test is to catch the case where a
4354		** packet spans multiple descriptors; in that
4355		** case only the first header is valid.
4356		*/
4357		if (rxr->hdr_split && rxr->fmp == NULL) {
4358			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4359			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4360			if (hlen > IGB_HDR_BUF)
4361				hlen = IGB_HDR_BUF;
4362			mh = rxr->rx_buffers[i].m_head;
4363			mh->m_len = hlen;
4364			/* clear buf pointer for refresh */
4365			rxbuf->m_head = NULL;
4366			/*
4367			** Get the payload length, this
4368			** could be zero if its a small
4369			** packet.
4370			*/
4371			if (plen > 0) {
4372				mp = rxr->rx_buffers[i].m_pack;
4373				mp->m_len = plen;
4374				mh->m_next = mp;
4375				/* clear buf pointer */
4376				rxbuf->m_pack = NULL;
4377				rxr->rx_split_packets++;
4378			}
4379		} else {
4380			/*
4381			** Either no header split, or a
4382			** secondary piece of a fragmented
4383			** split packet.
4384			*/
4385			mh = rxr->rx_buffers[i].m_pack;
4386			mh->m_len = plen;
4387			/* clear buf info for refresh */
4388			rxbuf->m_pack = NULL;
4389		}
4390
4391		++processed; /* So we know when to refresh */
4392
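		/*
		** fmp/lmp track the first and last mbuf of the frame
		** being assembled, so a packet spanning multiple
		** descriptors is chained together before being handed up.
		*/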
4393		/* Initial frame - setup */
4394		if (rxr->fmp == NULL) {
4395			mh->m_pkthdr.len = mh->m_len;
4396			/* Save the head of the chain */
4397			rxr->fmp = mh;
4398			rxr->lmp = mh;
4399			if (mp != NULL) {
4400				/* Add payload if split */
4401				mh->m_pkthdr.len += mp->m_len;
4402				rxr->lmp = mh->m_next;
4403			}
4404		} else {
4405			/* Chain mbuf's together */
4406			rxr->lmp->m_next = mh;
4407			rxr->lmp = rxr->lmp->m_next;
4408			rxr->fmp->m_pkthdr.len += mh->m_len;
4409		}
4410
4411		if (eop) {
4412			rxr->fmp->m_pkthdr.rcvif = ifp;
4413			ifp->if_ipackets++;
4414			rxr->rx_packets++;
4415			/* capture data for AIM */
4416			rxr->packets++;
4417			rxr->bytes += rxr->fmp->m_pkthdr.len;
4418			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4419
4420			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4421				igb_rx_checksum(staterr, rxr->fmp, ptype);
4422
4423			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4424			    (staterr & E1000_RXD_STAT_VP) != 0) {
4425				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4426				rxr->fmp->m_flags |= M_VLANTAG;
4427			}
4428#if __FreeBSD_version >= 800000
4429			rxr->fmp->m_pkthdr.flowid = que->msix;
4430			rxr->fmp->m_flags |= M_FLOWID;
4431#endif
4432			sendmp = rxr->fmp;
4433			/* Make sure to set M_PKTHDR. */
4434			sendmp->m_flags |= M_PKTHDR;
4435			rxr->fmp = NULL;
4436			rxr->lmp = NULL;
4437		}
4438
4439next_desc:
4440		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4441		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4442
4443		/* Advance our pointers to the next descriptor. */
4444		if (++i == adapter->num_rx_desc)
4445			i = 0;
4446		/*
4447		** Send to the stack or LRO
4448		*/
4449		if (sendmp != NULL) {
4450			rxr->next_to_check = i;
4451			igb_rx_input(rxr, ifp, sendmp, ptype);
4452			i = rxr->next_to_check;
4453			rxdone++;
4454		}
4455
4456		/* Every 8 descriptors we go to refresh mbufs */
4457		if (processed == 8) {
4458                        igb_refresh_mbufs(rxr, i);
4459                        processed = 0;
4460		}
4461	}
4462
4463	/* Catch any remainders */
4464	if (processed != 0) {
4465		igb_refresh_mbufs(rxr, i);
4466		processed = 0;
4467	}
4468
4469	rxr->next_to_check = i;
4470
4471	/*
4472	 * Flush any outstanding LRO work
4473	 */
4474	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4475		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4476		tcp_lro_flush(lro, queued);
4477	}
4478
4479	IGB_RX_UNLOCK(rxr);
4480
4481	if (done != NULL)
4482		*done = rxdone;
4483
4484	/*
4485	** We still have cleaning to do?
4486	** Schedule another interrupt if so.
4487	*/
4488	if ((staterr & E1000_RXD_STAT_DD) != 0)
4489		return (TRUE);
4490
4491	return (FALSE);
4492}
4493
4494/*********************************************************************
4495 *
4496 *  Verify that the hardware indicated that the checksum is valid.
4497 *  Inform the stack about the status of checksum so that stack
4498 *  doesn't spend time verifying the checksum.
4499 *
4500 *********************************************************************/
4501static void
4502igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4503{
4504	u16 status = (u16)staterr;
4505	u8  errors = (u8) (staterr >> 24);
4506	int sctp;
4507
4508	/* Ignore Checksum bit is set */
4509	if (status & E1000_RXD_STAT_IXSM) {
4510		mp->m_pkthdr.csum_flags = 0;
4511		return;
4512	}
4513
4514	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4515	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4516		sctp = 1;
4517	else
4518		sctp = 0;
4519	if (status & E1000_RXD_STAT_IPCS) {
4520		/* Did it pass? */
4521		if (!(errors & E1000_RXD_ERR_IPE)) {
4522			/* IP Checksum Good */
4523			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4524			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4525		} else
4526			mp->m_pkthdr.csum_flags = 0;
4527	}
4528
4529	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4530		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
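		/*
		 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with a csum_data of
		 * 0xffff tells the stack the full L4 checksum (including
		 * the pseudo-header) has already been verified; SCTP
		 * instead uses CSUM_SCTP_VALID with no csum_data.
		 */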
4531#if __FreeBSD_version >= 800000
4532		if (sctp) /* reassign */
4533			type = CSUM_SCTP_VALID;
4534#endif
4535		/* Did it pass? */
4536		if (!(errors & E1000_RXD_ERR_TCPE)) {
4537			mp->m_pkthdr.csum_flags |= type;
4538			if (sctp == 0)
4539				mp->m_pkthdr.csum_data = htons(0xffff);
4540		}
4541	}
4542	return;
4543}
4544
4545/*
4546 * This routine is run via a vlan
4547 * config EVENT
4548 */
4549static void
4550igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4551{
4552	struct adapter	*adapter = ifp->if_softc;
4553	u32		index, bit;
4554
4555	if (ifp->if_softc !=  arg)   /* Not our event */
4556		return;
4557
4558	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4559                return;
4560
4561	IGB_CORE_LOCK(adapter);
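	/*
	 * The shadow VFTA mirrors the hardware VLAN filter table:
	 * one bit per VLAN ID, 32 IDs per 32-bit entry, so the entry
	 * index is vtag >> 5 and the bit within it is vtag & 0x1F.
	 */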
4562	index = (vtag >> 5) & 0x7F;
4563	bit = vtag & 0x1F;
4564	adapter->shadow_vfta[index] |= (1 << bit);
4565	++adapter->num_vlans;
4566	/* Re-init to load the changes */
4567	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4568		igb_init_locked(adapter);
4569	IGB_CORE_UNLOCK(adapter);
4570}
4571
4572/*
4573 * This routine is run via a vlan
4574 * unconfig EVENT
4575 */
4576static void
4577igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4578{
4579	struct adapter	*adapter = ifp->if_softc;
4580	u32		index, bit;
4581
4582	if (ifp->if_softc !=  arg)
4583		return;
4584
4585	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4586                return;
4587
4588	IGB_CORE_LOCK(adapter);
4589	index = (vtag >> 5) & 0x7F;
4590	bit = vtag & 0x1F;
4591	adapter->shadow_vfta[index] &= ~(1 << bit);
4592	--adapter->num_vlans;
4593	/* Re-init to load the changes */
4594	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4595		igb_init_locked(adapter);
4596	IGB_CORE_UNLOCK(adapter);
4597}
4598
4599static void
4600igb_setup_vlan_hw_support(struct adapter *adapter)
4601{
4602	struct e1000_hw *hw = &adapter->hw;
4603	u32             reg;
4604
4605	/*
4606	** We get here through init_locked, meaning
4607	** a soft reset; this has already cleared
4608	** the VFTA and other state, so if no
4609	** vlans have been registered do nothing.
4610	*/
4611	if (adapter->num_vlans == 0)
4612                return;
4613
4614	/*
4615	** A soft reset zeroes out the VFTA, so
4616	** we need to repopulate it now.
4617	*/
4618	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4619                if (adapter->shadow_vfta[i] != 0) {
4620			if (hw->mac.type == e1000_vfadapt)
4621				e1000_vfta_set_vf(hw,
4622				    adapter->shadow_vfta[i], TRUE);
4623			else
4624				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4625                           	 i, adapter->shadow_vfta[i]);
4626		}
4627
4628	if (hw->mac.type == e1000_vfadapt)
4629		e1000_rlpml_set_vf(hw,
4630		    adapter->max_frame_size + VLAN_TAG_SIZE);
4631	else {
4632		reg = E1000_READ_REG(hw, E1000_CTRL);
4633		reg |= E1000_CTRL_VME;
4634		E1000_WRITE_REG(hw, E1000_CTRL, reg);
4635
4636		/* Enable the Filter Table */
4637		reg = E1000_READ_REG(hw, E1000_RCTL);
4638		reg &= ~E1000_RCTL_CFIEN;
4639		reg |= E1000_RCTL_VFE;
4640		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4641
4642		/* Update the frame size */
4643		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4644		    adapter->max_frame_size + VLAN_TAG_SIZE);
4645	}
4646}
4647
4648static void
4649igb_enable_intr(struct adapter *adapter)
4650{
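	/*
	 * In MSIX mode the queue causes live in the extended registers:
	 * EIAC selects which causes auto-clear, EIAM the auto-mask, and
	 * EIMS enables them; link state change stays on the legacy IMS.
	 */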
4651	/* With RSS set up what to auto clear */
4652	if (adapter->msix_mem) {
4653		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4654		    adapter->eims_mask);
4655		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4656		    adapter->eims_mask);
4657		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4658		    adapter->eims_mask);
4659		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4660		    E1000_IMS_LSC);
4661	} else {
4662		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4663		    IMS_ENABLE_MASK);
4664	}
4665	E1000_WRITE_FLUSH(&adapter->hw);
4666
4667	return;
4668}
4669
4670static void
4671igb_disable_intr(struct adapter *adapter)
4672{
4673	if (adapter->msix_mem) {
4674		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4675		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4676	}
4677	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4678	E1000_WRITE_FLUSH(&adapter->hw);
4679	return;
4680}
4681
4682/*
4683 * Bit of a misnomer, what this really means is
4684 * to enable OS management of the system... aka
4685 * to disable special hardware management features
4686 */
4687static void
4688igb_init_manageability(struct adapter *adapter)
4689{
4690	if (adapter->has_manage) {
4691		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4692		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4693
4694		/* disable hardware interception of ARP */
4695		manc &= ~(E1000_MANC_ARP_EN);
4696
4697                /* enable receiving management packets to the host */
4698		manc |= E1000_MANC_EN_MNG2HOST;
4699		manc2h |= 1 << 5;  /* Mng Port 623 */
4700		manc2h |= 1 << 6;  /* Mng Port 664 */
4701		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4702		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4703	}
4704}
4705
4706/*
4707 * Give control back to hardware management
4708 * controller if there is one.
4709 */
4710static void
4711igb_release_manageability(struct adapter *adapter)
4712{
4713	if (adapter->has_manage) {
4714		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4715
4716		/* re-enable hardware interception of ARP */
4717		manc |= E1000_MANC_ARP_EN;
4718		manc &= ~E1000_MANC_EN_MNG2HOST;
4719
4720		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4721	}
4722}
4723
4724/*
4725 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4726 * For ASF and Pass Through versions of f/w this means that
4727 * the driver is loaded.
4728 *
4729 */
4730static void
4731igb_get_hw_control(struct adapter *adapter)
4732{
4733	u32 ctrl_ext;
4734
4735	if (adapter->hw.mac.type == e1000_vfadapt)
4736		return;
4737
4738	/* Let firmware know the driver has taken over */
4739	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4740	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4741	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4742}
4743
4744/*
4745 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4746 * For ASF and Pass Through versions of f/w this means that the
4747 * driver is no longer loaded.
4748 *
4749 */
4750static void
4751igb_release_hw_control(struct adapter *adapter)
4752{
4753	u32 ctrl_ext;
4754
4755	if (adapter->hw.mac.type == e1000_vfadapt)
4756		return;
4757
4758	/* Let firmware take over control of h/w */
4759	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4760	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4761	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4762}
4763
4764static int
4765igb_is_valid_ether_addr(uint8_t *addr)
4766{
4767	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4768
4769	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4770		return (FALSE);
4771	}
4772
4773	return (TRUE);
4774}
4775
4776
4777/*
4778 * Enable PCI Wake On Lan capability
4779 */
4780static void
4781igb_enable_wakeup(device_t dev)
4782{
4783	u16     cap, status;
4784	u8      id;
4785
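	/*
	 * Note: only the first entry in the capability list is examined;
	 * if the power-management capability is not first, WOL setup is
	 * silently skipped.
	 */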
4786	/* First find the capabilities pointer*/
4787	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4788	/* Read the PM Capabilities */
4789	id = pci_read_config(dev, cap, 1);
4790	if (id != PCIY_PMG)     /* Something wrong */
4791		return;
4792	/* OK, we have the power capabilities, so
4793	   now get the status register */
4794	cap += PCIR_POWER_STATUS;
4795	status = pci_read_config(dev, cap, 2);
4796	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4797	pci_write_config(dev, cap, status, 2);
4798	return;
4799}
4800
4801static void
4802igb_led_func(void *arg, int onoff)
4803{
4804	struct adapter	*adapter = arg;
4805
4806	IGB_CORE_LOCK(adapter);
4807	if (onoff) {
4808		e1000_setup_led(&adapter->hw);
4809		e1000_led_on(&adapter->hw);
4810	} else {
4811		e1000_led_off(&adapter->hw);
4812		e1000_cleanup_led(&adapter->hw);
4813	}
4814	IGB_CORE_UNLOCK(adapter);
4815}
4816
4817/**********************************************************************
4818 *
4819 *  Update the board statistics counters.
4820 *
4821 **********************************************************************/
4822static void
4823igb_update_stats_counters(struct adapter *adapter)
4824{
4825	struct ifnet		*ifp;
4826        struct e1000_hw		*hw = &adapter->hw;
4827	struct e1000_hw_stats	*stats;
4828
4829	/*
4830	** The virtual function adapter has only a
4831	** small controlled set of stats, so do only
4832	** those and return.
4833	*/
4834	if (adapter->hw.mac.type == e1000_vfadapt) {
4835		igb_update_vf_stats_counters(adapter);
4836		return;
4837	}
4838
4839	stats = (struct e1000_hw_stats	*)adapter->stats;
4840
4841	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4842	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4843		stats->symerrs +=
4844		    E1000_READ_REG(hw,E1000_SYMERRS);
4845		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4846	}
4847
4848	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4849	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4850	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4851	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4852
4853	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4854	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4855	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4856	stats->dc += E1000_READ_REG(hw, E1000_DC);
4857	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4858	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4859	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4860	/*
4861	** For watchdog management we need to know if we have been
4862	** paused during the last interval, so capture that here.
4863	*/
4864        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4865        stats->xoffrxc += adapter->pause_frames;
4866	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4867	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4868	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4869	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4870	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4871	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4872	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4873	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4874	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4875	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4876	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4877	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4878
4879	/* For the 64-bit byte counters the low dword must be read first. */
4880	/* Both registers clear on the read of the high dword */
4881
4882	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4883	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4884	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4885	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4886
4887	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4888	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4889	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4890	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4891	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4892
4893	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
4894	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
4895
4896	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4897	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4898	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4899	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4900	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4901	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4902	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4903	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4904	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4905	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4906
4907	/* Interrupt Counts */
4908
4909	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4910	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4911	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4912	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4913	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4914	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4915	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4916	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4917	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4918
4919	/* Host to Card Statistics */
4920
4921	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4922	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4923	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4924	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4925	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4926	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4927	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4928	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4929	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4930	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4931	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4932	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4933	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4934	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4935
4936	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4937	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4938	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4939	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4940	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4941	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4942
4943	ifp = adapter->ifp;
4944	ifp->if_collisions = stats->colc;
4945
4946	/* Rx Errors */
4947	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4948	    stats->crcerrs + stats->algnerrc +
4949	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4950
4951	/* Tx Errors */
4952	ifp->if_oerrors = stats->ecol +
4953	    stats->latecol + adapter->watchdog_events;
4954
4955	/* Driver specific counters */
4956	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4957	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4958	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4959	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4960	adapter->packet_buf_alloc_tx =
4961	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4962	adapter->packet_buf_alloc_rx =
4963	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4964}
4965
4966
4967/**********************************************************************
4968 *
4969 *  Initialize the VF board statistics counters.
4970 *
4971 **********************************************************************/
4972static void
4973igb_vf_init_stats(struct adapter *adapter)
4974{
4975        struct e1000_hw *hw = &adapter->hw;
4976	struct e1000_vf_stats	*stats;
4977
4978	stats = (struct e1000_vf_stats	*)adapter->stats;
4979	if (stats == NULL)
4980		return;
4981        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
4982        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
4983        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
4984        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
4985        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
4986}
4987
4988/**********************************************************************
4989 *
4990 *  Update the VF board statistics counters.
4991 *
4992 **********************************************************************/
4993static void
4994igb_update_vf_stats_counters(struct adapter *adapter)
4995{
4996	struct e1000_hw *hw = &adapter->hw;
4997	struct e1000_vf_stats	*stats;
4998
4999	if (adapter->link_speed == 0)
5000		return;
5001
5002	stats = (struct e1000_vf_stats	*)adapter->stats;
5003
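	/*
	 * UPDATE_VF_REG (defined in the driver header) folds each 32-bit
	 * VF register into its 64-bit counter, using the saved 'last'
	 * value to account for rollover.
	 */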
5004	UPDATE_VF_REG(E1000_VFGPRC,
5005	    stats->last_gprc, stats->gprc);
5006	UPDATE_VF_REG(E1000_VFGORC,
5007	    stats->last_gorc, stats->gorc);
5008	UPDATE_VF_REG(E1000_VFGPTC,
5009	    stats->last_gptc, stats->gptc);
5010	UPDATE_VF_REG(E1000_VFGOTC,
5011	    stats->last_gotc, stats->gotc);
5012	UPDATE_VF_REG(E1000_VFMPRC,
5013	    stats->last_mprc, stats->mprc);
5014}
5015
5016/* Export a single 32-bit register via a read-only sysctl. */
5017static int
5018igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5019{
5020	struct adapter *adapter;
5021	u_int val;
5022
5023	adapter = oidp->oid_arg1;
5024	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5025	return (sysctl_handle_int(oidp, &val, 0, req));
5026}
5027
5028/*
5029**  Tuneable interrupt rate handler
5030*/
5031static int
5032igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5033{
5034	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5035	int			error;
5036	u32			reg, usec, rate;
5037
5038	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5039	usec = ((reg & 0x7FFC) >> 2);
5040	if (usec > 0)
5041		rate = 1000000 / usec;
5042	else
5043		rate = 0;
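	/*
	 * Report the current rate; note that a value written through
	 * this sysctl is accepted but not written back to EITR here,
	 * so the handler is effectively read-only.
	 */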
5044	error = sysctl_handle_int(oidp, &rate, 0, req);
5045	if (error || !req->newptr)
5046		return error;
5047	return 0;
5048}
5049
5050/*
5051 * Add sysctl variables, one per statistic, to the system.
5052 */
5053static void
5054igb_add_hw_stats(struct adapter *adapter)
5055{
5056	device_t dev = adapter->dev;
5057
5058	struct tx_ring *txr = adapter->tx_rings;
5059	struct rx_ring *rxr = adapter->rx_rings;
5060
5061	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5062	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5063	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5064	struct e1000_hw_stats *stats = adapter->stats;
5065
5066	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5067	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5068
5069#define QUEUE_NAME_LEN 32
5070	char namebuf[QUEUE_NAME_LEN];
5071
5072	/* Driver Statistics */
5073	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "link_irq",
5074			CTLFLAG_RD, &adapter->link_irq, 0,
5075			"Link MSIX IRQ Handled");
5076	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5077			CTLFLAG_RD, &adapter->dropped_pkts,
5078			"Driver dropped packets");
5079	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5080			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5081			"Driver tx dma failure in xmit");
5082	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5083			CTLFLAG_RD, &adapter->rx_overruns,
5084			"RX overruns");
5085	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5086			CTLFLAG_RD, &adapter->watchdog_events,
5087			"Watchdog timeouts");
5088
5089	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5090			CTLFLAG_RD, &adapter->device_control,
5091			"Device Control Register");
5092	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5093			CTLFLAG_RD, &adapter->rx_control,
5094			"Receiver Control Register");
5095	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5096			CTLFLAG_RD, &adapter->int_mask,
5097			"Interrupt Mask");
5098	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5099			CTLFLAG_RD, &adapter->eint_mask,
5100			"Extended Interrupt Mask");
5101	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5102			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5103			"Transmit Buffer Packet Allocation");
5104	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5105			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5106			"Receive Buffer Packet Allocation");
5107	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5108			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5109			"Flow Control High Watermark");
5110	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5111			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5112			"Flow Control Low Watermark");
5113
5114	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5115		struct lro_ctrl *lro = &rxr->lro;
5116
5117		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5118		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5119					    CTLFLAG_RD, NULL, "Queue Name");
5120		queue_list = SYSCTL_CHILDREN(queue_node);
5121
5122		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5123				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5124				sizeof(&adapter->queues[i]),
5125				igb_sysctl_interrupt_rate_handler,
5126				"IU", "Interrupt Rate");
5127
5128		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5129				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5130				E1000_TDH(txr->me),
5131				igb_sysctl_reg_handler, "IU",
5132				"Transmit Descriptor Head");
5133		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5134				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5135				E1000_TDT(txr->me),
5136				igb_sysctl_reg_handler, "IU",
5137				"Transmit Descriptor Tail");
5138		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5139				CTLFLAG_RD, &txr->no_desc_avail,
5140				"Queue No Descriptor Available");
5141		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5142				CTLFLAG_RD, &txr->tx_packets,
5143				"Queue Packets Transmitted");
5144
5145		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5146				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5147				E1000_RDH(rxr->me),
5148				igb_sysctl_reg_handler, "IU",
5149				"Receive Descriptor Head");
5150		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5151				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5152				E1000_RDT(rxr->me),
5153				igb_sysctl_reg_handler, "IU",
5154				"Receive Descriptor Tail");
5155		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5156				CTLFLAG_RD, &rxr->rx_packets,
5157				"Queue Packets Received");
5158		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5159				CTLFLAG_RD, &rxr->rx_bytes,
5160				"Queue Bytes Received");
5161		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5162				CTLFLAG_RD, &lro->lro_queued, 0,
5163				"LRO Queued");
5164		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5165				CTLFLAG_RD, &lro->lro_flushed, 0,
5166				"LRO Flushed");
5167	}
5168
5169	/* MAC stats get their own sub node */
5170
5171	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5172				    CTLFLAG_RD, NULL, "MAC Statistics");
5173	stat_list = SYSCTL_CHILDREN(stat_node);
5174
5175	/*
5176	** The VF adapter has a very limited set of stats
5177	** since it is not managing the physical hardware.
5178	*/
5179	if (adapter->hw.mac.type == e1000_vfadapt) {
5180		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5181				CTLFLAG_RD, &stats->gprc,
5182				"Good Packets Received");
5183		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5184				CTLFLAG_RD, &stats->gptc,
5185				"Good Packets Transmitted");
5186		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5187				CTLFLAG_RD, &stats->gorc,
5188				"Good Octets Received");
5189		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5190				CTLFLAG_RD, &stats->gotc,
5191				"Good Octets Transmitted");
5192		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5193				CTLFLAG_RD, &stats->mprc,
5194				"Multicast Packets Received");
5195		return;
5196	}
5197
5198	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5199			CTLFLAG_RD, &stats->ecol,
5200			"Excessive collisions");
5201	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5202			CTLFLAG_RD, &stats->scc,
5203			"Single collisions");
5204	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5205			CTLFLAG_RD, &stats->mcc,
5206			"Multiple collisions");
5207	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5208			CTLFLAG_RD, &stats->latecol,
5209			"Late collisions");
5210	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5211			CTLFLAG_RD, &stats->colc,
5212			"Collision Count");
5213	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5214			CTLFLAG_RD, &stats->symerrs,
5215			"Symbol Errors");
5216	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5217			CTLFLAG_RD, &stats->sec,
5218			"Sequence Errors");
5219	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5220			CTLFLAG_RD, &stats->dc,
5221			"Defer Count");
5222	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5223			CTLFLAG_RD, &stats->mpc,
5224			"Missed Packets");
5225	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5226			CTLFLAG_RD, &stats->rnbc,
5227			"Receive No Buffers");
5228	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5229			CTLFLAG_RD, &stats->ruc,
5230			"Receive Undersize");
5231	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5232			CTLFLAG_RD, &stats->rfc,
5233			"Fragmented Packets Received");
5234	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5235			CTLFLAG_RD, &stats->roc,
5236			"Oversized Packets Received");
5237	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5238			CTLFLAG_RD, &stats->rjc,
5239			"Received Jabber");
5240	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5241			CTLFLAG_RD, &stats->rxerrc,
5242			"Receive Errors");
5243	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5244			CTLFLAG_RD, &stats->crcerrs,
5245			"CRC errors");
5246	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5247			CTLFLAG_RD, &stats->algnerrc,
5248			"Alignment Errors");
5249	/* On 82575 these are collision counts */
5250	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5251			CTLFLAG_RD, &stats->cexterr,
5252			"Collision/Carrier extension errors");
5253	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5254			CTLFLAG_RD, &stats->xonrxc,
5255			"XON Received");
5256	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5257			CTLFLAG_RD, &stats->xontxc,
5258			"XON Transmitted");
5259	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5260			CTLFLAG_RD, &stats->xoffrxc,
5261			"XOFF Received");
5262	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5263			CTLFLAG_RD, &stats->xofftxc,
5264			"XOFF Transmitted");
5265	/* Packet Reception Stats */
5266	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5267			CTLFLAG_RD, &stats->tpr,
5268			"Total Packets Received");
5269	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5270			CTLFLAG_RD, &stats->gprc,
5271			"Good Packets Received");
5272	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5273			CTLFLAG_RD, &stats->bprc,
5274			"Broadcast Packets Received");
5275	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5276			CTLFLAG_RD, &stats->mprc,
5277			"Multicast Packets Received");
5278	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5279			CTLFLAG_RD, &stats->prc64,
5280			"64 byte frames received");
5281	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5282			CTLFLAG_RD, &stats->prc127,
5283			"65-127 byte frames received");
5284	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5285			CTLFLAG_RD, &stats->prc255,
5286			"128-255 byte frames received");
5287	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5288			CTLFLAG_RD, &stats->prc511,
5289			"256-511 byte frames received");
5290	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5291			CTLFLAG_RD, &stats->prc1023,
5292			"512-1023 byte frames received");
5293	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5294			CTLFLAG_RD, &stats->prc1522,
5295			"1024-1522 byte frames received");
5296	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5297			CTLFLAG_RD, &stats->gorc,
5298			"Good Octets Received");
5299
5300	/* Packet Transmission Stats */
5301	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5302			CTLFLAG_RD, &stats->gotc,
5303			"Good Octets Transmitted");
5304	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5305			CTLFLAG_RD, &stats->tpt,
5306			"Total Packets Transmitted");
5307	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5308			CTLFLAG_RD, &stats->gptc,
5309			"Good Packets Transmitted");
5310	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5311			CTLFLAG_RD, &stats->bptc,
5312			"Broadcast Packets Transmitted");
5313	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5314			CTLFLAG_RD, &stats->mptc,
5315			"Multicast Packets Transmitted");
5316	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5317			CTLFLAG_RD, &stats->ptc64,
5318			"64 byte frames transmitted");
5319	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5320			CTLFLAG_RD, &stats->ptc127,
5321			"65-127 byte frames transmitted");
5322	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5323			CTLFLAG_RD, &stats->ptc255,
5324			"128-255 byte frames transmitted");
5325	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5326			CTLFLAG_RD, &stats->ptc511,
5327			"256-511 byte frames transmitted");
5328	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5329			CTLFLAG_RD, &stats->ptc1023,
5330			"512-1023 byte frames transmitted");
5331	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5332			CTLFLAG_RD, &stats->ptc1522,
5333			"1024-1522 byte frames transmitted");
5334	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5335			CTLFLAG_RD, &stats->tsctc,
5336			"TSO Contexts Transmitted");
5337	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5338			CTLFLAG_RD, &stats->tsctfc,
5339			"TSO Contexts Failed");
5340
5341
5342	/* Interrupt Stats */
5343
5344	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5345				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5346	int_list = SYSCTL_CHILDREN(int_node);
5347
5348	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5349			CTLFLAG_RD, &stats->iac,
5350			"Interrupt Assertion Count");
5351
5352	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5353			CTLFLAG_RD, &stats->icrxptc,
5354			"Interrupt Cause Rx Pkt Timer Expire Count");
5355
5356	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5357			CTLFLAG_RD, &stats->icrxatc,
5358			"Interrupt Cause Rx Abs Timer Expire Count");
5359
5360	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5361			CTLFLAG_RD, &stats->ictxptc,
5362			"Interrupt Cause Tx Pkt Timer Expire Count");
5363
5364	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5365			CTLFLAG_RD, &stats->ictxatc,
5366			"Interrupt Cause Tx Abs Timer Expire Count");
5367
5368	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5369			CTLFLAG_RD, &stats->ictxqec,
5370			"Interrupt Cause Tx Queue Empty Count");
5371
5372	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5373			CTLFLAG_RD, &stats->ictxqmtc,
5374			"Interrupt Cause Tx Queue Min Thresh Count");
5375
5376	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5377			CTLFLAG_RD, &stats->icrxdmtc,
5378			"Interrupt Cause Rx Desc Min Thresh Count");
5379
5380	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5381			CTLFLAG_RD, &stats->icrxoc,
5382			"Interrupt Cause Receiver Overrun Count");
5383
5384	/* Host to Card Stats */
5385
5386	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5387				    CTLFLAG_RD, NULL,
5388				    "Host to Card Statistics");
5389
5390	host_list = SYSCTL_CHILDREN(host_node);
5391
5392	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5393			CTLFLAG_RD, &stats->cbtmpc,
5394			"Circuit Breaker Tx Packet Count");
5395
5396	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5397			CTLFLAG_RD, &stats->htdpmc,
5398			"Host Transmit Discarded Packets");
5399
5400	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5401			CTLFLAG_RD, &stats->rpthc,
5402			"Rx Packets To Host");
5403
5404	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5405			CTLFLAG_RD, &stats->cbrmpc,
5406			"Circuit Breaker Rx Packet Count");
5407
5408	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5409			CTLFLAG_RD, &stats->cbrdpc,
5410			"Circuit Breaker Rx Dropped Count");
5411
5412	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5413			CTLFLAG_RD, &stats->hgptc,
5414			"Host Good Packets Tx Count");
5415
5416	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5417			CTLFLAG_RD, &stats->htcbdpc,
5418			"Host Tx Circuit Breaker Dropped Count");
5419
5420	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5421			CTLFLAG_RD, &stats->hgorc,
5422			"Host Good Octets Received Count");
5423
5424	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5425			CTLFLAG_RD, &stats->hgotc,
5426			"Host Good Octets Transmit Count");
5427
5428	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "length_errors",
5429			CTLFLAG_RD, &stats->lenerrs,
5430			"Length Errors");
5431
5432	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5433			CTLFLAG_RD, &stats->scvpc,
5434			"SerDes/SGMII Code Violation Pkt Count");
5435
5436	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5437			CTLFLAG_RD, &stats->hrmpc,
5438			"Header Redirection Missed Packet Count");
5439}
5440
5441
5442/**********************************************************************
5443 *
5444 *  This routine provides a way to dump out the adapter EEPROM,
5445 *  often a useful debug/service tool. Only the first 32 words
5446 *  are dumped; the fields of interest live within that range.
5447 *
5448 **********************************************************************/
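/*
 * Illustrative trigger, assuming the handler is registered elsewhere in
 * the driver as an "nvm" node under the device tree (unit 0 shown):
 *
 *	# sysctl dev.igb.0.nvm=1
 *
 * Writing 1 causes igb_print_nvm_info() to dump the words to the
 * console; any other value is ignored.
 */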
5449static int
5450igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5451{
5452	struct adapter *adapter;
5453	int error;
5454	int result;
5455
5456	result = -1;
5457	error = sysctl_handle_int(oidp, &result, 0, req);
5458
5459	if (error || !req->newptr)
5460		return (error);
5461
5462	/*
5463	 * This value will cause a hex dump of the
5464	 * first 32 16-bit words of the EEPROM to
5465	 * the screen.
5466	 */
5467	if (result == 1) {
5468		adapter = (struct adapter *)arg1;
5469		igb_print_nvm_info(adapter);
5470	}
5471
5472	return (error);
5473}
5474
5475static void
5476igb_print_nvm_info(struct adapter *adapter)
5477{
5478	u16	eeprom_data;
5479	int	i, j, row = 0;
5480
5481	/* It's a bit crude, but it gets the job done */
5482	printf("\nInterface EEPROM Dump:\n");
5483	printf("Offset\n0x0000  ");
5484	for (i = 0, j = 0; i < 32; i++, j++) {
5485		if (j == 8) { /* Make the offset block */
5486			j = 0; ++row;
5487			printf("\n0x00%x0  ", row);
5488		}
5489		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5490		printf("%04x ", eeprom_data);
5491	}
5492	printf("\n");
5493}
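/*
 * The resulting console output is a small hex table, roughly:
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	...
 *
 * (word values shown as placeholders)
 */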
5494
5495static void
5496igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5497	const char *description, int *limit, int value)
5498{
5499	*limit = value;
5500	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5501	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5502	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5503}
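/*
 * A typical call (illustrative; argument values are assumptions) seeds
 * the limit and exposes it as a writable sysctl under the device tree:
 *
 *	igb_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */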
5504