if_igb.c (r286162) -> if_igb.c (r286833)
1/******************************************************************************
2
3 Copyright (c) 2001-2013, Intel Corporation
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 286162 2015-08-01 20:40:37Z hselasky $*/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 286833 2015-08-16 20:13:58Z sbruno $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#ifndef IGB_LEGACY_TX
48#include <sys/buf_ring.h>
49#endif
50#include <sys/bus.h>
51#include <sys/endian.h>
52#include <sys/kernel.h>
53#include <sys/kthread.h>
54#include <sys/malloc.h>
55#include <sys/mbuf.h>
56#include <sys/module.h>
57#include <sys/rman.h>
58#include <sys/socket.h>
59#include <sys/sockio.h>
60#include <sys/sysctl.h>
61#include <sys/taskqueue.h>
62#include <sys/eventhandler.h>
63#include <sys/pcpu.h>
64#include <sys/smp.h>
65#include <machine/smp.h>
66#include <machine/bus.h>
67#include <machine/resource.h>
68
69#include <net/bpf.h>
70#include <net/ethernet.h>
71#include <net/if.h>
72#include <net/if_var.h>
73#include <net/if_arp.h>
74#include <net/if_dl.h>
75#include <net/if_media.h>
76#ifdef RSS
77#include <net/rss_config.h>
78#endif
79
80#include <net/if_types.h>
81#include <net/if_vlan_var.h>
82
83#include <netinet/in_systm.h>
84#include <netinet/in.h>
85#include <netinet/if_ether.h>
86#include <netinet/ip.h>
87#include <netinet/ip6.h>
88#include <netinet/tcp.h>
89#include <netinet/tcp_lro.h>
90#include <netinet/udp.h>
91
92#include <machine/in_cksum.h>
93#include <dev/led/led.h>
94#include <dev/pci/pcivar.h>
95#include <dev/pci/pcireg.h>
96
97#include "e1000_api.h"
98#include "e1000_82575.h"
99#include "if_igb.h"
100
101/*********************************************************************
102 * Set this to one to display debug statistics
103 *********************************************************************/
104int igb_display_debug_stats = 0;
105
106/*********************************************************************
107 * Driver version:
108 *********************************************************************/
109char igb_driver_version[] = "version - 2.4.0";
110
111
112/*********************************************************************
113 * PCI Device ID Table
114 *
115 * Used by probe to select devices to load on
116 * Last field stores an index into e1000_strings
117 * Last entry must be all 0s
118 *
119 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
120 *********************************************************************/
121
122static igb_vendor_info_t igb_vendor_info_array[] =
123{
124 { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
126 PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
128 PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
135 PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
137 PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
139 PCI_ANY_ID, PCI_ANY_ID, 0},
140 { 0x8086, E1000_DEV_ID_82576_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
142 { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
144 { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
148 PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_DH89XXCC_SFP, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
153 PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_I350_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_I350_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_I350_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_I350_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_I350_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_I210_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_I210_COPPER_IT, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
162 PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
164 PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
166 PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_I210_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_I210_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_I210_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_I211_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
172 PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
174 PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_I354_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
176 /* required last entry */
177 { 0, 0, 0, 0, 0}
178};
179
180/*********************************************************************
181 * Table of branding strings for all supported NICs.
182 *********************************************************************/
183
184static char *igb_strings[] = {
185 "Intel(R) PRO/1000 Network Connection"
186};
187
188/*********************************************************************
189 * Function prototypes
190 *********************************************************************/
191static int igb_probe(device_t);
192static int igb_attach(device_t);
193static int igb_detach(device_t);
194static int igb_shutdown(device_t);
195static int igb_suspend(device_t);
196static int igb_resume(device_t);
197#ifndef IGB_LEGACY_TX
198static int igb_mq_start(struct ifnet *, struct mbuf *);
199static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
200static void igb_qflush(struct ifnet *);
201static void igb_deferred_mq_start(void *, int);
202#else
203static void igb_start(struct ifnet *);
204static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
205#endif
206static int igb_ioctl(struct ifnet *, u_long, caddr_t);
207static uint64_t igb_get_counter(if_t, ift_counter);
208static void igb_init(void *);
209static void igb_init_locked(struct adapter *);
210static void igb_stop(void *);
211static void igb_media_status(struct ifnet *, struct ifmediareq *);
212static int igb_media_change(struct ifnet *);
213static void igb_identify_hardware(struct adapter *);
214static int igb_allocate_pci_resources(struct adapter *);
215static int igb_allocate_msix(struct adapter *);
216static int igb_allocate_legacy(struct adapter *);
217static int igb_setup_msix(struct adapter *);
218static void igb_free_pci_resources(struct adapter *);
219static void igb_local_timer(void *);
220static void igb_reset(struct adapter *);
221static int igb_setup_interface(device_t, struct adapter *);
222static int igb_allocate_queues(struct adapter *);
223static void igb_configure_queues(struct adapter *);
224
225static int igb_allocate_transmit_buffers(struct tx_ring *);
226static void igb_setup_transmit_structures(struct adapter *);
227static void igb_setup_transmit_ring(struct tx_ring *);
228static void igb_initialize_transmit_units(struct adapter *);
229static void igb_free_transmit_structures(struct adapter *);
230static void igb_free_transmit_buffers(struct tx_ring *);
231
232static int igb_allocate_receive_buffers(struct rx_ring *);
233static int igb_setup_receive_structures(struct adapter *);
234static int igb_setup_receive_ring(struct rx_ring *);
235static void igb_initialize_receive_units(struct adapter *);
236static void igb_free_receive_structures(struct adapter *);
237static void igb_free_receive_buffers(struct rx_ring *);
238static void igb_free_receive_ring(struct rx_ring *);
239
240static void igb_enable_intr(struct adapter *);
241static void igb_disable_intr(struct adapter *);
242static void igb_update_stats_counters(struct adapter *);
243static bool igb_txeof(struct tx_ring *);
244
245static __inline void igb_rx_discard(struct rx_ring *, int);
246static __inline void igb_rx_input(struct rx_ring *,
247 struct ifnet *, struct mbuf *, u32);
248
249static bool igb_rxeof(struct igb_queue *, int, int *);
250static void igb_rx_checksum(u32, struct mbuf *, u32);
251static int igb_tx_ctx_setup(struct tx_ring *,
252 struct mbuf *, u32 *, u32 *);
253static int igb_tso_setup(struct tx_ring *,
254 struct mbuf *, u32 *, u32 *);
255static void igb_set_promisc(struct adapter *);
256static void igb_disable_promisc(struct adapter *);
257static void igb_set_multi(struct adapter *);
258static void igb_update_link_status(struct adapter *);
259static void igb_refresh_mbufs(struct rx_ring *, int);
260
261static void igb_register_vlan(void *, struct ifnet *, u16);
262static void igb_unregister_vlan(void *, struct ifnet *, u16);
263static void igb_setup_vlan_hw_support(struct adapter *);
264
265static int igb_xmit(struct tx_ring *, struct mbuf **);
266static int igb_dma_malloc(struct adapter *, bus_size_t,
267 struct igb_dma_alloc *, int);
268static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
269static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
270static void igb_print_nvm_info(struct adapter *);
271static int igb_is_valid_ether_addr(u8 *);
272static void igb_add_hw_stats(struct adapter *);
273
274static void igb_vf_init_stats(struct adapter *);
275static void igb_update_vf_stats_counters(struct adapter *);
276
277/* Management and WOL Support */
278static void igb_init_manageability(struct adapter *);
279static void igb_release_manageability(struct adapter *);
280static void igb_get_hw_control(struct adapter *);
281static void igb_release_hw_control(struct adapter *);
282static void igb_enable_wakeup(device_t);
283static void igb_led_func(void *, int);
284
285static int igb_irq_fast(void *);
286static void igb_msix_que(void *);
287static void igb_msix_link(void *);
288static void igb_handle_que(void *context, int pending);
289static void igb_handle_link(void *context, int pending);
290static void igb_handle_link_locked(struct adapter *);
291
292static void igb_set_sysctl_value(struct adapter *, const char *,
293 const char *, int *, int);
294static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
295static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
296static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
297
298#ifdef DEVICE_POLLING
299static poll_handler_t igb_poll;
 300 #endif /* DEVICE_POLLING */
301
302/*********************************************************************
303 * FreeBSD Device Interface Entry Points
304 *********************************************************************/
305
306static device_method_t igb_methods[] = {
307 /* Device interface */
308 DEVMETHOD(device_probe, igb_probe),
309 DEVMETHOD(device_attach, igb_attach),
310 DEVMETHOD(device_detach, igb_detach),
311 DEVMETHOD(device_shutdown, igb_shutdown),
312 DEVMETHOD(device_suspend, igb_suspend),
313 DEVMETHOD(device_resume, igb_resume),
314 DEVMETHOD_END
315};
316
317static driver_t igb_driver = {
318 "igb", igb_methods, sizeof(struct adapter),
319};
320
321static devclass_t igb_devclass;
322DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
323MODULE_DEPEND(igb, pci, 1, 1, 1);
324MODULE_DEPEND(igb, ether, 1, 1, 1);
325#ifdef DEV_NETMAP
326MODULE_DEPEND(igb, netmap, 1, 1, 1);
327#endif /* DEV_NETMAP */
328
329/*********************************************************************
330 * Tunable default values.
331 *********************************************************************/
332
333static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
334
335/* Descriptor defaults */
336static int igb_rxd = IGB_DEFAULT_RXD;
337static int igb_txd = IGB_DEFAULT_TXD;
338SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
339 "Number of receive descriptors per queue");
340SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
341 "Number of transmit descriptors per queue");
342
343/*
344** AIM: Adaptive Interrupt Moderation
345** which means that the interrupt rate
346** is varied over time based on the
347** traffic for that interrupt vector
348*/
349static int igb_enable_aim = TRUE;
350SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
351 "Enable adaptive interrupt moderation");
352
353/*
354 * MSIX should be the default for best performance,
355 * but this allows it to be forced off for testing.
356 */
357static int igb_enable_msix = 1;
358SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
359 "Enable MSI-X interrupts");
360
361/*
362** Tuneable Interrupt rate
363*/
364static int igb_max_interrupt_rate = 8000;
365SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
366 &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
367
368#ifndef IGB_LEGACY_TX
369/*
370** Tuneable number of buffers in the buf-ring (drbr_xxx)
371*/
372static int igb_buf_ring_size = IGB_BR_SIZE;
373SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
374 &igb_buf_ring_size, 0, "Size of the bufring");
375#endif
376
377/*
378** Header split causes the packet header to
 379 ** be dma'd to a separate mbuf from the payload.
 380 ** This can have memory alignment benefits. But
 381 ** another plus is that small packets often fit
 382 ** into the header and thus use no cluster. It's
 383 ** a very workload-dependent feature.
384*/
385static int igb_header_split = FALSE;
386SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
387 "Enable receive mbuf header split");
388
389/*
390** This will autoconfigure based on the
391** number of CPUs and max supported
392** MSIX messages if left at 0.
393*/
394static int igb_num_queues = 0;
395SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
396 "Number of queues to configure, 0 indicates autoconfigure");
397
398/*
399** Global variable to store last used CPU when binding queues
400** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
401** queue is bound to a cpu.
402*/
403static int igb_last_bind_cpu = -1;
404
405/* How many packets rxeof tries to clean at a time */
406static int igb_rx_process_limit = 100;
407SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
408 &igb_rx_process_limit, 0,
409 "Maximum number of received packets to process at a time, -1 means unlimited");
410
411#ifdef DEV_NETMAP /* see ixgbe.c for details */
412#include <dev/netmap/if_igb_netmap.h>
413#endif /* DEV_NETMAP */
414/*********************************************************************
415 * Device identification routine
416 *
417 * igb_probe determines if the driver should be loaded on
418 * adapter based on PCI vendor/device id of the adapter.
419 *
420 * return BUS_PROBE_DEFAULT on success, positive on failure
421 *********************************************************************/
422
423static int
424igb_probe(device_t dev)
425{
426 char adapter_name[60];
427 uint16_t pci_vendor_id = 0;
428 uint16_t pci_device_id = 0;
429 uint16_t pci_subvendor_id = 0;
430 uint16_t pci_subdevice_id = 0;
431 igb_vendor_info_t *ent;
432
433 INIT_DEBUGOUT("igb_probe: begin");
434
435 pci_vendor_id = pci_get_vendor(dev);
436 if (pci_vendor_id != IGB_VENDOR_ID)
437 return (ENXIO);
438
439 pci_device_id = pci_get_device(dev);
440 pci_subvendor_id = pci_get_subvendor(dev);
441 pci_subdevice_id = pci_get_subdevice(dev);
442
443 ent = igb_vendor_info_array;
444 while (ent->vendor_id != 0) {
445 if ((pci_vendor_id == ent->vendor_id) &&
446 (pci_device_id == ent->device_id) &&
447
448 ((pci_subvendor_id == ent->subvendor_id) ||
449 (ent->subvendor_id == PCI_ANY_ID)) &&
450
451 ((pci_subdevice_id == ent->subdevice_id) ||
452 (ent->subdevice_id == PCI_ANY_ID))) {
453 sprintf(adapter_name, "%s %s",
454 igb_strings[ent->index],
455 igb_driver_version);
456 device_set_desc_copy(dev, adapter_name);
457 return (BUS_PROBE_DEFAULT);
458 }
459 ent++;
460 }
461
462 return (ENXIO);
463}
464
465/*********************************************************************
466 * Device initialization routine
467 *
468 * The attach entry point is called when the driver is being loaded.
469 * This routine identifies the type of hardware, allocates all resources
470 * and initializes the hardware.
471 *
472 * return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476igb_attach(device_t dev)
477{
478 struct adapter *adapter;
479 int error = 0;
480 u16 eeprom_data;
481
482 INIT_DEBUGOUT("igb_attach: begin");
483
484 if (resource_disabled("igb", device_get_unit(dev))) {
485 device_printf(dev, "Disabled by device hint\n");
486 return (ENXIO);
487 }
488
489 adapter = device_get_softc(dev);
490 adapter->dev = adapter->osdep.dev = dev;
491 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
492
493 /* SYSCTL stuff */
494 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
495 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
496 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
497 igb_sysctl_nvm_info, "I", "NVM Information");
498
499 igb_set_sysctl_value(adapter, "enable_aim",
500 "Interrupt Moderation", &adapter->enable_aim,
501 igb_enable_aim);
502
503 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
504 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
505 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
506 adapter, 0, igb_set_flowcntl, "I", "Flow Control");
507
508 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
509
510 /* Determine hardware and mac info */
511 igb_identify_hardware(adapter);
512
513 /* Setup PCI resources */
514 if (igb_allocate_pci_resources(adapter)) {
515 device_printf(dev, "Allocation of PCI resources failed\n");
516 error = ENXIO;
517 goto err_pci;
518 }
519
520 /* Do Shared Code initialization */
521 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
522 device_printf(dev, "Setup of Shared code failed\n");
523 error = ENXIO;
524 goto err_pci;
525 }
526
527 e1000_get_bus_info(&adapter->hw);
528
529 /* Sysctl for limiting the amount of work done in the taskqueue */
530 igb_set_sysctl_value(adapter, "rx_processing_limit",
531 "max number of rx packets to process",
532 &adapter->rx_process_limit, igb_rx_process_limit);
533
534 /*
535 * Validate number of transmit and receive descriptors. It
536 * must not exceed hardware maximum, and must be multiple
537 * of E1000_DBA_ALIGN.
538 */
539 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
540 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
541 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
542 IGB_DEFAULT_TXD, igb_txd);
543 adapter->num_tx_desc = IGB_DEFAULT_TXD;
544 } else
545 adapter->num_tx_desc = igb_txd;
546 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
547 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
548 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
549 IGB_DEFAULT_RXD, igb_rxd);
550 adapter->num_rx_desc = IGB_DEFAULT_RXD;
551 } else
552 adapter->num_rx_desc = igb_rxd;
553
554 adapter->hw.mac.autoneg = DO_AUTO_NEG;
555 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
556 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
557
558 /* Copper options */
559 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
560 adapter->hw.phy.mdix = AUTO_ALL_MODES;
561 adapter->hw.phy.disable_polarity_correction = FALSE;
562 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
563 }
564
565 /*
566 * Set the frame limits assuming
567 * standard ethernet sized frames.
568 */
569 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
570
571 /*
572 ** Allocate and Setup Queues
573 */
574 if (igb_allocate_queues(adapter)) {
575 error = ENOMEM;
576 goto err_pci;
577 }
578
579 /* Allocate the appropriate stats memory */
580 if (adapter->vf_ifp) {
581 adapter->stats =
582 (struct e1000_vf_stats *)malloc(sizeof \
583 (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
584 igb_vf_init_stats(adapter);
585 } else
586 adapter->stats =
587 (struct e1000_hw_stats *)malloc(sizeof \
588 (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
589 if (adapter->stats == NULL) {
590 device_printf(dev, "Can not allocate stats memory\n");
591 error = ENOMEM;
592 goto err_late;
593 }
594
595 /* Allocate multicast array memory. */
596 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
597 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
598 if (adapter->mta == NULL) {
599 device_printf(dev, "Can not allocate multicast setup array\n");
600 error = ENOMEM;
601 goto err_late;
602 }
603
604 /* Some adapter-specific advanced features */
605 if (adapter->hw.mac.type >= e1000_i350) {
606 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
607 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
608 OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
609 adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
610 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
611 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
612 OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
613 adapter, 0, igb_sysctl_eee, "I",
614 "Disable Energy Efficient Ethernet");
615 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
616 if (adapter->hw.mac.type == e1000_i354)
617 e1000_set_eee_i354(&adapter->hw);
618 else
619 e1000_set_eee_i350(&adapter->hw);
620 }
621 }
622
623 /*
 624 ** Start from a known state; this is
 625 ** important for reading the nvm and
 626 ** mac address from it.
627 */
628 e1000_reset_hw(&adapter->hw);
629
630 /* Make sure we have a good EEPROM before we read from it */
631 if (((adapter->hw.mac.type != e1000_i210) &&
632 (adapter->hw.mac.type != e1000_i211)) &&
633 (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
634 /*
635 ** Some PCI-E parts fail the first check due to
636 ** the link being in sleep state, call it again,
 637 ** if it fails a second time, it's a real issue.
638 */
639 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
640 device_printf(dev,
641 "The EEPROM Checksum Is Not Valid\n");
642 error = EIO;
643 goto err_late;
644 }
645 }
646
647 /*
648 ** Copy the permanent MAC address out of the EEPROM
649 */
650 if (e1000_read_mac_addr(&adapter->hw) < 0) {
651 device_printf(dev, "EEPROM read error while reading MAC"
652 " address\n");
653 error = EIO;
654 goto err_late;
655 }
656 /* Check its sanity */
657 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
658 device_printf(dev, "Invalid MAC address\n");
659 error = EIO;
660 goto err_late;
661 }
662
663 /* Setup OS specific network interface */
664 if (igb_setup_interface(dev, adapter) != 0)
665 goto err_late;
666
667 /* Now get a good starting state */
668 igb_reset(adapter);
669
670 /* Initialize statistics */
671 igb_update_stats_counters(adapter);
672
673 adapter->hw.mac.get_link_status = 1;
674 igb_update_link_status(adapter);
675
676 /* Indicate SOL/IDER usage */
677 if (e1000_check_reset_block(&adapter->hw))
678 device_printf(dev,
679 "PHY reset is blocked due to SOL/IDER session.\n");
680
681 /* Determine if we have to control management hardware */
682 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
683
684 /*
685 * Setup Wake-on-Lan
686 */
687 /* APME bit in EEPROM is mapped to WUC.APME */
688 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
689 if (eeprom_data)
690 adapter->wol = E1000_WUFC_MAG;
691
692 /* Register for VLAN events */
693 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
694 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
695 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
696 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
697
698 igb_add_hw_stats(adapter);
699
700 /* Tell the stack that the interface is not active */
701 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
702 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
703
704 adapter->led_dev = led_create(igb_led_func, adapter,
705 device_get_nameunit(dev));
706
707 /*
708 ** Configure Interrupts
709 */
710 if ((adapter->msix > 1) && (igb_enable_msix))
711 error = igb_allocate_msix(adapter);
712 else /* MSI or Legacy */
713 error = igb_allocate_legacy(adapter);
714 if (error)
715 goto err_late;
716
717#ifdef DEV_NETMAP
718 igb_netmap_attach(adapter);
719#endif /* DEV_NETMAP */
720 INIT_DEBUGOUT("igb_attach: end");
721
722 return (0);
723
724err_late:
725 igb_detach(dev);
726 igb_free_transmit_structures(adapter);
727 igb_free_receive_structures(adapter);
728 igb_release_hw_control(adapter);
729err_pci:
730 igb_free_pci_resources(adapter);
731 if (adapter->ifp != NULL)
732 if_free(adapter->ifp);
733 free(adapter->mta, M_DEVBUF);
734 IGB_CORE_LOCK_DESTROY(adapter);
735
736 return (error);
737}
738
739/*********************************************************************
740 * Device removal routine
741 *
742 * The detach entry point is called when the driver is being removed.
743 * This routine stops the adapter and deallocates all the resources
744 * that were allocated for driver operation.
745 *
746 * return 0 on success, positive on failure
747 *********************************************************************/
748
749static int
750igb_detach(device_t dev)
751{
752 struct adapter *adapter = device_get_softc(dev);
753 struct ifnet *ifp = adapter->ifp;
754
755 INIT_DEBUGOUT("igb_detach: begin");
756
757 /* Make sure VLANS are not using driver */
758 if (adapter->ifp->if_vlantrunk != NULL) {
759 device_printf(dev,"Vlan in use, detach first\n");
760 return (EBUSY);
761 }
762
763 ether_ifdetach(adapter->ifp);
764
765 if (adapter->led_dev != NULL)
766 led_destroy(adapter->led_dev);
767
768#ifdef DEVICE_POLLING
769 if (ifp->if_capenable & IFCAP_POLLING)
770 ether_poll_deregister(ifp);
771#endif
772
773 IGB_CORE_LOCK(adapter);
774 adapter->in_detach = 1;
775 igb_stop(adapter);
776 IGB_CORE_UNLOCK(adapter);
777
778 e1000_phy_hw_reset(&adapter->hw);
779
780 /* Give control back to firmware */
781 igb_release_manageability(adapter);
782 igb_release_hw_control(adapter);
783
784 if (adapter->wol) {
785 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
786 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
787 igb_enable_wakeup(dev);
788 }
789
790 /* Unregister VLAN events */
791 if (adapter->vlan_attach != NULL)
792 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793 if (adapter->vlan_detach != NULL)
794 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796 callout_drain(&adapter->timer);
797
798#ifdef DEV_NETMAP
799 netmap_detach(adapter->ifp);
800#endif /* DEV_NETMAP */
801 igb_free_pci_resources(adapter);
802 bus_generic_detach(dev);
803 if_free(ifp);
804
805 igb_free_transmit_structures(adapter);
806 igb_free_receive_structures(adapter);
807 if (adapter->mta != NULL)
808 free(adapter->mta, M_DEVBUF);
809
810 IGB_CORE_LOCK_DESTROY(adapter);
811
812 return (0);
813}
814
815/*********************************************************************
816 *
817 * Shutdown entry point
818 *
819 **********************************************************************/
820
821static int
822igb_shutdown(device_t dev)
823{
824 return igb_suspend(dev);
825}
826
827/*
828 * Suspend/resume device methods.
829 */
830static int
831igb_suspend(device_t dev)
832{
833 struct adapter *adapter = device_get_softc(dev);
834
835 IGB_CORE_LOCK(adapter);
836
837 igb_stop(adapter);
838
839 igb_release_manageability(adapter);
840 igb_release_hw_control(adapter);
841
842 if (adapter->wol) {
843 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
844 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
845 igb_enable_wakeup(dev);
846 }
847
848 IGB_CORE_UNLOCK(adapter);
849
850 return bus_generic_suspend(dev);
851}
852
853static int
854igb_resume(device_t dev)
855{
856 struct adapter *adapter = device_get_softc(dev);
857 struct tx_ring *txr = adapter->tx_rings;
858 struct ifnet *ifp = adapter->ifp;
859
860 IGB_CORE_LOCK(adapter);
861 igb_init_locked(adapter);
862 igb_init_manageability(adapter);
863
864 if ((ifp->if_flags & IFF_UP) &&
865 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
866 for (int i = 0; i < adapter->num_queues; i++, txr++) {
867 IGB_TX_LOCK(txr);
868#ifndef IGB_LEGACY_TX
869 /* Process the stack queue only if not depleted */
870 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
871 !drbr_empty(ifp, txr->br))
872 igb_mq_start_locked(ifp, txr);
873#else
874 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
875 igb_start_locked(txr, ifp);
876#endif
877 IGB_TX_UNLOCK(txr);
878 }
879 }
880 IGB_CORE_UNLOCK(adapter);
881
882 return bus_generic_resume(dev);
883}
884
885
886#ifdef IGB_LEGACY_TX
887
888/*********************************************************************
889 * Transmit entry point
890 *
891 * igb_start is called by the stack to initiate a transmit.
892 * The driver will remain in this routine as long as there are
893 * packets to transmit and transmit resources are available.
894 * In case resources are not available stack is notified and
895 * the packet is requeued.
896 **********************************************************************/
897
898static void
899igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
900{
901 struct adapter *adapter = ifp->if_softc;
902 struct mbuf *m_head;
903
904 IGB_TX_LOCK_ASSERT(txr);
905
906 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
907 IFF_DRV_RUNNING)
908 return;
909 if (!adapter->link_active)
910 return;
911
912 /* Call cleanup if number of TX descriptors low */
913 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
914 igb_txeof(txr);
915
916 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
917 if (txr->tx_avail <= IGB_MAX_SCATTER) {
918 txr->queue_status |= IGB_QUEUE_DEPLETED;
919 break;
920 }
921 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
922 if (m_head == NULL)
923 break;
924 /*
 925 * Encapsulation can modify our pointer, and/or make it
926 * NULL on failure. In that event, we can't requeue.
927 */
928 if (igb_xmit(txr, &m_head)) {
929 if (m_head != NULL)
930 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
931 if (txr->tx_avail <= IGB_MAX_SCATTER)
932 txr->queue_status |= IGB_QUEUE_DEPLETED;
933 break;
934 }
935
936 /* Send a copy of the frame to the BPF listener */
937 ETHER_BPF_MTAP(ifp, m_head);
938
939 /* Set watchdog on */
940 txr->watchdog_time = ticks;
941 txr->queue_status |= IGB_QUEUE_WORKING;
942 }
943}
944
945/*
946 * Legacy TX driver routine, called from the
947 * stack, always uses tx[0], and spins for it.
948 * Should not be used with multiqueue tx
949 */
950static void
951igb_start(struct ifnet *ifp)
952{
953 struct adapter *adapter = ifp->if_softc;
954 struct tx_ring *txr = adapter->tx_rings;
955
956 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
957 IGB_TX_LOCK(txr);
958 igb_start_locked(txr, ifp);
959 IGB_TX_UNLOCK(txr);
960 }
961 return;
962}
963
964#else /* ~IGB_LEGACY_TX */
965
966/*
967** Multiqueue Transmit Entry:
968** quick turnaround to the stack
969**
970*/
971static int
972igb_mq_start(struct ifnet *ifp, struct mbuf *m)
973{
974 struct adapter *adapter = ifp->if_softc;
975 struct igb_queue *que;
976 struct tx_ring *txr;
977 int i, err = 0;
978#ifdef RSS
979 uint32_t bucket_id;
980#endif
981
982 /* Which queue to use */
983 /*
984 * When doing RSS, map it to the same outbound queue
985 * as the incoming flow would be mapped to.
986 *
 987 * If everything is set up correctly, it should be the
 988 * same bucket as the one the current CPU is in.
989 */
990 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
991#ifdef RSS
992 if (rss_hash2bucket(m->m_pkthdr.flowid,
993 M_HASHTYPE_GET(m), &bucket_id) == 0) {
994 /* XXX TODO: spit out something if bucket_id > num_queues? */
995 i = bucket_id % adapter->num_queues;
996 } else {
997#endif
998 i = m->m_pkthdr.flowid % adapter->num_queues;
999#ifdef RSS
1000 }
1001#endif
1002 } else {
1003 i = curcpu % adapter->num_queues;
1004 }
1005 txr = &adapter->tx_rings[i];
1006 que = &adapter->queues[i];
1007
1008 err = drbr_enqueue(ifp, txr->br, m);
1009 if (err)
1010 return (err);
1011 if (IGB_TX_TRYLOCK(txr)) {
1012 igb_mq_start_locked(ifp, txr);
1013 IGB_TX_UNLOCK(txr);
1014 } else
1015 taskqueue_enqueue(que->tq, &txr->txq_task);
1016
1017 return (0);
1018}
1019
1020static int
1021igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1022{
1023 struct adapter *adapter = txr->adapter;
1024 struct mbuf *next;
1025 int err = 0, enq = 0;
1026
1027 IGB_TX_LOCK_ASSERT(txr);
1028
1029 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
1030 adapter->link_active == 0)
1031 return (ENETDOWN);
1032
1033
1034 /* Process the queue */
1035 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1036 if ((err = igb_xmit(txr, &next)) != 0) {
1037 if (next == NULL) {
1038 /* It was freed, move forward */
1039 drbr_advance(ifp, txr->br);
1040 } else {
1041 /*
1042 * Still have one left, it may not be
1043 * the same since the transmit function
1044 * may have changed it.
1045 */
1046 drbr_putback(ifp, txr->br, next);
1047 }
1048 break;
1049 }
1050 drbr_advance(ifp, txr->br);
1051 enq++;
1052 if (next->m_flags & M_MCAST && adapter->vf_ifp)
1053 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1054 ETHER_BPF_MTAP(ifp, next);
1055 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1056 break;
1057 }
1058 if (enq > 0) {
1059 /* Set the watchdog */
1060 txr->queue_status |= IGB_QUEUE_WORKING;
1061 txr->watchdog_time = ticks;
1062 }
1063 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1064 igb_txeof(txr);
1065 if (txr->tx_avail <= IGB_MAX_SCATTER)
1066 txr->queue_status |= IGB_QUEUE_DEPLETED;
1067 return (err);
1068}
1069
1070/*
1071 * Called from a taskqueue to drain queued transmit packets.
1072 */
1073static void
1074igb_deferred_mq_start(void *arg, int pending)
1075{
1076 struct tx_ring *txr = arg;
1077 struct adapter *adapter = txr->adapter;
1078 struct ifnet *ifp = adapter->ifp;
1079
1080 IGB_TX_LOCK(txr);
1081 if (!drbr_empty(ifp, txr->br))
1082 igb_mq_start_locked(ifp, txr);
1083 IGB_TX_UNLOCK(txr);
1084}
1085
1086/*
1087** Flush all ring buffers
1088*/
1089static void
1090igb_qflush(struct ifnet *ifp)
1091{
1092 struct adapter *adapter = ifp->if_softc;
1093 struct tx_ring *txr = adapter->tx_rings;
1094 struct mbuf *m;
1095
1096 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1097 IGB_TX_LOCK(txr);
1098 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1099 m_freem(m);
1100 IGB_TX_UNLOCK(txr);
1101 }
1102 if_qflush(ifp);
1103}
1104#endif /* ~IGB_LEGACY_TX */
1105
1106/*********************************************************************
1107 * Ioctl entry point
1108 *
1109 * igb_ioctl is called when the user wants to configure the
1110 * interface.
1111 *
1112 * return 0 on success, positive on failure
1113 **********************************************************************/
1114
1115static int
1116igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1117{
1118 struct adapter *adapter = ifp->if_softc;
1119 struct ifreq *ifr = (struct ifreq *)data;
1120#if defined(INET) || defined(INET6)
1121 struct ifaddr *ifa = (struct ifaddr *)data;
1122#endif
1123 bool avoid_reset = FALSE;
1124 int error = 0;
1125
1126 if (adapter->in_detach)
1127 return (error);
1128
1129 switch (command) {
1130 case SIOCSIFADDR:
1131#ifdef INET
1132 if (ifa->ifa_addr->sa_family == AF_INET)
1133 avoid_reset = TRUE;
1134#endif
1135#ifdef INET6
1136 if (ifa->ifa_addr->sa_family == AF_INET6)
1137 avoid_reset = TRUE;
1138#endif
1139 /*
1140 ** Calling init results in link renegotiation,
1141 ** so we avoid doing it when possible.
1142 */
1143 if (avoid_reset) {
1144 ifp->if_flags |= IFF_UP;
1145 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1146 igb_init(adapter);
1147#ifdef INET
1148 if (!(ifp->if_flags & IFF_NOARP))
1149 arp_ifinit(ifp, ifa);
1150#endif
1151 } else
1152 error = ether_ioctl(ifp, command, data);
1153 break;
1154 case SIOCSIFMTU:
1155 {
1156 int max_frame_size;
1157
1158 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1159
1160 IGB_CORE_LOCK(adapter);
1161 max_frame_size = 9234;
1162 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1163 ETHER_CRC_LEN) {
1164 IGB_CORE_UNLOCK(adapter);
1165 error = EINVAL;
1166 break;
1167 }
1168
1169 ifp->if_mtu = ifr->ifr_mtu;
1170 adapter->max_frame_size =
1171 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1172 igb_init_locked(adapter);
1173 IGB_CORE_UNLOCK(adapter);
1174 break;
1175 }
1176 case SIOCSIFFLAGS:
1177 IOCTL_DEBUGOUT("ioctl rcv'd:\
1178 SIOCSIFFLAGS (Set Interface Flags)");
1179 IGB_CORE_LOCK(adapter);
1180 if (ifp->if_flags & IFF_UP) {
1181 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1182 if ((ifp->if_flags ^ adapter->if_flags) &
1183 (IFF_PROMISC | IFF_ALLMULTI)) {
1184 igb_disable_promisc(adapter);
1185 igb_set_promisc(adapter);
1186 }
1187 } else
1188 igb_init_locked(adapter);
1189 } else
1190 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1191 igb_stop(adapter);
1192 adapter->if_flags = ifp->if_flags;
1193 IGB_CORE_UNLOCK(adapter);
1194 break;
1195 case SIOCADDMULTI:
1196 case SIOCDELMULTI:
1197 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1198 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1199 IGB_CORE_LOCK(adapter);
1200 igb_disable_intr(adapter);
1201 igb_set_multi(adapter);
1202#ifdef DEVICE_POLLING
1203 if (!(ifp->if_capenable & IFCAP_POLLING))
1204#endif
1205 igb_enable_intr(adapter);
1206 IGB_CORE_UNLOCK(adapter);
1207 }
1208 break;
1209 case SIOCSIFMEDIA:
1210 /* Check SOL/IDER usage */
1211 IGB_CORE_LOCK(adapter);
1212 if (e1000_check_reset_block(&adapter->hw)) {
1213 IGB_CORE_UNLOCK(adapter);
1214 device_printf(adapter->dev, "Media change is"
1215 " blocked due to SOL/IDER session.\n");
1216 break;
1217 }
1218 IGB_CORE_UNLOCK(adapter);
1219 case SIOCGIFMEDIA:
1220 IOCTL_DEBUGOUT("ioctl rcv'd: \
1221 SIOCxIFMEDIA (Get/Set Interface Media)");
1222 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1223 break;
1224 case SIOCSIFCAP:
1225 {
1226 int mask, reinit;
1227
1228 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1229 reinit = 0;
1230 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1231#ifdef DEVICE_POLLING
1232 if (mask & IFCAP_POLLING) {
1233 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1234 error = ether_poll_register(igb_poll, ifp);
1235 if (error)
1236 return (error);
1237 IGB_CORE_LOCK(adapter);
1238 igb_disable_intr(adapter);
1239 ifp->if_capenable |= IFCAP_POLLING;
1240 IGB_CORE_UNLOCK(adapter);
1241 } else {
1242 error = ether_poll_deregister(ifp);
1243 /* Enable interrupt even in error case */
1244 IGB_CORE_LOCK(adapter);
1245 igb_enable_intr(adapter);
1246 ifp->if_capenable &= ~IFCAP_POLLING;
1247 IGB_CORE_UNLOCK(adapter);
1248 }
1249 }
1250#endif
1251 if (mask & IFCAP_HWCSUM) {
1252 ifp->if_capenable ^= IFCAP_HWCSUM;
1253 reinit = 1;
1254 }
1255 if (mask & IFCAP_TSO4) {
1256 ifp->if_capenable ^= IFCAP_TSO4;
1257 reinit = 1;
1258 }
1259 if (mask & IFCAP_TSO6) {
1260 ifp->if_capenable ^= IFCAP_TSO6;
1261 reinit = 1;
1262 }
1263 if (mask & IFCAP_VLAN_HWTAGGING) {
1264 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1265 reinit = 1;
1266 }
1267 if (mask & IFCAP_VLAN_HWFILTER) {
1268 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1269 reinit = 1;
1270 }
1271 if (mask & IFCAP_VLAN_HWTSO) {
1272 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1273 reinit = 1;
1274 }
1275 if (mask & IFCAP_LRO) {
1276 ifp->if_capenable ^= IFCAP_LRO;
1277 reinit = 1;
1278 }
1279 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1280 igb_init(adapter);
1281 VLAN_CAPABILITIES(ifp);
1282 break;
1283 }
1284
1285 default:
1286 error = ether_ioctl(ifp, command, data);
1287 break;
1288 }
1289
1290 return (error);
1291}
1292
1293
1294/*********************************************************************
1295 * Init entry point
1296 *
1297 * This routine is used in two ways. It is used by the stack as
1298 * init entry point in network interface structure. It is also used
1299 * by the driver as a hw/sw initialization routine to get to a
1300 * consistent state.
1301 *
1302 * return 0 on success, positive on failure
1303 **********************************************************************/
1304
1305static void
1306igb_init_locked(struct adapter *adapter)
1307{
1308 struct ifnet *ifp = adapter->ifp;
1309 device_t dev = adapter->dev;
1310
1311 INIT_DEBUGOUT("igb_init: begin");
1312
1313 IGB_CORE_LOCK_ASSERT(adapter);
1314
1315 igb_disable_intr(adapter);
1316 callout_stop(&adapter->timer);
1317
1318 /* Get the latest mac address, User can use a LAA */
1319 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1320 ETHER_ADDR_LEN);
1321
1322 /* Put the address into the Receive Address Array */
1323 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1324
1325 igb_reset(adapter);
1326 igb_update_link_status(adapter);
1327
1328 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1329
1330 /* Set hardware offload abilities */
1331 ifp->if_hwassist = 0;
1332 if (ifp->if_capenable & IFCAP_TXCSUM) {
1333 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1334#if __FreeBSD_version >= 800000
1335 if (adapter->hw.mac.type == e1000_82576)
1336 ifp->if_hwassist |= CSUM_SCTP;
1337#endif
1338 }
1339
1340 if (ifp->if_capenable & IFCAP_TSO)
1341 ifp->if_hwassist |= CSUM_TSO;
1342
1343 /* Configure for OS presence */
1344 igb_init_manageability(adapter);
1345
1346 /* Prepare transmit descriptors and buffers */
1347 igb_setup_transmit_structures(adapter);
1348 igb_initialize_transmit_units(adapter);
1349
1350 /* Setup Multicast table */
1351 igb_set_multi(adapter);
1352
1353 /*
1354 ** Figure out the desired mbuf pool
1355 ** for doing jumbo/packetsplit
1356 */
1357 if (adapter->max_frame_size <= 2048)
1358 adapter->rx_mbuf_sz = MCLBYTES;
1359 else if (adapter->max_frame_size <= 4096)
1360 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1361 else
1362 adapter->rx_mbuf_sz = MJUM9BYTES;
1363
1364 /* Prepare receive descriptors and buffers */
1365 if (igb_setup_receive_structures(adapter)) {
1366 device_printf(dev, "Could not setup receive structures\n");
1367 return;
1368 }
1369 igb_initialize_receive_units(adapter);
1370
1371 /* Enable VLAN support */
1372 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1373 igb_setup_vlan_hw_support(adapter);
1374
1375 /* Don't lose promiscuous settings */
1376 igb_set_promisc(adapter);
1377
1378 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1379 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1380
1381 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1382 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1383
1384 if (adapter->msix > 1) /* Set up queue routing */
1385 igb_configure_queues(adapter);
1386
1387 /* this clears any pending interrupts */
1388 E1000_READ_REG(&adapter->hw, E1000_ICR);
1389#ifdef DEVICE_POLLING
1390 /*
1391 * Only enable interrupts if we are not polling, make sure
1392 * they are off otherwise.
1393 */
1394 if (ifp->if_capenable & IFCAP_POLLING)
1395 igb_disable_intr(adapter);
1396 else
1397#endif /* DEVICE_POLLING */
1398 {
1399 igb_enable_intr(adapter);
1400 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1401 }
1402
1403 /* Set Energy Efficient Ethernet */
1404 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1405 if (adapter->hw.mac.type == e1000_i354)
1406 e1000_set_eee_i354(&adapter->hw);
1407 else
1408 e1000_set_eee_i350(&adapter->hw);
1409 }
1410}
1411
1412static void
1413igb_init(void *arg)
1414{
1415 struct adapter *adapter = arg;
1416
1417 IGB_CORE_LOCK(adapter);
1418 igb_init_locked(adapter);
1419 IGB_CORE_UNLOCK(adapter);
1420}
1421
1422
1423static void
1424igb_handle_que(void *context, int pending)
1425{
1426 struct igb_queue *que = context;
1427 struct adapter *adapter = que->adapter;
1428 struct tx_ring *txr = que->txr;
1429 struct ifnet *ifp = adapter->ifp;
1430
1431 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1432 bool more;
1433
1434 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1435
1436 IGB_TX_LOCK(txr);
1437 igb_txeof(txr);
1438#ifndef IGB_LEGACY_TX
1439 /* Process the stack queue only if not depleted */
1440 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1441 !drbr_empty(ifp, txr->br))
1442 igb_mq_start_locked(ifp, txr);
1443#else
1444 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445 igb_start_locked(txr, ifp);
1446#endif
1447 IGB_TX_UNLOCK(txr);
1448 /* Do we need another? */
1449 if (more) {
1450 taskqueue_enqueue(que->tq, &que->que_task);
1451 return;
1452 }
1453 }
1454
1455#ifdef DEVICE_POLLING
1456 if (ifp->if_capenable & IFCAP_POLLING)
1457 return;
1458#endif
1459 /* Reenable this interrupt */
1460 if (que->eims)
1461 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1462 else
1463 igb_enable_intr(adapter);
1464}
1465
1466/* Deal with link in a sleepable context */
1467static void
1468igb_handle_link(void *context, int pending)
1469{
1470 struct adapter *adapter = context;
1471
1472 IGB_CORE_LOCK(adapter);
1473 igb_handle_link_locked(adapter);
1474 IGB_CORE_UNLOCK(adapter);
1475}
1476
1477static void
1478igb_handle_link_locked(struct adapter *adapter)
1479{
1480 struct tx_ring *txr = adapter->tx_rings;
1481 struct ifnet *ifp = adapter->ifp;
1482
1483 IGB_CORE_LOCK_ASSERT(adapter);
1484 adapter->hw.mac.get_link_status = 1;
1485 igb_update_link_status(adapter);
1486 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1487 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1488 IGB_TX_LOCK(txr);
1489#ifndef IGB_LEGACY_TX
1490 /* Process the stack queue only if not depleted */
1491 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1492 !drbr_empty(ifp, txr->br))
1493 igb_mq_start_locked(ifp, txr);
1494#else
1495 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1496 igb_start_locked(txr, ifp);
1497#endif
1498 IGB_TX_UNLOCK(txr);
1499 }
1500 }
1501}
1502
1503/*********************************************************************
1504 *
1505 * MSI/Legacy Deferred
1506 * Interrupt Service routine
1507 *
1508 *********************************************************************/
1509static int
1510igb_irq_fast(void *arg)
1511{
1512 struct adapter *adapter = arg;
1513 struct igb_queue *que = adapter->queues;
1514 u32 reg_icr;
1515
1516
1517 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1518
1519 /* Hot eject? */
1520 if (reg_icr == 0xffffffff)
1521 return FILTER_STRAY;
1522
1523 /* Definitely not our interrupt. */
1524 if (reg_icr == 0x0)
1525 return FILTER_STRAY;
1526
1527 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1528 return FILTER_STRAY;
1529
1530 /*
1531 * Mask interrupts until the taskqueue is finished running. This is
1532 * cheap, just assume that it is needed. This also works around the
1533 * MSI message reordering errata on certain systems.
1534 */
1535 igb_disable_intr(adapter);
1536 taskqueue_enqueue(que->tq, &que->que_task);
1537
1538 /* Link status change */
1539 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1540 taskqueue_enqueue(que->tq, &adapter->link_task);
1541
1542 if (reg_icr & E1000_ICR_RXO)
1543 adapter->rx_overruns++;
1544 return FILTER_HANDLED;
1545}
1546
1547#ifdef DEVICE_POLLING
1548#if __FreeBSD_version >= 800000
1549#define POLL_RETURN_COUNT(a) (a)
1550static int
1551#else
1552#define POLL_RETURN_COUNT(a)
1553static void
1554#endif
1555igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1556{
1557 struct adapter *adapter = ifp->if_softc;
1558 struct igb_queue *que;
1559 struct tx_ring *txr;
1560 u32 reg_icr, rx_done = 0;
1561 u32 loop = IGB_MAX_LOOP;
1562 bool more;
1563
1564 IGB_CORE_LOCK(adapter);
1565 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1566 IGB_CORE_UNLOCK(adapter);
1567 return POLL_RETURN_COUNT(rx_done);
1568 }
1569
1570 if (cmd == POLL_AND_CHECK_STATUS) {
1571 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1572 /* Link status change */
1573 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1574 igb_handle_link_locked(adapter);
1575
1576 if (reg_icr & E1000_ICR_RXO)
1577 adapter->rx_overruns++;
1578 }
1579 IGB_CORE_UNLOCK(adapter);
1580
1581 for (int i = 0; i < adapter->num_queues; i++) {
1582 que = &adapter->queues[i];
1583 txr = que->txr;
1584
1585 igb_rxeof(que, count, &rx_done);
1586
1587 IGB_TX_LOCK(txr);
1588 do {
1589 more = igb_txeof(txr);
1590 } while (loop-- && more);
1591#ifndef IGB_LEGACY_TX
1592 if (!drbr_empty(ifp, txr->br))
1593 igb_mq_start_locked(ifp, txr);
1594#else
1595 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1596 igb_start_locked(txr, ifp);
1597#endif
1598 IGB_TX_UNLOCK(txr);
1599 }
1600
1601 return POLL_RETURN_COUNT(rx_done);
1602}
1603#endif /* DEVICE_POLLING */
1604
1605/*********************************************************************
1606 *
1607 * MSIX Que Interrupt Service routine
1608 *
1609 **********************************************************************/
1610static void
1611igb_msix_que(void *arg)
1612{
1613 struct igb_queue *que = arg;
1614 struct adapter *adapter = que->adapter;
1615 struct ifnet *ifp = adapter->ifp;
1616 struct tx_ring *txr = que->txr;
1617 struct rx_ring *rxr = que->rxr;
1618 u32 newitr = 0;
1619 bool more_rx;
1620
1621 /* Ignore spurious interrupts */
1622 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1623 return;
1624
1625 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1626 ++que->irqs;
1627
1628 IGB_TX_LOCK(txr);
1629 igb_txeof(txr);
1630#ifndef IGB_LEGACY_TX
1631 /* Process the stack queue only if not depleted */
1632 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1633 !drbr_empty(ifp, txr->br))
1634 igb_mq_start_locked(ifp, txr);
1635#else
1636 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1637 igb_start_locked(txr, ifp);
1638#endif
1639 IGB_TX_UNLOCK(txr);
1640
1641 more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1642
1643 if (adapter->enable_aim == FALSE)
1644 goto no_calc;
1645 /*
1646 ** Do Adaptive Interrupt Moderation:
1647 ** - Write out last calculated setting
1648 ** - Calculate based on average size over
1649 ** the last interval.
1650 */
1651 if (que->eitr_setting)
1652 E1000_WRITE_REG(&adapter->hw,
1653 E1000_EITR(que->msix), que->eitr_setting);
1654
1655 que->eitr_setting = 0;
1656
1657 /* Idle, do nothing */
1658 if ((txr->bytes == 0) && (rxr->bytes == 0))
1659 goto no_calc;
1660
 1661 /* Use half the default if sub-gig */
1662 if (adapter->link_speed != 1000)
1663 newitr = IGB_DEFAULT_ITR / 2;
1664 else {
1665 if ((txr->bytes) && (txr->packets))
1666 newitr = txr->bytes/txr->packets;
1667 if ((rxr->bytes) && (rxr->packets))
1668 newitr = max(newitr,
1669 (rxr->bytes / rxr->packets));
1670 newitr += 24; /* account for hardware frame, crc */
1671 /* set an upper boundary */
1672 newitr = min(newitr, 3000);
1673 /* Be nice to the mid range */
1674 if ((newitr > 300) && (newitr < 1200))
1675 newitr = (newitr / 3);
1676 else
1677 newitr = (newitr / 2);
1678 }
1679 newitr &= 0x7FFC; /* Mask invalid bits */
1680 if (adapter->hw.mac.type == e1000_82575)
1681 newitr |= newitr << 16;
1682 else
1683 newitr |= E1000_EITR_CNT_IGNR;
1684
1685 /* save for next interrupt */
1686 que->eitr_setting = newitr;
1687
1688 /* Reset state */
1689 txr->bytes = 0;
1690 txr->packets = 0;
1691 rxr->bytes = 0;
1692 rxr->packets = 0;
1693
1694no_calc:
1695 /* Schedule a clean task if needed*/
1696 if (more_rx)
1697 taskqueue_enqueue(que->tq, &que->que_task);
1698 else
1699 /* Reenable this interrupt */
1700 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1701 return;
1702}
1703
1704
1705/*********************************************************************
1706 *
1707 * MSIX Link Interrupt Service routine
1708 *
1709 **********************************************************************/
1710
1711static void
1712igb_msix_link(void *arg)
1713{
1714 struct adapter *adapter = arg;
1715 u32 icr;
1716
1717 ++adapter->link_irq;
1718 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1719 if (!(icr & E1000_ICR_LSC))
1720 goto spurious;
1721 igb_handle_link(adapter, 0);
1722
1723spurious:
1724 /* Rearm */
1725 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1726 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1727 return;
1728}
1729
1730
1731/*********************************************************************
1732 *
1733 * Media Ioctl callback
1734 *
1735 * This routine is called whenever the user queries the status of
1736 * the interface using ifconfig.
1737 *
1738 **********************************************************************/
1739static void
1740igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1741{
1742 struct adapter *adapter = ifp->if_softc;
1743
1744 INIT_DEBUGOUT("igb_media_status: begin");
1745
1746 IGB_CORE_LOCK(adapter);
1747 igb_update_link_status(adapter);
1748
1749 ifmr->ifm_status = IFM_AVALID;
1750 ifmr->ifm_active = IFM_ETHER;
1751
1752 if (!adapter->link_active) {
1753 IGB_CORE_UNLOCK(adapter);
1754 return;
1755 }
1756
1757 ifmr->ifm_status |= IFM_ACTIVE;
1758
1759 switch (adapter->link_speed) {
1760 case 10:
1761 ifmr->ifm_active |= IFM_10_T;
1762 break;
1763 case 100:
1764 /*
1765 ** Support for 100Mb SFP - these are Fiber
1766 ** but the media type appears as serdes
1767 */
1768 if (adapter->hw.phy.media_type ==
1769 e1000_media_type_internal_serdes)
1770 ifmr->ifm_active |= IFM_100_FX;
1771 else
1772 ifmr->ifm_active |= IFM_100_TX;
1773 break;
1774 case 1000:
1775 ifmr->ifm_active |= IFM_1000_T;
1776 break;
1777 case 2500:
1778 ifmr->ifm_active |= IFM_2500_SX;
1779 break;
1780 }
1781
1782 if (adapter->link_duplex == FULL_DUPLEX)
1783 ifmr->ifm_active |= IFM_FDX;
1784 else
1785 ifmr->ifm_active |= IFM_HDX;
1786
1787 IGB_CORE_UNLOCK(adapter);
1788}
1789
1790/*********************************************************************
1791 *
1792 * Media Ioctl callback
1793 *
1794 * This routine is called when the user changes speed/duplex using
1795 * the media/mediaopt options of ifconfig.
1796 *
1797 **********************************************************************/
1798static int
1799igb_media_change(struct ifnet *ifp)
1800{
1801 struct adapter *adapter = ifp->if_softc;
1802 struct ifmedia *ifm = &adapter->media;
1803
1804 INIT_DEBUGOUT("igb_media_change: begin");
1805
1806 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1807 return (EINVAL);
1808
1809 IGB_CORE_LOCK(adapter);
1810 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1811 case IFM_AUTO:
1812 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1813 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1814 break;
1815 case IFM_1000_LX:
1816 case IFM_1000_SX:
1817 case IFM_1000_T:
1818 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1819 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1820 break;
1821 case IFM_100_TX:
1822 adapter->hw.mac.autoneg = FALSE;
1823 adapter->hw.phy.autoneg_advertised = 0;
1824 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1825 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1826 else
1827 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1828 break;
1829 case IFM_10_T:
1830 adapter->hw.mac.autoneg = FALSE;
1831 adapter->hw.phy.autoneg_advertised = 0;
1832 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1833 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1834 else
1835 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1836 break;
1837 default:
1838 device_printf(adapter->dev, "Unsupported media type\n");
1839 }
1840
1841 igb_init_locked(adapter);
1842 IGB_CORE_UNLOCK(adapter);
1843
1844 return (0);
1845}
1846
1847
1848/*********************************************************************
1849 *
1850 * This routine maps the mbufs to Advanced TX descriptors.
1851 *
1852 **********************************************************************/
1853static int
1854igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1855{
1856 struct adapter *adapter = txr->adapter;
1857 u32 olinfo_status = 0, cmd_type_len;
1858 int i, j, error, nsegs;
1859 int first;
1860 bool remap = TRUE;
1861 struct mbuf *m_head;
1862 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1863 bus_dmamap_t map;
1864 struct igb_tx_buf *txbuf;
1865 union e1000_adv_tx_desc *txd = NULL;
1866
1867 m_head = *m_headp;
1868
1869 /* Basic descriptor defines */
1870 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1871 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1872
1873 if (m_head->m_flags & M_VLANTAG)
1874 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1875
1876 /*
1877 * Important to capture the first descriptor
1878 * used because it will contain the index of
1879 * the one we tell the hardware to report back
1880 */
1881 first = txr->next_avail_desc;
1882 txbuf = &txr->tx_buffers[first];
1883 map = txbuf->map;
1884
1885 /*
1886 * Map the packet for DMA.
1887 */
1888retry:
1889 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1890 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1891
1892 if (__predict_false(error)) {
1893 struct mbuf *m;
1894
1895 switch (error) {
1896 case EFBIG:
1897 			/* Try it again, but only once */
1898 if (remap == TRUE) {
1899 remap = FALSE;
1900 m = m_defrag(*m_headp, M_NOWAIT);
1901 if (m == NULL) {
1902 adapter->mbuf_defrag_failed++;
1903 m_freem(*m_headp);
1904 *m_headp = NULL;
1905 return (ENOBUFS);
1906 }
1907 *m_headp = m;
1908 goto retry;
1909 } else
1910 return (error);
1911 default:
1912 txr->no_tx_dma_setup++;
1913 m_freem(*m_headp);
1914 *m_headp = NULL;
1915 return (error);
1916 }
1917 }
1918
1919 	/* Make certain there are enough descriptors, allowing 2 extra for the offload context descriptor and slack */
1920 if (nsegs > txr->tx_avail - 2) {
1921 txr->no_desc_avail++;
1922 bus_dmamap_unload(txr->txtag, map);
1923 return (ENOBUFS);
1924 }
1925 m_head = *m_headp;
1926
1927 /*
1928 ** Set up the appropriate offload context
1929 ** this will consume the first descriptor
1930 */
1931 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1932 if (__predict_false(error)) {
1933 m_freem(*m_headp);
1934 *m_headp = NULL;
1935 return (error);
1936 }
1937
1938 /* 82575 needs the queue index added */
1939 if (adapter->hw.mac.type == e1000_82575)
1940 olinfo_status |= txr->me << 4;
1941
1942 i = txr->next_avail_desc;
1943 for (j = 0; j < nsegs; j++) {
1944 bus_size_t seglen;
1945 bus_addr_t segaddr;
1946
1947 txbuf = &txr->tx_buffers[i];
1948 txd = &txr->tx_base[i];
1949 seglen = segs[j].ds_len;
1950 segaddr = htole64(segs[j].ds_addr);
1951
1952 txd->read.buffer_addr = segaddr;
1953 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1954 cmd_type_len | seglen);
1955 txd->read.olinfo_status = htole32(olinfo_status);
1956
1957 if (++i == txr->num_desc)
1958 i = 0;
1959 }
1960
1961 txd->read.cmd_type_len |=
1962 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1963 txr->tx_avail -= nsegs;
1964 txr->next_avail_desc = i;
1965
1966 txbuf->m_head = m_head;
1967 	/*
1968 	** Swap the maps so that the last descriptor, which gets
1969 	** the completion interrupt, holds the real (loaded) map,
1970 	** while the first descriptor takes this buffer's unused
1971 	** map in exchange.
1972 	*/
1973 txr->tx_buffers[first].map = txbuf->map;
1974 txbuf->map = map;
1975 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1976
1977 /* Set the EOP descriptor that will be marked done */
1978 txbuf = &txr->tx_buffers[first];
1979 txbuf->eop = txd;
1980
1981 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1982 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1983 /*
1984 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1985 * hardware that this frame is available to transmit.
1986 */
1987 ++txr->total_packets;
1988 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1989
1990 return (0);
1991}
1992static void
1993igb_set_promisc(struct adapter *adapter)
1994{
1995 struct ifnet *ifp = adapter->ifp;
1996 struct e1000_hw *hw = &adapter->hw;
1997 u32 reg;
1998
1999 if (adapter->vf_ifp) {
2000 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2001 return;
2002 }
2003
2004 reg = E1000_READ_REG(hw, E1000_RCTL);
2005 if (ifp->if_flags & IFF_PROMISC) {
2006 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2007 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2008 } else if (ifp->if_flags & IFF_ALLMULTI) {
2009 reg |= E1000_RCTL_MPE;
2010 reg &= ~E1000_RCTL_UPE;
2011 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2012 }
2013}
2014
2015static void
2016igb_disable_promisc(struct adapter *adapter)
2017{
2018 struct e1000_hw *hw = &adapter->hw;
2019 struct ifnet *ifp = adapter->ifp;
2020 u32 reg;
2021 int mcnt = 0;
2022
2023 if (adapter->vf_ifp) {
2024 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2025 return;
2026 }
2027 reg = E1000_READ_REG(hw, E1000_RCTL);
2028 reg &= (~E1000_RCTL_UPE);
2029 if (ifp->if_flags & IFF_ALLMULTI)
2030 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2031 else {
2032 struct ifmultiaddr *ifma;
2033#if __FreeBSD_version < 800000
2034 IF_ADDR_LOCK(ifp);
2035#else
2036 if_maddr_rlock(ifp);
2037#endif
2038 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2039 if (ifma->ifma_addr->sa_family != AF_LINK)
2040 continue;
2041 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2042 break;
2043 mcnt++;
2044 }
2045#if __FreeBSD_version < 800000
2046 IF_ADDR_UNLOCK(ifp);
2047#else
2048 if_maddr_runlock(ifp);
2049#endif
2050 }
2051 	/* Leave MPE set if we are at the multicast filter limit */
2052 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2053 reg &= (~E1000_RCTL_MPE);
2054 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2055}
2056
2057
2058/*********************************************************************
2059 * Multicast Update
2060 *
2061 * This routine is called whenever the multicast address list is updated.
2062 *
2063 **********************************************************************/
2064
2065static void
2066igb_set_multi(struct adapter *adapter)
2067{
2068 struct ifnet *ifp = adapter->ifp;
2069 struct ifmultiaddr *ifma;
2070 u32 reg_rctl = 0;
2071 u8 *mta;
2072
2073 int mcnt = 0;
2074
2075 IOCTL_DEBUGOUT("igb_set_multi: begin");
2076
2077 mta = adapter->mta;
2078 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2079 MAX_NUM_MULTICAST_ADDRESSES);
2080
2081#if __FreeBSD_version < 800000
2082 IF_ADDR_LOCK(ifp);
2083#else
2084 if_maddr_rlock(ifp);
2085#endif
2086 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2087 if (ifma->ifma_addr->sa_family != AF_LINK)
2088 continue;
2089
2090 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2091 break;
2092
2093 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2094 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2095 mcnt++;
2096 }
2097#if __FreeBSD_version < 800000
2098 IF_ADDR_UNLOCK(ifp);
2099#else
2100 if_maddr_runlock(ifp);
2101#endif
2102
2103 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2104 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2105 reg_rctl |= E1000_RCTL_MPE;
2106 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2107 } else
2108 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2109}
2110
2111
2112/*********************************************************************
2113 * Timer routine:
2114 * This routine checks for link status,
2115 * updates statistics, and does the watchdog.
2116 *
2117 **********************************************************************/
2118
2119static void
2120igb_local_timer(void *arg)
2121{
2122 struct adapter *adapter = arg;
2123 device_t dev = adapter->dev;
2124 struct ifnet *ifp = adapter->ifp;
2125 struct tx_ring *txr = adapter->tx_rings;
2126 struct igb_queue *que = adapter->queues;
2127 int hung = 0, busy = 0;
2128
2129
2130 IGB_CORE_LOCK_ASSERT(adapter);
2131
2132 igb_update_link_status(adapter);
2133 igb_update_stats_counters(adapter);
2134
2135 /*
2136 ** Check the TX queues status
2137 ** - central locked handling of OACTIVE
2138 ** - watchdog only if all queues show hung
2139 */
2140 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2141 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2142 (adapter->pause_frames == 0))
2143 ++hung;
2144 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2145 ++busy;
2146 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2147 taskqueue_enqueue(que->tq, &que->que_task);
2148 }
2149 if (hung == adapter->num_queues)
2150 goto timeout;
2151 if (busy == adapter->num_queues)
2152 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2153 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2154 (busy < adapter->num_queues))
2155 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2156
2157 adapter->pause_frames = 0;
2158 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2159#ifndef DEVICE_POLLING
2160 /* Schedule all queue interrupts - deadlock protection */
2161 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2162#endif
2163 return;
2164
2165timeout:
2166 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2167 	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2168 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2169 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2170 	device_printf(dev, "TX(%d) desc avail = %d, "
2171 	    "Next TX to Clean = %d\n",
2172 txr->me, txr->tx_avail, txr->next_to_clean);
2173 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2174 adapter->watchdog_events++;
2175 igb_init_locked(adapter);
2176}
2177
2178static void
2179igb_update_link_status(struct adapter *adapter)
2180{
2181 struct e1000_hw *hw = &adapter->hw;
2182 struct e1000_fc_info *fc = &hw->fc;
2183 struct ifnet *ifp = adapter->ifp;
2184 device_t dev = adapter->dev;
2185 struct tx_ring *txr = adapter->tx_rings;
2186 u32 link_check, thstat, ctrl;
2187 char *flowctl = NULL;
2188
2189 link_check = thstat = ctrl = 0;
2190
2191 /* Get the cached link value or read for real */
2192 switch (hw->phy.media_type) {
2193 case e1000_media_type_copper:
2194 if (hw->mac.get_link_status) {
2195 /* Do the work to read phy */
2196 e1000_check_for_link(hw);
2197 link_check = !hw->mac.get_link_status;
2198 } else
2199 link_check = TRUE;
2200 break;
2201 case e1000_media_type_fiber:
2202 e1000_check_for_link(hw);
2203 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2204 E1000_STATUS_LU);
2205 break;
2206 case e1000_media_type_internal_serdes:
2207 e1000_check_for_link(hw);
2208 link_check = adapter->hw.mac.serdes_has_link;
2209 break;
2210 /* VF device is type_unknown */
2211 case e1000_media_type_unknown:
2212 e1000_check_for_link(hw);
2213 link_check = !hw->mac.get_link_status;
2214 /* Fall thru */
2215 default:
2216 break;
2217 }
2218
2219 /* Check for thermal downshift or shutdown */
2220 if (hw->mac.type == e1000_i350) {
2221 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2222 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2223 }
2224
2225 /* Get the flow control for display */
2226 switch (fc->current_mode) {
2227 case e1000_fc_rx_pause:
2228 flowctl = "RX";
2229 break;
2230 case e1000_fc_tx_pause:
2231 flowctl = "TX";
2232 break;
2233 case e1000_fc_full:
2234 flowctl = "Full";
2235 break;
2236 case e1000_fc_none:
2237 default:
2238 flowctl = "None";
2239 break;
2240 }
2241
2242 /* Now we check if a transition has happened */
2243 if (link_check && (adapter->link_active == 0)) {
2244 e1000_get_speed_and_duplex(&adapter->hw,
2245 &adapter->link_speed, &adapter->link_duplex);
2246 if (bootverbose)
2247 device_printf(dev, "Link is up %d Mbps %s,"
2248 " Flow Control: %s\n",
2249 adapter->link_speed,
2250 ((adapter->link_duplex == FULL_DUPLEX) ?
2251 "Full Duplex" : "Half Duplex"), flowctl);
2252 adapter->link_active = 1;
2253 ifp->if_baudrate = adapter->link_speed * 1000000;
2254 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2255 (thstat & E1000_THSTAT_LINK_THROTTLE))
2256 device_printf(dev, "Link: thermal downshift\n");
2257 /* Delay Link Up for Phy update */
2258 if (((hw->mac.type == e1000_i210) ||
2259 (hw->mac.type == e1000_i211)) &&
2260 (hw->phy.id == I210_I_PHY_ID))
2261 msec_delay(I210_LINK_DELAY);
2262 /* Reset if the media type changed. */
2263 if (hw->dev_spec._82575.media_changed) {
2264 hw->dev_spec._82575.media_changed = false;
2265 adapter->flags |= IGB_MEDIA_RESET;
2266 igb_reset(adapter);
2267 }
2268 /* This can sleep */
2269 if_link_state_change(ifp, LINK_STATE_UP);
2270 } else if (!link_check && (adapter->link_active == 1)) {
2271 ifp->if_baudrate = adapter->link_speed = 0;
2272 adapter->link_duplex = 0;
2273 if (bootverbose)
2274 device_printf(dev, "Link is Down\n");
2275 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2276 (thstat & E1000_THSTAT_PWR_DOWN))
2277 device_printf(dev, "Link: thermal shutdown\n");
2278 adapter->link_active = 0;
2279 /* This can sleep */
2280 if_link_state_change(ifp, LINK_STATE_DOWN);
2281 /* Reset queue state */
2282 for (int i = 0; i < adapter->num_queues; i++, txr++)
2283 txr->queue_status = IGB_QUEUE_IDLE;
2284 }
2285}
2286
2287/*********************************************************************
2288 *
2289 * This routine disables all traffic on the adapter by issuing a
2290 * global reset on the MAC and deallocates TX/RX buffers.
2291 *
2292 **********************************************************************/
2293
2294static void
2295igb_stop(void *arg)
2296{
2297 struct adapter *adapter = arg;
2298 struct ifnet *ifp = adapter->ifp;
2299 struct tx_ring *txr = adapter->tx_rings;
2300
2301 IGB_CORE_LOCK_ASSERT(adapter);
2302
2303 INIT_DEBUGOUT("igb_stop: begin");
2304
2305 igb_disable_intr(adapter);
2306
2307 callout_stop(&adapter->timer);
2308
2309 /* Tell the stack that the interface is no longer active */
2310 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2311 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2312
2313 /* Disarm watchdog timer. */
2314 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2315 IGB_TX_LOCK(txr);
2316 txr->queue_status = IGB_QUEUE_IDLE;
2317 IGB_TX_UNLOCK(txr);
2318 }
2319
2320 e1000_reset_hw(&adapter->hw);
2321 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2322
2323 e1000_led_off(&adapter->hw);
2324 e1000_cleanup_led(&adapter->hw);
2325}
2326
2327
2328/*********************************************************************
2329 *
2330 * Determine hardware revision.
2331 *
2332 **********************************************************************/
2333static void
2334igb_identify_hardware(struct adapter *adapter)
2335{
2336 device_t dev = adapter->dev;
2337
2338 /* Make sure our PCI config space has the necessary stuff set */
2339 pci_enable_busmaster(dev);
2340 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2341
2342 /* Save off the information about this board */
2343 adapter->hw.vendor_id = pci_get_vendor(dev);
2344 adapter->hw.device_id = pci_get_device(dev);
2345 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2346 adapter->hw.subsystem_vendor_id =
2347 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2348 adapter->hw.subsystem_device_id =
2349 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2350
2351 /* Set MAC type early for PCI setup */
2352 e1000_set_mac_type(&adapter->hw);
2353
2354 /* Are we a VF device? */
2355 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2356 (adapter->hw.mac.type == e1000_vfadapt_i350))
2357 adapter->vf_ifp = 1;
2358 else
2359 adapter->vf_ifp = 0;
2360}
2361
2362static int
2363igb_allocate_pci_resources(struct adapter *adapter)
2364{
2365 device_t dev = adapter->dev;
2366 int rid;
2367
2368 rid = PCIR_BAR(0);
2369 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2370 &rid, RF_ACTIVE);
2371 if (adapter->pci_mem == NULL) {
2372 device_printf(dev, "Unable to allocate bus resource: memory\n");
2373 return (ENXIO);
2374 }
2375 adapter->osdep.mem_bus_space_tag =
2376 rman_get_bustag(adapter->pci_mem);
2377 adapter->osdep.mem_bus_space_handle =
2378 rman_get_bushandle(adapter->pci_mem);
2379 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2380
2381 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2382
2383 /* This will setup either MSI/X or MSI */
2384 adapter->msix = igb_setup_msix(adapter);
2385 adapter->hw.back = &adapter->osdep;
2386
2387 return (0);
2388}
2389
2390/*********************************************************************
2391 *
2392 * Setup the Legacy or MSI Interrupt handler
2393 *
2394 **********************************************************************/
2395static int
2396igb_allocate_legacy(struct adapter *adapter)
2397{
2398 device_t dev = adapter->dev;
2399 struct igb_queue *que = adapter->queues;
2400#ifndef IGB_LEGACY_TX
2401 struct tx_ring *txr = adapter->tx_rings;
2402#endif
2403 int error, rid = 0;
2404
2405 /* Turn off all interrupts */
2406 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2407
2408 /* MSI RID is 1 */
2409 if (adapter->msix == 1)
2410 rid = 1;
2411
2412 /* We allocate a single interrupt resource */
2413 adapter->res = bus_alloc_resource_any(dev,
2414 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2415 if (adapter->res == NULL) {
2416 device_printf(dev, "Unable to allocate bus resource: "
2417 "interrupt\n");
2418 return (ENXIO);
2419 }
2420
2421#ifndef IGB_LEGACY_TX
2422 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2423#endif
2424
2425 /*
2426 * Try allocating a fast interrupt and the associated deferred
2427 * processing contexts.
2428 */
2429 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2430 /* Make tasklet for deferred link handling */
2431 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2432 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2433 taskqueue_thread_enqueue, &que->tq);
2434 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2435 device_get_nameunit(adapter->dev));
2436 if ((error = bus_setup_intr(dev, adapter->res,
2437 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2438 adapter, &adapter->tag)) != 0) {
2439 device_printf(dev, "Failed to register fast interrupt "
2440 "handler: %d\n", error);
2441 taskqueue_free(que->tq);
2442 que->tq = NULL;
2443 return (error);
2444 }
2445
2446 return (0);
2447}
2448
2449
2450/*********************************************************************
2451 *
2452 * Setup the MSIX Queue Interrupt handlers:
2453 *
2454 **********************************************************************/
2455static int
2456igb_allocate_msix(struct adapter *adapter)
2457{
2458 device_t dev = adapter->dev;
2459 struct igb_queue *que = adapter->queues;
2460 int error, rid, vector = 0;
2461 int cpu_id = 0;
2462#ifdef RSS
2463 cpuset_t cpu_mask;
2464#endif
2465
2466 /* Be sure to start with all interrupts disabled */
2467 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2468 E1000_WRITE_FLUSH(&adapter->hw);
2469
2470#ifdef RSS
2471 /*
2472 * If we're doing RSS, the number of queues needs to
2473 * match the number of RSS buckets that are configured.
2474 *
2475 * + If there's more queues than RSS buckets, we'll end
2476 * up with queues that get no traffic.
2477 *
2478 * + If there's more RSS buckets than queues, we'll end
2479 * up having multiple RSS buckets map to the same queue,
2480 * so there'll be some contention.
2481 */
2482 if (adapter->num_queues != rss_getnumbuckets()) {
2483 device_printf(dev,
2484 "%s: number of queues (%d) != number of RSS buckets (%d)"
2485 "; performance will be impacted.\n",
2486 __func__,
2487 adapter->num_queues,
2488 rss_getnumbuckets());
2489 }
2490#endif
2491
2492 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2493 		rid = vector + 1;
2494 que->res = bus_alloc_resource_any(dev,
2495 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2496 if (que->res == NULL) {
2497 device_printf(dev,
2498 "Unable to allocate bus resource: "
2499 "MSIX Queue Interrupt\n");
2500 return (ENXIO);
2501 }
2502 error = bus_setup_intr(dev, que->res,
2503 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2504 igb_msix_que, que, &que->tag);
2505 if (error) {
2506 que->res = NULL;
2507 device_printf(dev, "Failed to register Queue handler");
2508 return (error);
2509 }
2510#if __FreeBSD_version >= 800504
2511 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2512#endif
2513 que->msix = vector;
2514 if (adapter->hw.mac.type == e1000_82575)
2515 que->eims = E1000_EICR_TX_QUEUE0 << i;
2516 else
2517 que->eims = 1 << vector;
2518
2519#ifdef RSS
2520 /*
2521 * The queue ID is used as the RSS layer bucket ID.
2522 * We look up the queue ID -> RSS CPU ID and select
2523 * that.
2524 */
2525 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2526#else
2527 /*
2528 * Bind the msix vector, and thus the
2529 * rings to the corresponding cpu.
2530 *
2531 * This just happens to match the default RSS round-robin
2532 * bucket -> queue -> CPU allocation.
2533 */
2534 if (adapter->num_queues > 1) {
2535 if (igb_last_bind_cpu < 0)
2536 igb_last_bind_cpu = CPU_FIRST();
2537 cpu_id = igb_last_bind_cpu;
2538 }
2539#endif
2540
2541 if (adapter->num_queues > 1) {
2542 bus_bind_intr(dev, que->res, cpu_id);
2543#ifdef RSS
2544 device_printf(dev,
2545 "Bound queue %d to RSS bucket %d\n",
2546 i, cpu_id);
2547#else
2548 device_printf(dev,
2549 "Bound queue %d to cpu %d\n",
2550 i, cpu_id);
2551#endif
2552 }
2553
2554#ifndef IGB_LEGACY_TX
2555 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2556 que->txr);
2557#endif
2558 /* Make tasklet for deferred handling */
2559 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2560 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2561 taskqueue_thread_enqueue, &que->tq);
2562 if (adapter->num_queues > 1) {
2563 /*
2564 * Only pin the taskqueue thread to a CPU if
2565 * RSS is in use.
2566 *
2567 * This again just happens to match the default RSS
2568 * round-robin bucket -> queue -> CPU allocation.
2569 */
2570#ifdef RSS
2571 CPU_SETOF(cpu_id, &cpu_mask);
2572 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2573 &cpu_mask,
2574 "%s que (bucket %d)",
2575 device_get_nameunit(adapter->dev),
2576 cpu_id);
2577#else
2578 taskqueue_start_threads(&que->tq, 1, PI_NET,
2579 "%s que (qid %d)",
2580 device_get_nameunit(adapter->dev),
2581 cpu_id);
2582#endif
2583 } else {
2584 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2585 device_get_nameunit(adapter->dev));
2586 }
2587
2588 /* Finally update the last bound CPU id */
2589 if (adapter->num_queues > 1)
2590 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2591 }
2592
2593 /* And Link */
2594 rid = vector + 1;
2595 adapter->res = bus_alloc_resource_any(dev,
2596 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2597 if (adapter->res == NULL) {
2598 device_printf(dev,
2599 "Unable to allocate bus resource: "
2600 "MSIX Link Interrupt\n");
2601 return (ENXIO);
2602 }
2603 if ((error = bus_setup_intr(dev, adapter->res,
2604 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2605 igb_msix_link, adapter, &adapter->tag)) != 0) {
2606 device_printf(dev, "Failed to register Link handler");
2607 return (error);
2608 }
2609#if __FreeBSD_version >= 800504
2610 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2611#endif
2612 adapter->linkvec = vector;
2613
2614 return (0);
2615}
2616
2617
2618static void
2619igb_configure_queues(struct adapter *adapter)
2620{
2621 struct e1000_hw *hw = &adapter->hw;
2622 struct igb_queue *que;
2623 u32 tmp, ivar = 0, newitr = 0;
2624
2625 /* First turn on RSS capability */
2626 if (adapter->hw.mac.type != e1000_82575)
2627 E1000_WRITE_REG(hw, E1000_GPIE,
2628 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2629 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2630
2631 /* Turn on MSIX */
2632 switch (adapter->hw.mac.type) {
2633 case e1000_82580:
2634 case e1000_i350:
2635 case e1000_i354:
2636 case e1000_i210:
2637 case e1000_i211:
2638 case e1000_vfadapt:
2639 case e1000_vfadapt_i350:
2640 /* RX entries */
2641 for (int i = 0; i < adapter->num_queues; i++) {
2642 u32 index = i >> 1;
2643 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2644 que = &adapter->queues[i];
2645 if (i & 1) {
2646 ivar &= 0xFF00FFFF;
2647 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648 } else {
2649 ivar &= 0xFFFFFF00;
2650 ivar |= que->msix | E1000_IVAR_VALID;
2651 }
2652 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2653 }
2654 /* TX entries */
2655 for (int i = 0; i < adapter->num_queues; i++) {
2656 u32 index = i >> 1;
2657 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2658 que = &adapter->queues[i];
2659 if (i & 1) {
2660 ivar &= 0x00FFFFFF;
2661 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2662 } else {
2663 ivar &= 0xFFFF00FF;
2664 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2665 }
2666 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2667 adapter->que_mask |= que->eims;
2668 }
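		/*
		** Worked example of the IVAR layout above (illustrative):
		** queue 2 maps to IVAR0 index 1 with its RX vector in bits
		** 7:0 and its TX vector in bits 15:8, while queue 3 shares
		** that register using bits 23:16 (RX) and 31:24 (TX).
		*/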
2669
2670 /* And for the link interrupt */
2671 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2672 adapter->link_mask = 1 << adapter->linkvec;
2673 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2674 break;
2675 case e1000_82576:
2676 /* RX entries */
2677 for (int i = 0; i < adapter->num_queues; i++) {
2678 u32 index = i & 0x7; /* Each IVAR has two entries */
2679 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2680 que = &adapter->queues[i];
2681 if (i < 8) {
2682 ivar &= 0xFFFFFF00;
2683 ivar |= que->msix | E1000_IVAR_VALID;
2684 } else {
2685 ivar &= 0xFF00FFFF;
2686 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2687 }
2688 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2689 adapter->que_mask |= que->eims;
2690 }
2691 /* TX entries */
2692 for (int i = 0; i < adapter->num_queues; i++) {
2693 u32 index = i & 0x7; /* Each IVAR has two entries */
2694 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2695 que = &adapter->queues[i];
2696 if (i < 8) {
2697 ivar &= 0xFFFF00FF;
2698 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2699 } else {
2700 ivar &= 0x00FFFFFF;
2701 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2702 }
2703 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2704 adapter->que_mask |= que->eims;
2705 }
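		/*
		** Example (illustrative): on 82576 queues 1 and 9 share
		** IVAR0 index 1 -- queue 1 in the low half (RX bits 7:0,
		** TX bits 15:8) and queue 9 in the high half (RX bits
		** 23:16, TX bits 31:24).
		*/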
2706
2707 /* And for the link interrupt */
2708 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2709 adapter->link_mask = 1 << adapter->linkvec;
2710 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2711 break;
2712
2713 case e1000_82575:
2714 		/* Enable MSI-X support */
2715 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2716 tmp |= E1000_CTRL_EXT_PBA_CLR;
2717 /* Auto-Mask interrupts upon ICR read. */
2718 tmp |= E1000_CTRL_EXT_EIAME;
2719 tmp |= E1000_CTRL_EXT_IRCA;
2720 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2721
2722 /* Queues */
2723 for (int i = 0; i < adapter->num_queues; i++) {
2724 que = &adapter->queues[i];
2725 tmp = E1000_EICR_RX_QUEUE0 << i;
2726 tmp |= E1000_EICR_TX_QUEUE0 << i;
2727 que->eims = tmp;
2728 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2729 i, que->eims);
2730 adapter->que_mask |= que->eims;
2731 }
2732
2733 /* Link */
2734 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2735 E1000_EIMS_OTHER);
2736 adapter->link_mask |= E1000_EIMS_OTHER;
2737 default:
2738 break;
2739 }
2740
2741 /* Set the starting interrupt rate */
2742 if (igb_max_interrupt_rate > 0)
2743 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2744
2745 if (hw->mac.type == e1000_82575)
2746 newitr |= newitr << 16;
2747 else
2748 newitr |= E1000_EITR_CNT_IGNR;
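	/*
	** For example (illustrative only), an igb_max_interrupt_rate of
	** 8000 gives newitr = 4000000 / 8000 = 500, i.e. roughly an
	** 8000 interrupts/sec ceiling per vector until AIM, if enabled,
	** adjusts it.
	*/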
2749
2750 for (int i = 0; i < adapter->num_queues; i++) {
2751 que = &adapter->queues[i];
2752 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2753 }
2754
2755 return;
2756}
2757
2758
2759static void
2760igb_free_pci_resources(struct adapter *adapter)
2761{
2762 struct igb_queue *que = adapter->queues;
2763 device_t dev = adapter->dev;
2764 int rid;
2765
2766 /*
2767 	** There is a slight possibility of a failure mode in
2768 	** attach that results in entering this function before
2769 	** the interrupt resources have been initialized; in that
2770 	** case we must not run the loops below.  We can detect
2771 	** this reliably from the state of the adapter's res
2772 	** pointer.
2773 */
2774 if (adapter->res == NULL)
2775 goto mem;
2776
2777 /*
2778 * First release all the interrupt resources:
2779 */
2780 for (int i = 0; i < adapter->num_queues; i++, que++) {
2781 rid = que->msix + 1;
2782 if (que->tag != NULL) {
2783 bus_teardown_intr(dev, que->res, que->tag);
2784 que->tag = NULL;
2785 }
2786 if (que->res != NULL)
2787 bus_release_resource(dev,
2788 SYS_RES_IRQ, rid, que->res);
2789 }
2790
2791 /* Clean the Legacy or Link interrupt last */
2792 if (adapter->linkvec) /* we are doing MSIX */
2793 rid = adapter->linkvec + 1;
2794 else
2795 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2796
2797 que = adapter->queues;
2798 if (adapter->tag != NULL) {
2799 taskqueue_drain(que->tq, &adapter->link_task);
2800 bus_teardown_intr(dev, adapter->res, adapter->tag);
2801 adapter->tag = NULL;
2802 }
2803 if (adapter->res != NULL)
2804 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2805
2806 for (int i = 0; i < adapter->num_queues; i++, que++) {
2807 if (que->tq != NULL) {
2808#ifndef IGB_LEGACY_TX
2809 taskqueue_drain(que->tq, &que->txr->txq_task);
2810#endif
2811 taskqueue_drain(que->tq, &que->que_task);
2812 taskqueue_free(que->tq);
2813 }
2814 }
2815mem:
2816 if (adapter->msix)
2817 pci_release_msi(dev);
2818
2819 if (adapter->msix_mem != NULL)
2820 bus_release_resource(dev, SYS_RES_MEMORY,
2821 adapter->memrid, adapter->msix_mem);
2822
2823 if (adapter->pci_mem != NULL)
2824 bus_release_resource(dev, SYS_RES_MEMORY,
2825 PCIR_BAR(0), adapter->pci_mem);
2826
2827}
2828
2829/*
2830 * Setup Either MSI/X or MSI
2831 */
2832static int
2833igb_setup_msix(struct adapter *adapter)
2834{
2835 device_t dev = adapter->dev;
2836 int bar, want, queues, msgs, maxqueues;
2837
2838 /* tuneable override */
2839 if (igb_enable_msix == 0)
2840 goto msi;
2841
2842 /* First try MSI/X */
2843 msgs = pci_msix_count(dev);
2844 if (msgs == 0)
2845 goto msi;
2846 /*
2847 ** Some new devices, as with ixgbe, now may
2848 ** use a different BAR, so we need to keep
2849 ** track of which is used.
2850 */
2851 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2852 bar = pci_read_config(dev, adapter->memrid, 4);
2853 if (bar == 0) /* use next bar */
2854 adapter->memrid += 4;
2855 adapter->msix_mem = bus_alloc_resource_any(dev,
2856 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2857 if (adapter->msix_mem == NULL) {
2858 /* May not be enabled */
2859 device_printf(adapter->dev,
2860 "Unable to map MSIX table \n");
2861 goto msi;
2862 }
2863
2864 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2865
2866 /* Override via tuneable */
2867 if (igb_num_queues != 0)
2868 queues = igb_num_queues;
2869
2870#ifdef RSS
2871 /* If we're doing RSS, clamp at the number of RSS buckets */
2872 if (queues > rss_getnumbuckets())
2873 queues = rss_getnumbuckets();
2874#endif
2875
2876
2877 /* Sanity check based on HW */
2878 switch (adapter->hw.mac.type) {
2879 case e1000_82575:
2880 maxqueues = 4;
2881 break;
2882 case e1000_82576:
2883 case e1000_82580:
2884 case e1000_i350:
2885 case e1000_i354:
2886 maxqueues = 8;
2887 break;
2888 case e1000_i210:
2889 maxqueues = 4;
2890 break;
2891 case e1000_i211:
2892 maxqueues = 2;
2893 break;
2894 default: /* VF interfaces */
2895 maxqueues = 1;
2896 break;
2897 }
2898
2899 /* Final clamp on the actual hardware capability */
2900 if (queues > maxqueues)
2901 queues = maxqueues;
2902
2903 /*
2904 ** One vector (RX/TX pair) per queue
2905 ** plus an additional for Link interrupt
2906 */
2907 want = queues + 1;
2908 if (msgs >= want)
2909 msgs = want;
2910 else {
2911 device_printf(adapter->dev,
2912 "MSIX Configuration Problem, "
2913 "%d vectors configured, but %d queues wanted!\n",
2914 msgs, want);
2915 goto msi;
2916 }
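	/*
	** Example (hypothetical numbers): on an 8-core box where
	** pci_msix_count() reports 10 messages, queues = min(8, 9) = 8
	** and want = 9, so 9 vectors are requested: one per queue pair
	** plus one for the link interrupt.
	*/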
2917 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2918 device_printf(adapter->dev,
2919 "Using MSIX interrupts with %d vectors\n", msgs);
2920 adapter->num_queues = queues;
2921 return (msgs);
2922 }
2923 /*
2924 ** If MSIX alloc failed or provided us with
2925 ** less than needed, free and fall through to MSI
2926 */
2927 pci_release_msi(dev);
2928
2929msi:
2930 if (adapter->msix_mem != NULL) {
2931 bus_release_resource(dev, SYS_RES_MEMORY,
2932 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2933 adapter->msix_mem = NULL;
2934 }
2935 msgs = 1;
2936 if (pci_alloc_msi(dev, &msgs) == 0) {
2937 		device_printf(adapter->dev, "Using an MSI interrupt\n");
2938 		return (msgs);
2939 	}
2940 	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2941 return (0);
2942}
2943
2944/*********************************************************************
2945 *
2946 * Initialize the DMA Coalescing feature
2947 *
2948 **********************************************************************/
2949static void
2950igb_init_dmac(struct adapter *adapter, u32 pba)
2951{
2952 device_t dev = adapter->dev;
2953 struct e1000_hw *hw = &adapter->hw;
2954 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2955 u16 hwm;
2956
2957 if (hw->mac.type == e1000_i211)
2958 return;
2959
2960 if (hw->mac.type > e1000_82580) {
2961
2962 if (adapter->dmac == 0) { /* Disabling it */
2963 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2964 return;
2965 } else
2966 device_printf(dev, "DMA Coalescing enabled\n");
2967
2968 /* Set starting threshold */
2969 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2970
2971 hwm = 64 * pba - adapter->max_frame_size / 16;
2972 if (hwm < 64 * (pba - 6))
2973 hwm = 64 * (pba - 6);
2974 reg = E1000_READ_REG(hw, E1000_FCRTC);
2975 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2976 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2977 & E1000_FCRTC_RTH_COAL_MASK);
2978 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2979
2980
2981 dmac = pba - adapter->max_frame_size / 512;
2982 if (dmac < pba - 10)
2983 dmac = pba - 10;
2984 reg = E1000_READ_REG(hw, E1000_DMACR);
2985 reg &= ~E1000_DMACR_DMACTHR_MASK;
2986 		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2987 		    & E1000_DMACR_DMACTHR_MASK);
2988
2989 		/* Transition to L0s or L1 if available */
2990 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2991
2992 		/* Check whether the link is a 2.5Gb backplane connection
2993 		 * before configuring the watchdog timer: on a 2.5Gb link
2994 		 * the timer counts msec values in 12.8usec intervals,
2995 		 * while on other links it counts msec values in 32usec
2996 		 * intervals.
2997 */
2998 if (hw->mac.type == e1000_i354) {
2999 int status = E1000_READ_REG(hw, E1000_STATUS);
3000 if ((status & E1000_STATUS_2P5_SKU) &&
3001 (!(status & E1000_STATUS_2P5_SKU_OVER)))
3002 reg |= ((adapter->dmac * 5) >> 6);
3003 else
3004 reg |= (adapter->dmac >> 5);
3005 } else {
3006 reg |= (adapter->dmac >> 5);
3007 }
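		/*
		** Illustrative numbers: with a 32KB Rx packet buffer
		** (pba = 32) and a 1522-byte max frame, hwm = 64*32 - 95
		** = 1953 (above the 64*(32-6) floor) and dmac = 32 - 2 =
		** 30, comfortably above the pba - 10 floor of 22.
		*/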
3008
3009 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3010
3011#ifdef I210_OBFF_SUPPORT
3012 /*
3013 * Set the OBFF Rx threshold to DMA Coalescing Rx
3014 * threshold - 2KB and enable the feature in the
3015 * hardware for I210.
3016 */
3017 if (hw->mac.type == e1000_i210) {
3018 int obff = dmac - 2;
3019 reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
3020 reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
3021 reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
3022 | E1000_DOBFFCTL_EXIT_ACT_MASK;
3023 E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
3024 }
3025#endif
3026 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3027
3028 /* Set the interval before transition */
3029 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3030 if (hw->mac.type == e1000_i350)
3031 reg |= IGB_DMCTLX_DCFLUSH_DIS;
3032 /*
3033 		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so 0xA
3034 		** (10 * 0.4 usec) still gives the same 4 usec delay as 0x4.
3035 */
3036 if (hw->mac.type == e1000_i354) {
3037 int status = E1000_READ_REG(hw, E1000_STATUS);
3038 if ((status & E1000_STATUS_2P5_SKU) &&
3039 (!(status & E1000_STATUS_2P5_SKU_OVER)))
3040 reg |= 0xA;
3041 else
3042 reg |= 0x4;
3043 } else {
3044 reg |= 0x4;
3045 }
3046
3047 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3048
3049 /* free space in tx packet buffer to wake from DMA coal */
3050 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3051 (2 * adapter->max_frame_size)) >> 6);
3052
3053 /* make low power state decision controlled by DMA coal */
3054 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3055 reg &= ~E1000_PCIEMISC_LX_DECISION;
3056 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3057
3058 } else if (hw->mac.type == e1000_82580) {
3059 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3060 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3061 reg & ~E1000_PCIEMISC_LX_DECISION);
3062 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3063 }
3064}
3065
3066
3067/*********************************************************************
3068 *
3069 * Set up a fresh starting state
3070 *
3071 **********************************************************************/
3072static void
3073igb_reset(struct adapter *adapter)
3074{
3075 device_t dev = adapter->dev;
3076 struct e1000_hw *hw = &adapter->hw;
3077 struct e1000_fc_info *fc = &hw->fc;
3078 struct ifnet *ifp = adapter->ifp;
3079 u32 pba = 0;
3080 u16 hwm;
3081
3082 INIT_DEBUGOUT("igb_reset: begin");
3083
3084 /* Let the firmware know the OS is in control */
3085 igb_get_hw_control(adapter);
3086
3087 /*
3088 * Packet Buffer Allocation (PBA)
3089 	 * Writing PBA sets the receive portion of the buffer;
3090 	 * the remainder is used for the transmit buffer.
3091 */
3092 switch (hw->mac.type) {
3093 case e1000_82575:
3094 pba = E1000_PBA_32K;
3095 break;
3096 case e1000_82576:
3097 case e1000_vfadapt:
3098 pba = E1000_READ_REG(hw, E1000_RXPBS);
3099 pba &= E1000_RXPBS_SIZE_MASK_82576;
3100 break;
3101 case e1000_82580:
3102 case e1000_i350:
3103 case e1000_i354:
3104 case e1000_vfadapt_i350:
3105 pba = E1000_READ_REG(hw, E1000_RXPBS);
3106 pba = e1000_rxpbs_adjust_82580(pba);
3107 break;
3108 case e1000_i210:
3109 case e1000_i211:
3110 pba = E1000_PBA_34K;
3111 default:
3112 break;
3113 }
3114
3115 /* Special needs in case of Jumbo frames */
3116 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3117 u32 tx_space, min_tx, min_rx;
3118 pba = E1000_READ_REG(hw, E1000_PBA);
3119 tx_space = pba >> 16;
3120 pba &= 0xffff;
3121 min_tx = (adapter->max_frame_size +
3122 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3123 min_tx = roundup2(min_tx, 1024);
3124 min_tx >>= 10;
3125 min_rx = adapter->max_frame_size;
3126 min_rx = roundup2(min_rx, 1024);
3127 min_rx >>= 10;
3128 if (tx_space < min_tx &&
3129 ((min_tx - tx_space) < pba)) {
3130 pba = pba - (min_tx - tx_space);
3131 /*
3132 * if short on rx space, rx wins
3133 * and must trump tx adjustment
3134 */
3135 if (pba < min_rx)
3136 pba = min_rx;
3137 }
3138 E1000_WRITE_REG(hw, E1000_PBA, pba);
3139 }
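	/*
	** Rough example (assuming a 9000-byte MTU on 82575): the frame
	** plus headers is about 9018 bytes, so min_tx works out to
	** roughly 18KB and min_rx to about 9KB; if the TX share of the
	** PBA is below that, space is shifted from RX to TX, but never
	** below the RX minimum.
	*/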
3140
3141 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3142
3143 /*
3144 * These parameters control the automatic generation (Tx) and
3145 * response (Rx) to Ethernet PAUSE frames.
3146 * - High water mark should allow for at least two frames to be
3147 * received after sending an XOFF.
3148 * - Low water mark works best when it is very near the high water mark.
3149 * This allows the receiver to restart by sending XON when it has
3150 * drained a bit.
3151 */
3152 hwm = min(((pba << 10) * 9 / 10),
3153 ((pba << 10) - 2 * adapter->max_frame_size));
3154
3155 if (hw->mac.type < e1000_82576) {
3156 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3157 fc->low_water = fc->high_water - 8;
3158 } else {
3159 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3160 fc->low_water = fc->high_water - 16;
3161 }
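	/*
	** Illustrative example: with pba = 34 (i210/i211) and a
	** 1518-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 3036)
	** = 31334, masked to 31328 on 16-byte granularity, with the
	** low water mark 16 bytes below that.
	*/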
3162
3163 fc->pause_time = IGB_FC_PAUSE_TIME;
3164 fc->send_xon = TRUE;
3165 if (adapter->fc)
3166 fc->requested_mode = adapter->fc;
3167 else
3168 fc->requested_mode = e1000_fc_default;
3169
3170 /* Issue a global reset */
3171 e1000_reset_hw(hw);
3172 E1000_WRITE_REG(hw, E1000_WUC, 0);
3173
3174 /* Reset for AutoMediaDetect */
3175 if (adapter->flags & IGB_MEDIA_RESET) {
3176 e1000_setup_init_funcs(hw, TRUE);
3177 e1000_get_bus_info(hw);
3178 adapter->flags &= ~IGB_MEDIA_RESET;
3179 }
3180
3181 if (e1000_init_hw(hw) < 0)
3182 device_printf(dev, "Hardware Initialization Failed\n");
3183
3184 /* Setup DMA Coalescing */
3185 igb_init_dmac(adapter, pba);
3186
3187 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3188 e1000_get_phy_info(hw);
3189 e1000_check_for_link(hw);
3190 return;
3191}
3192
3193/*********************************************************************
3194 *
3195 * Setup networking device structure and register an interface.
3196 *
3197 **********************************************************************/
3198static int
3199igb_setup_interface(device_t dev, struct adapter *adapter)
3200{
3201 struct ifnet *ifp;
3202
3203 INIT_DEBUGOUT("igb_setup_interface: begin");
3204
3205 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3206 if (ifp == NULL) {
3207 device_printf(dev, "can not allocate ifnet structure\n");
3208 return (-1);
3209 }
3210 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3211 ifp->if_init = igb_init;
3212 ifp->if_softc = adapter;
3213 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3214 ifp->if_ioctl = igb_ioctl;
3215 ifp->if_get_counter = igb_get_counter;
3216#ifndef IGB_LEGACY_TX
3217 ifp->if_transmit = igb_mq_start;
3218 ifp->if_qflush = igb_qflush;
3219#else
3220 ifp->if_start = igb_start;
3221 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3222 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3223 IFQ_SET_READY(&ifp->if_snd);
3224#endif
3225
3226 ether_ifattach(ifp, adapter->hw.mac.addr);
3227
3228 ifp->if_capabilities = ifp->if_capenable = 0;
3229
3230 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3231 ifp->if_capabilities |= IFCAP_TSO;
3232 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3233 ifp->if_capenable = ifp->if_capabilities;
3234
3235 	/* Advertise LRO capability, but don't enable it by default */
3236 ifp->if_capabilities |= IFCAP_LRO;
3237
3238#ifdef DEVICE_POLLING
3239 ifp->if_capabilities |= IFCAP_POLLING;
3240#endif
3241
3242 /*
3243 * Tell the upper layer(s) we
3244 * support full VLAN capability.
3245 */
3246 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3247 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3248 | IFCAP_VLAN_HWTSO
3249 | IFCAP_VLAN_MTU;
3250 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3251 | IFCAP_VLAN_HWTSO
3252 | IFCAP_VLAN_MTU;
3253
3254 /*
3255 	** Don't turn this on by default: if vlans are
3256 	** created on another pseudo device (e.g. lagg),
3257 	** vlan events are not passed through and operation
3258 	** breaks, but with HW FILTER off it works.  If you
3259 	** use vlans directly on the igb interface you can
3260 	** enable this to get full hardware tag filtering.
3261 */
3262 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3263
3264 /*
3265 * Specify the media types supported by this adapter and register
3266 * callbacks to update media and link information
3267 */
3268 ifmedia_init(&adapter->media, IFM_IMASK,
3269 igb_media_change, igb_media_status);
3270 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3271 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3272 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3273 0, NULL);
3274 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3275 } else {
3276 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3277 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3278 0, NULL);
3279 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3280 0, NULL);
3281 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3282 0, NULL);
3283 if (adapter->hw.phy.type != e1000_phy_ife) {
3284 ifmedia_add(&adapter->media,
3285 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3286 ifmedia_add(&adapter->media,
3287 IFM_ETHER | IFM_1000_T, 0, NULL);
3288 }
3289 }
3290 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3291 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3292 return (0);
3293}
3294
3295
3296/*
3297 * Manage DMA'able memory.
3298 */
3299static void
3300igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3301{
3302 if (error)
3303 return;
3304 *(bus_addr_t *) arg = segs[0].ds_addr;
3305}
3306
3307static int
3308igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3309 struct igb_dma_alloc *dma, int mapflags)
3310{
3311 int error;
3312
3313 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3314 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3315 BUS_SPACE_MAXADDR, /* lowaddr */
3316 BUS_SPACE_MAXADDR, /* highaddr */
3317 NULL, NULL, /* filter, filterarg */
3318 size, /* maxsize */
3319 1, /* nsegments */
3320 size, /* maxsegsize */
3321 0, /* flags */
3322 NULL, /* lockfunc */
3323 NULL, /* lockarg */
3324 &dma->dma_tag);
3325 if (error) {
3326 device_printf(adapter->dev,
3327 "%s: bus_dma_tag_create failed: %d\n",
3328 __func__, error);
3329 goto fail_0;
3330 }
3331
3332 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3333 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3334 if (error) {
3335 device_printf(adapter->dev,
3336 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3337 __func__, (uintmax_t)size, error);
3338 goto fail_2;
3339 }
3340
3341 dma->dma_paddr = 0;
3342 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3343 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3344 if (error || dma->dma_paddr == 0) {
3345 device_printf(adapter->dev,
3346 "%s: bus_dmamap_load failed: %d\n",
3347 __func__, error);
3348 goto fail_3;
3349 }
3350
3351 return (0);
3352
3353fail_3:
3354 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3355fail_2:
3356 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3357 bus_dma_tag_destroy(dma->dma_tag);
3358fail_0:
3359 dma->dma_tag = NULL;
3360
3361 return (error);
3362}
3363
3364static void
3365igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3366{
3367 if (dma->dma_tag == NULL)
3368 return;
3369 if (dma->dma_paddr != 0) {
3370 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3371 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3372 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3373 dma->dma_paddr = 0;
3374 }
3375 if (dma->dma_vaddr != NULL) {
3376 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3377 dma->dma_vaddr = NULL;
3378 }
3379 bus_dma_tag_destroy(dma->dma_tag);
3380 dma->dma_tag = NULL;
3381}
3382
3383
3384/*********************************************************************
3385 *
3386 * Allocate memory for the transmit and receive rings, and then
3387 * the descriptors associated with each, called only once at attach.
3388 *
3389 **********************************************************************/
3390static int
3391igb_allocate_queues(struct adapter *adapter)
3392{
3393 device_t dev = adapter->dev;
3394 struct igb_queue *que = NULL;
3395 struct tx_ring *txr = NULL;
3396 struct rx_ring *rxr = NULL;
3397 int rsize, tsize, error = E1000_SUCCESS;
3398 int txconf = 0, rxconf = 0;
3399
3400 /* First allocate the top level queue structs */
3401 if (!(adapter->queues =
3402 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3403 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3404 device_printf(dev, "Unable to allocate queue memory\n");
3405 error = ENOMEM;
3406 goto fail;
3407 }
3408
3409 /* Next allocate the TX ring struct memory */
3410 if (!(adapter->tx_rings =
3411 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3412 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3413 device_printf(dev, "Unable to allocate TX ring memory\n");
3414 error = ENOMEM;
3415 goto tx_fail;
3416 }
3417
3418 /* Now allocate the RX */
3419 if (!(adapter->rx_rings =
3420 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3421 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3422 device_printf(dev, "Unable to allocate RX ring memory\n");
3423 error = ENOMEM;
3424 goto rx_fail;
3425 }
3426
3427 tsize = roundup2(adapter->num_tx_desc *
3428 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
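	/*
	** For example, with, say, 1024 TX descriptors per ring (a
	** common default) at 16 bytes each, tsize is 16KB per ring,
	** already a multiple of the IGB_DBA_ALIGN boundary.
	*/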
3429 /*
3430 * Now set up the TX queues, txconf is needed to handle the
3431 * possibility that things fail midcourse and we need to
3432 * undo memory gracefully
3433 */
3434 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3435 /* Set up some basics */
3436 txr = &adapter->tx_rings[i];
3437 txr->adapter = adapter;
3438 txr->me = i;
3439 txr->num_desc = adapter->num_tx_desc;
3440
3441 /* Initialize the TX lock */
3442 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3443 device_get_nameunit(dev), txr->me);
3444 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3445
3446 if (igb_dma_malloc(adapter, tsize,
3447 &txr->txdma, BUS_DMA_NOWAIT)) {
3448 device_printf(dev,
3449 "Unable to allocate TX Descriptor memory\n");
3450 error = ENOMEM;
3451 goto err_tx_desc;
3452 }
3453 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3454 bzero((void *)txr->tx_base, tsize);
3455
3456 /* Now allocate transmit buffers for the ring */
3457 if (igb_allocate_transmit_buffers(txr)) {
3458 device_printf(dev,
3459 "Critical Failure setting up transmit buffers\n");
3460 error = ENOMEM;
3461 goto err_tx_desc;
3462 }
3463#ifndef IGB_LEGACY_TX
3464 /* Allocate a buf ring */
3465 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3466 M_WAITOK, &txr->tx_mtx);
3467#endif
3468 }
3469
3470 /*
3471 * Next the RX queues...
3472 */
3473 rsize = roundup2(adapter->num_rx_desc *
3474 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3475 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3476 rxr = &adapter->rx_rings[i];
3477 rxr->adapter = adapter;
3478 rxr->me = i;
3479
3480 /* Initialize the RX lock */
3481 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3482 		    device_get_nameunit(dev), rxr->me);
3483 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3484
3485 if (igb_dma_malloc(adapter, rsize,
3486 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3487 device_printf(dev,
3488 			    "Unable to allocate RX Descriptor memory\n");
3489 error = ENOMEM;
3490 goto err_rx_desc;
3491 }
3492 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3493 bzero((void *)rxr->rx_base, rsize);
3494
3495 /* Allocate receive buffers for the ring*/
3496 if (igb_allocate_receive_buffers(rxr)) {
3497 device_printf(dev,
3498 "Critical Failure setting up receive buffers\n");
3499 error = ENOMEM;
3500 goto err_rx_desc;
3501 }
3502 }
3503
3504 /*
3505 ** Finally set up the queue holding structs
3506 */
3507 for (int i = 0; i < adapter->num_queues; i++) {
3508 que = &adapter->queues[i];
3509 que->adapter = adapter;
3510 que->txr = &adapter->tx_rings[i];
3511 que->rxr = &adapter->rx_rings[i];
3512 }
3513
3514 return (0);
3515
3516err_rx_desc:
3517 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3518 igb_dma_free(adapter, &rxr->rxdma);
3519err_tx_desc:
3520 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3521 igb_dma_free(adapter, &txr->txdma);
3522 free(adapter->rx_rings, M_DEVBUF);
3523rx_fail:
3524#ifndef IGB_LEGACY_TX
3525 buf_ring_free(txr->br, M_DEVBUF);
3526#endif
3527 free(adapter->tx_rings, M_DEVBUF);
3528tx_fail:
3529 free(adapter->queues, M_DEVBUF);
3530fail:
3531 return (error);
3532}
3533
3534/*********************************************************************
3535 *
3536 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3537 * the information needed to transmit a packet on the wire. This is
3538 * called only once at attach, setup is done every reset.
3539 *
3540 **********************************************************************/
3541static int
3542igb_allocate_transmit_buffers(struct tx_ring *txr)
3543{
3544 struct adapter *adapter = txr->adapter;
3545 device_t dev = adapter->dev;
3546 struct igb_tx_buf *txbuf;
3547 int error, i;
3548
3549 /*
3550 * Setup DMA descriptor areas.
3551 */
3552 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3553 1, 0, /* alignment, bounds */
3554 BUS_SPACE_MAXADDR, /* lowaddr */
3555 BUS_SPACE_MAXADDR, /* highaddr */
3556 NULL, NULL, /* filter, filterarg */
3557 IGB_TSO_SIZE, /* maxsize */
3558 IGB_MAX_SCATTER, /* nsegments */
3559 PAGE_SIZE, /* maxsegsize */
3560 0, /* flags */
3561 NULL, /* lockfunc */
3562 NULL, /* lockfuncarg */
3563 &txr->txtag))) {
3564 device_printf(dev,"Unable to allocate TX DMA tag\n");
3565 goto fail;
3566 }
3567
3568 if (!(txr->tx_buffers =
3569 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3570 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3571 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3572 error = ENOMEM;
3573 goto fail;
3574 }
3575
3576 /* Create the descriptor buffer dma maps */
3577 txbuf = txr->tx_buffers;
3578 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3579 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3580 if (error != 0) {
3581 device_printf(dev, "Unable to create TX DMA map\n");
3582 goto fail;
3583 }
3584 }
3585
3586 return 0;
3587fail:
3588 /* We free all, it handles case where we are in the middle */
3589 igb_free_transmit_structures(adapter);
3590 return (error);
3591}
3592
3593/*********************************************************************
3594 *
3595 * Initialize a transmit ring.
3596 *
3597 **********************************************************************/
3598static void
3599igb_setup_transmit_ring(struct tx_ring *txr)
3600{
3601 struct adapter *adapter = txr->adapter;
3602 struct igb_tx_buf *txbuf;
3603 int i;
3604#ifdef DEV_NETMAP
3605 struct netmap_adapter *na = NA(adapter->ifp);
3606 struct netmap_slot *slot;
3607#endif /* DEV_NETMAP */
3608
3609 /* Clear the old descriptor contents */
3610 IGB_TX_LOCK(txr);
3611#ifdef DEV_NETMAP
3612 slot = netmap_reset(na, NR_TX, txr->me, 0);
3613#endif /* DEV_NETMAP */
3614 bzero((void *)txr->tx_base,
3615 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3616 /* Reset indices */
3617 txr->next_avail_desc = 0;
3618 txr->next_to_clean = 0;
3619
3620 /* Free any existing tx buffers. */
3621 txbuf = txr->tx_buffers;
3622 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3623 if (txbuf->m_head != NULL) {
3624 bus_dmamap_sync(txr->txtag, txbuf->map,
3625 BUS_DMASYNC_POSTWRITE);
3626 bus_dmamap_unload(txr->txtag, txbuf->map);
3627 m_freem(txbuf->m_head);
3628 txbuf->m_head = NULL;
3629 }
3630#ifdef DEV_NETMAP
3631 if (slot) {
3632 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3633 /* no need to set the address */
3634 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3635 }
3636#endif /* DEV_NETMAP */
3637 /* clear the watch index */
3638 txbuf->eop = NULL;
3639 }
3640
3641 /* Set number of descriptors available */
3642 txr->tx_avail = adapter->num_tx_desc;
3643
3644 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3645 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3646 IGB_TX_UNLOCK(txr);
3647}
3648
3649/*********************************************************************
3650 *
3651 * Initialize all transmit rings.
3652 *
3653 **********************************************************************/
3654static void
3655igb_setup_transmit_structures(struct adapter *adapter)
3656{
3657 struct tx_ring *txr = adapter->tx_rings;
3658
3659 for (int i = 0; i < adapter->num_queues; i++, txr++)
3660 igb_setup_transmit_ring(txr);
3661
3662 return;
3663}
3664
3665/*********************************************************************
3666 *
3667 * Enable transmit unit.
3668 *
3669 **********************************************************************/
3670static void
3671igb_initialize_transmit_units(struct adapter *adapter)
3672{
3673 struct tx_ring *txr = adapter->tx_rings;
3674 struct e1000_hw *hw = &adapter->hw;
3675 u32 tctl, txdctl;
3676
3677 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3678 tctl = txdctl = 0;
3679
3680 /* Setup the Tx Descriptor Rings */
3681 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3682 u64 bus_addr = txr->txdma.dma_paddr;
3683
3684 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3685 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3686 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3687 (uint32_t)(bus_addr >> 32));
3688 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3689 (uint32_t)bus_addr);
3690
3691 /* Setup the HW Tx Head and Tail descriptor pointers */
3692 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3693 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3694
3695 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3696 E1000_READ_REG(hw, E1000_TDBAL(i)),
3697 E1000_READ_REG(hw, E1000_TDLEN(i)));
3698
3699 txr->queue_status = IGB_QUEUE_IDLE;
3700
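		/*
		 * The prefetch, host and write-back thresholds are packed
		 * into TXDCTL at bit offsets 0, 8 and 16 respectively, as
		 * the shifts below show.
		 */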
3701 txdctl |= IGB_TX_PTHRESH;
3702 txdctl |= IGB_TX_HTHRESH << 8;
3703 txdctl |= IGB_TX_WTHRESH << 16;
3704 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3705 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3706 }
3707
3708 if (adapter->vf_ifp)
3709 return;
3710
3711 e1000_config_collision_dist(hw);
3712
3713 /* Program the Transmit Control Register */
3714 tctl = E1000_READ_REG(hw, E1000_TCTL);
3715 tctl &= ~E1000_TCTL_CT;
3716 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3717 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3718
3719 /* This write will effectively turn on the transmit unit. */
3720 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3721}
3722
3723/*********************************************************************
3724 *
3725 * Free all transmit rings.
3726 *
3727 **********************************************************************/
3728static void
3729igb_free_transmit_structures(struct adapter *adapter)
3730{
3731 struct tx_ring *txr = adapter->tx_rings;
3732
3733 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3734 IGB_TX_LOCK(txr);
3735 igb_free_transmit_buffers(txr);
3736 igb_dma_free(adapter, &txr->txdma);
3737 IGB_TX_UNLOCK(txr);
3738 IGB_TX_LOCK_DESTROY(txr);
3739 }
3740 free(adapter->tx_rings, M_DEVBUF);
3741}
3742
3743/*********************************************************************
3744 *
3745 * Free transmit ring related data structures.
3746 *
3747 **********************************************************************/
3748static void
3749igb_free_transmit_buffers(struct tx_ring *txr)
3750{
3751 struct adapter *adapter = txr->adapter;
3752 struct igb_tx_buf *tx_buffer;
3753 int i;
3754
3755 INIT_DEBUGOUT("free_transmit_ring: begin");
3756
3757 if (txr->tx_buffers == NULL)
3758 return;
3759
3760 tx_buffer = txr->tx_buffers;
3761 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3762 if (tx_buffer->m_head != NULL) {
3763 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3764 BUS_DMASYNC_POSTWRITE);
3765 bus_dmamap_unload(txr->txtag,
3766 tx_buffer->map);
3767 m_freem(tx_buffer->m_head);
3768 tx_buffer->m_head = NULL;
3769 if (tx_buffer->map != NULL) {
3770 bus_dmamap_destroy(txr->txtag,
3771 tx_buffer->map);
3772 tx_buffer->map = NULL;
3773 }
3774 } else if (tx_buffer->map != NULL) {
3775 bus_dmamap_unload(txr->txtag,
3776 tx_buffer->map);
3777 bus_dmamap_destroy(txr->txtag,
3778 tx_buffer->map);
3779 tx_buffer->map = NULL;
3780 }
3781 }
3782#ifndef IGB_LEGACY_TX
3783 if (txr->br != NULL)
3784 buf_ring_free(txr->br, M_DEVBUF);
3785#endif
3786 if (txr->tx_buffers != NULL) {
3787 free(txr->tx_buffers, M_DEVBUF);
3788 txr->tx_buffers = NULL;
3789 }
3790 if (txr->txtag != NULL) {
3791 bus_dma_tag_destroy(txr->txtag);
3792 txr->txtag = NULL;
3793 }
3794 return;
3795}
3796
3797/**********************************************************************
3798 *
3799 * Setup work for hardware segmentation offload (TSO) on
3800 * adapters using advanced tx descriptors
3801 *
3802 **********************************************************************/
3803static int
3804igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3805 u32 *cmd_type_len, u32 *olinfo_status)
3806{
3807 struct adapter *adapter = txr->adapter;
3808 struct e1000_adv_tx_context_desc *TXD;
3809 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3810 u32 mss_l4len_idx = 0, paylen;
3811 u16 vtag = 0, eh_type;
3812 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3813 struct ether_vlan_header *eh;
3814#ifdef INET6
3815 struct ip6_hdr *ip6;
3816#endif
3817#ifdef INET
3818 struct ip *ip;
3819#endif
3820 struct tcphdr *th;
3821
3822
3823 /*
3824 * Determine where frame payload starts.
3825 * Jump over vlan headers if already present
3826 */
3827 eh = mtod(mp, struct ether_vlan_header *);
3828 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3829 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3830 eh_type = eh->evl_proto;
3831 } else {
3832 ehdrlen = ETHER_HDR_LEN;
3833 eh_type = eh->evl_encap_proto;
3834 }
3835
3836 switch (ntohs(eh_type)) {
3837#ifdef INET6
3838 case ETHERTYPE_IPV6:
3839 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3840 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3841 if (ip6->ip6_nxt != IPPROTO_TCP)
3842 return (ENXIO);
3843 ip_hlen = sizeof(struct ip6_hdr);
3844 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3845 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3846 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3847 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3848 break;
3849#endif
3850#ifdef INET
3851 case ETHERTYPE_IP:
3852 ip = (struct ip *)(mp->m_data + ehdrlen);
3853 if (ip->ip_p != IPPROTO_TCP)
3854 return (ENXIO);
3855 ip->ip_sum = 0;
3856 ip_hlen = ip->ip_hl << 2;
3857 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3858 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3859 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3860 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3861 /* Tell transmit desc to also do IPv4 checksum. */
3862 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3863 break;
3864#endif
3865 default:
3866 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3867 __func__, ntohs(eh_type));
3868 break;
3869 }
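	/*
	 * At this point th points at the TCP header and th_sum has been
	 * seeded with the pseudo-header checksum computed over a zero
	 * length; the hardware uses that as the starting point when it
	 * checksums each segment it generates.
	 */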
3870
3871 ctxd = txr->next_avail_desc;
3872 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3873
3874 tcp_hlen = th->th_off << 2;
3875
3876 /* This is used in the transmit desc in encap */
3877 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3878
3879 /* VLAN MACLEN IPLEN */
3880 if (mp->m_flags & M_VLANTAG) {
3881 vtag = htole16(mp->m_pkthdr.ether_vtag);
3882 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3883 }
3884
3885 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3886 vlan_macip_lens |= ip_hlen;
3887 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3888
3889 /* ADV DTYPE TUCMD */
3890 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3891 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3892 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3893
3894 /* MSS L4LEN IDX */
3895 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3896 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3897 /* 82575 needs the queue index added */
3898 if (adapter->hw.mac.type == e1000_82575)
3899 mss_l4len_idx |= txr->me << 4;
3900 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3901
3902 TXD->seqnum_seed = htole32(0);
3903
3904 if (++ctxd == txr->num_desc)
3905 ctxd = 0;
3906
3907 txr->tx_avail--;
3908 txr->next_avail_desc = ctxd;
3909 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3910 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3911 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3912 ++txr->tso_tx;
3913 return (0);
3914}
3915
3916/*********************************************************************
3917 *
3918 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3919 *
3920 **********************************************************************/
3921
3922static int
3923igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3924 u32 *cmd_type_len, u32 *olinfo_status)
3925{
3926 struct e1000_adv_tx_context_desc *TXD;
3927 struct adapter *adapter = txr->adapter;
3928 struct ether_vlan_header *eh;
3929 struct ip *ip;
3930 struct ip6_hdr *ip6;
3931 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3932 int ehdrlen, ip_hlen = 0;
3933 u16 etype;
3934 u8 ipproto = 0;
3935 int offload = TRUE;
3936 int ctxd = txr->next_avail_desc;
3937 u16 vtag = 0;
3938
3939 /* First check if TSO is to be used */
3940 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3941 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3942
3943 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3944 offload = FALSE;
3945
3946 /* Indicate the whole packet as payload when not doing TSO */
3947 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3948
3949 /* Now ready a context descriptor */
3950 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3951
3952 /*
3953 ** In advanced descriptors the vlan tag must
3954 ** be placed into the context descriptor. Hence
3955 ** we need to make one even if not doing offloads.
3956 */
3957 if (mp->m_flags & M_VLANTAG) {
3958 vtag = htole16(mp->m_pkthdr.ether_vtag);
3959 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3960 } else if (offload == FALSE) /* ... no offload to do */
3961 return (0);
3962
3963 /*
3964 * Determine where frame payload starts.
3965 * Jump over vlan headers if already present,
3966 * helpful for QinQ too.
3967 */
3968 eh = mtod(mp, struct ether_vlan_header *);
3969 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3970 etype = ntohs(eh->evl_proto);
3971 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3972 } else {
3973 etype = ntohs(eh->evl_encap_proto);
3974 ehdrlen = ETHER_HDR_LEN;
3975 }
3976
3977 /* Set the ether header length */
3978 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3979
3980 switch (etype) {
3981 case ETHERTYPE_IP:
3982 ip = (struct ip *)(mp->m_data + ehdrlen);
3983 ip_hlen = ip->ip_hl << 2;
3984 ipproto = ip->ip_p;
3985 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3986 break;
3987 case ETHERTYPE_IPV6:
3988 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3989 ip_hlen = sizeof(struct ip6_hdr);
3990 /* XXX-BZ this will go badly in case of ext hdrs. */
3991 ipproto = ip6->ip6_nxt;
3992 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3993 break;
3994 default:
3995 offload = FALSE;
3996 break;
3997 }
3998
3999 vlan_macip_lens |= ip_hlen;
4000 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4001
4002 switch (ipproto) {
4003 case IPPROTO_TCP:
4004 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
4005 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
4006 break;
4007 case IPPROTO_UDP:
4008 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
4009 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
4010 break;
4011
4012#if __FreeBSD_version >= 800000
4013 case IPPROTO_SCTP:
4014 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
4015 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4016 break;
4017#endif
4018 default:
4019 offload = FALSE;
4020 break;
4021 }
4022
4023 if (offload) /* For the TX descriptor setup */
4024 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4025
4026 /* 82575 needs the queue index added */
4027 if (adapter->hw.mac.type == e1000_82575)
4028 mss_l4len_idx = txr->me << 4;
4029
4030 /* Now copy bits into descriptor */
4031 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4032 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4033 TXD->seqnum_seed = htole32(0);
4034 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4035
4036 /* We've consumed the first desc, adjust counters */
4037 if (++ctxd == txr->num_desc)
4038 ctxd = 0;
4039 txr->next_avail_desc = ctxd;
4040 --txr->tx_avail;
4041
4042 return (0);
4043}
4044
4045/**********************************************************************
4046 *
4047 * Examine each tx_buffer in the used queue. If the hardware is done
4048 * processing the packet then free associated resources. The
4049 * tx_buffer is put back on the free queue.
4050 *
4051 * A TRUE return means there's work in the ring to clean, FALSE means it's empty.
4052 **********************************************************************/
4053static bool
4054igb_txeof(struct tx_ring *txr)
4055{
4056 struct adapter *adapter = txr->adapter;
4057#ifdef DEV_NETMAP
4058 struct ifnet *ifp = adapter->ifp;
4059#endif /* DEV_NETMAP */
4060 u32 work, processed = 0;
4061 u16 limit = txr->process_limit;
4062 struct igb_tx_buf *buf;
4063 union e1000_adv_tx_desc *txd;
4064
4065 mtx_assert(&txr->tx_mtx, MA_OWNED);
4066
4067#ifdef DEV_NETMAP
4068 if (netmap_tx_irq(ifp, txr->me))
4069 return (FALSE);
4070#endif /* DEV_NETMAP */
4071
4072 if (txr->tx_avail == txr->num_desc) {
4073 txr->queue_status = IGB_QUEUE_IDLE;
4074 return FALSE;
4075 }
4076
4077 /* Get work starting point */
4078 work = txr->next_to_clean;
4079 buf = &txr->tx_buffers[work];
4080 txd = &txr->tx_base[work];
4081 work -= txr->num_desc; /* The distance to ring end */
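	/*
	 * 'work' is now a negative offset from the end of the ring, so it
	 * counts back up to zero exactly at the wrap point; that lets the
	 * wrap tests below be a simple (!work) check.
	 */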
4082 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4083 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4084 do {
4085 union e1000_adv_tx_desc *eop = buf->eop;
4086 if (eop == NULL) /* No work */
4087 break;
4088
4089 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4090 break; /* I/O not complete */
4091
4092 if (buf->m_head) {
4093 txr->bytes +=
4094 buf->m_head->m_pkthdr.len;
4095 bus_dmamap_sync(txr->txtag,
4096 buf->map,
4097 BUS_DMASYNC_POSTWRITE);
4098 bus_dmamap_unload(txr->txtag,
4099 buf->map);
4100 m_freem(buf->m_head);
4101 buf->m_head = NULL;
4102 }
4103 buf->eop = NULL;
4104 ++txr->tx_avail;
4105
4106 /* We clean the range if multi segment */
4107 while (txd != eop) {
4108 ++txd;
4109 ++buf;
4110 ++work;
4111 /* wrap the ring? */
4112 if (__predict_false(!work)) {
4113 work -= txr->num_desc;
4114 buf = txr->tx_buffers;
4115 txd = txr->tx_base;
4116 }
4117 if (buf->m_head) {
4118 txr->bytes +=
4119 buf->m_head->m_pkthdr.len;
4120 bus_dmamap_sync(txr->txtag,
4121 buf->map,
4122 BUS_DMASYNC_POSTWRITE);
4123 bus_dmamap_unload(txr->txtag,
4124 buf->map);
4125 m_freem(buf->m_head);
4126 buf->m_head = NULL;
4127 }
4128 ++txr->tx_avail;
4129 buf->eop = NULL;
4130
4131 }
4132 ++txr->packets;
4133 ++processed;
4134 txr->watchdog_time = ticks;
4135
4136 /* Try the next packet */
4137 ++txd;
4138 ++buf;
4139 ++work;
4140 /* reset with a wrap */
4141 if (__predict_false(!work)) {
4142 work -= txr->num_desc;
4143 buf = txr->tx_buffers;
4144 txd = txr->tx_base;
4145 }
4146 prefetch(txd);
4147 } while (__predict_true(--limit));
4148
4149 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4150 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4151
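	/* Convert the negative ring offset back into a real index. */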
4152 work += txr->num_desc;
4153 txr->next_to_clean = work;
4154
4155 /*
4156	** Watchdog calculation: we know there's
4157 ** work outstanding or the first return
4158 ** would have been taken, so none processed
4159 ** for too long indicates a hang.
4160 */
4161 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4162 txr->queue_status |= IGB_QUEUE_HUNG;
4163
4164 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4165 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4166
4167 if (txr->tx_avail == txr->num_desc) {
4168 txr->queue_status = IGB_QUEUE_IDLE;
4169 return (FALSE);
4170 }
4171
4172 return (TRUE);
4173}
4174
4175/*********************************************************************
4176 *
4177 * Refresh mbuf buffers for RX descriptor rings
4178 * - now keeps its own state so discards due to resource
4179 * exhaustion are unnecessary; if an mbuf cannot be obtained
4180 * it just returns, keeping its placeholder, so it can simply
4181 * be called again later to retry.
4182 *
4183 **********************************************************************/
4184static void
4185igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4186{
4187 struct adapter *adapter = rxr->adapter;
4188 bus_dma_segment_t hseg[1];
4189 bus_dma_segment_t pseg[1];
4190 struct igb_rx_buf *rxbuf;
4191 struct mbuf *mh, *mp;
4192 int i, j, nsegs, error;
4193 bool refreshed = FALSE;
4194
4195 i = j = rxr->next_to_refresh;
4196 /*
4197 ** Get one descriptor beyond
4198 ** our work mark to control
4199 ** the loop.
4200 */
4201 if (++j == adapter->num_rx_desc)
4202 j = 0;
4203
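	/*
	 * 'i' is the slot being refreshed and 'j' runs one ahead of it;
	 * the loop stops when 'j' reaches 'limit' (the caller's current
	 * clean point), so the refresh never runs into descriptors the
	 * hardware has not yet written back.
	 */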
4204 while (j != limit) {
4205 rxbuf = &rxr->rx_buffers[i];
4206 /* No hdr mbuf used with header split off */
4207 if (rxr->hdr_split == FALSE)
4208 goto no_split;
4209 if (rxbuf->m_head == NULL) {
4210 mh = m_gethdr(M_NOWAIT, MT_DATA);
4211 if (mh == NULL)
4212 goto update;
4213 } else
4214 mh = rxbuf->m_head;
4215
4216 mh->m_pkthdr.len = mh->m_len = MHLEN;
4217 mh->m_len = MHLEN;
4218 mh->m_flags |= M_PKTHDR;
4219 /* Get the memory mapping */
4220 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4221 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4222 if (error != 0) {
4223 printf("Refresh mbufs: hdr dmamap load"
4224 " failure - %d\n", error);
4225 m_free(mh);
4226 rxbuf->m_head = NULL;
4227 goto update;
4228 }
4229 rxbuf->m_head = mh;
4230 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4231 BUS_DMASYNC_PREREAD);
4232 rxr->rx_base[i].read.hdr_addr =
4233 htole64(hseg[0].ds_addr);
4234no_split:
4235 if (rxbuf->m_pack == NULL) {
4236 mp = m_getjcl(M_NOWAIT, MT_DATA,
4237 M_PKTHDR, adapter->rx_mbuf_sz);
4238 if (mp == NULL)
4239 goto update;
4240 } else
4241 mp = rxbuf->m_pack;
4242
4243 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4244 /* Get the memory mapping */
4245 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4246 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4247 if (error != 0) {
4248 printf("Refresh mbufs: payload dmamap load"
4249 " failure - %d\n", error);
4250 m_free(mp);
4251 rxbuf->m_pack = NULL;
4252 goto update;
4253 }
4254 rxbuf->m_pack = mp;
4255 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4256 BUS_DMASYNC_PREREAD);
4257 rxr->rx_base[i].read.pkt_addr =
4258 htole64(pseg[0].ds_addr);
4259 refreshed = TRUE; /* I feel wefreshed :) */
4260
4261 i = j; /* our next is precalculated */
4262 rxr->next_to_refresh = i;
4263 if (++j == adapter->num_rx_desc)
4264 j = 0;
4265 }
4266update:
4267 if (refreshed) /* update tail */
4268 E1000_WRITE_REG(&adapter->hw,
4269 E1000_RDT(rxr->me), rxr->next_to_refresh);
4270 return;
4271}
4272
4273
4274/*********************************************************************
4275 *
4276 * Allocate memory for rx_buffer structures. Since we use one
4277 * rx_buffer per received packet, the maximum number of rx_buffer's
4278 * that we'll need is equal to the number of receive descriptors
4279 * that we've allocated.
4280 *
4281 **********************************************************************/
4282static int
4283igb_allocate_receive_buffers(struct rx_ring *rxr)
4284{
4285 struct adapter *adapter = rxr->adapter;
4286 device_t dev = adapter->dev;
4287 struct igb_rx_buf *rxbuf;
4288 int i, bsize, error;
4289
4290 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4291 if (!(rxr->rx_buffers =
4292 (struct igb_rx_buf *) malloc(bsize,
4293 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4294 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4295 error = ENOMEM;
4296 goto fail;
4297 }
4298
4299 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4300 1, 0, /* alignment, bounds */
4301 BUS_SPACE_MAXADDR, /* lowaddr */
4302 BUS_SPACE_MAXADDR, /* highaddr */
4303 NULL, NULL, /* filter, filterarg */
4304 MSIZE, /* maxsize */
4305 1, /* nsegments */
4306 MSIZE, /* maxsegsize */
4307 0, /* flags */
4308 NULL, /* lockfunc */
4309 NULL, /* lockfuncarg */
4310 &rxr->htag))) {
4311 device_printf(dev, "Unable to create RX DMA tag\n");
4312 goto fail;
4313 }
4314
4315 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4316 1, 0, /* alignment, bounds */
4317 BUS_SPACE_MAXADDR, /* lowaddr */
4318 BUS_SPACE_MAXADDR, /* highaddr */
4319 NULL, NULL, /* filter, filterarg */
4320 MJUM9BYTES, /* maxsize */
4321 1, /* nsegments */
4322 MJUM9BYTES, /* maxsegsize */
4323 0, /* flags */
4324 NULL, /* lockfunc */
4325 NULL, /* lockfuncarg */
4326 &rxr->ptag))) {
4327 device_printf(dev, "Unable to create RX payload DMA tag\n");
4328 goto fail;
4329 }
4330
4331 for (i = 0; i < adapter->num_rx_desc; i++) {
4332 rxbuf = &rxr->rx_buffers[i];
4333 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4334 if (error) {
4335 device_printf(dev,
4336 "Unable to create RX head DMA maps\n");
4337 goto fail;
4338 }
4339 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4340 if (error) {
4341 device_printf(dev,
4342 "Unable to create RX packet DMA maps\n");
4343 goto fail;
4344 }
4345 }
4346
4347 return (0);
4348
4349fail:
4350 /* Frees all, but can handle partial completion */
4351 igb_free_receive_structures(adapter);
4352 return (error);
4353}
4354
4355
4356static void
4357igb_free_receive_ring(struct rx_ring *rxr)
4358{
4359 struct adapter *adapter = rxr->adapter;
4360 struct igb_rx_buf *rxbuf;
4361
4362
4363 for (int i = 0; i < adapter->num_rx_desc; i++) {
4364 rxbuf = &rxr->rx_buffers[i];
4365 if (rxbuf->m_head != NULL) {
4366 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4367 BUS_DMASYNC_POSTREAD);
4368 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4369 rxbuf->m_head->m_flags |= M_PKTHDR;
4370 m_freem(rxbuf->m_head);
4371 }
4372 if (rxbuf->m_pack != NULL) {
4373 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4374 BUS_DMASYNC_POSTREAD);
4375 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4376 rxbuf->m_pack->m_flags |= M_PKTHDR;
4377 m_freem(rxbuf->m_pack);
4378 }
4379 rxbuf->m_head = NULL;
4380 rxbuf->m_pack = NULL;
4381 }
4382}
4383
4384
4385/*********************************************************************
4386 *
4387 * Initialize a receive ring and its buffers.
4388 *
4389 **********************************************************************/
4390static int
4391igb_setup_receive_ring(struct rx_ring *rxr)
4392{
4393 struct adapter *adapter;
4394 struct ifnet *ifp;
4395 device_t dev;
4396 struct igb_rx_buf *rxbuf;
4397 bus_dma_segment_t pseg[1], hseg[1];
4398 struct lro_ctrl *lro = &rxr->lro;
4399 int rsize, nsegs, error = 0;
4400#ifdef DEV_NETMAP
4401 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4402 struct netmap_slot *slot;
4403#endif /* DEV_NETMAP */
4404
4405 adapter = rxr->adapter;
4406 dev = adapter->dev;
4407 ifp = adapter->ifp;
4408
4409 /* Clear the ring contents */
4410 IGB_RX_LOCK(rxr);
4411#ifdef DEV_NETMAP
4412 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4413#endif /* DEV_NETMAP */
4414 rsize = roundup2(adapter->num_rx_desc *
4415 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4416 bzero((void *)rxr->rx_base, rsize);
4417
4418 /*
4419 ** Free current RX buffer structures and their mbufs
4420 */
4421 igb_free_receive_ring(rxr);
4422
4423 /* Configure for header split? */
4424 if (igb_header_split)
4425 rxr->hdr_split = TRUE;
4426
4427 /* Now replenish the ring mbufs */
4428 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4429 struct mbuf *mh, *mp;
4430
4431 rxbuf = &rxr->rx_buffers[j];
4432#ifdef DEV_NETMAP
4433 if (slot) {
4434 /* slot sj is mapped to the j-th NIC-ring entry */
4435 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4436 uint64_t paddr;
4437 void *addr;
4438
4439 addr = PNMB(na, slot + sj, &paddr);
4440 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4441 /* Update descriptor */
4442 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4443 continue;
4444 }
4445#endif /* DEV_NETMAP */
4446 if (rxr->hdr_split == FALSE)
4447 goto skip_head;
4448
4449 /* First the header */
4450 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4451 if (rxbuf->m_head == NULL) {
4452 error = ENOBUFS;
4453 goto fail;
4454 }
4455 m_adj(rxbuf->m_head, ETHER_ALIGN);
4456 mh = rxbuf->m_head;
4457 mh->m_len = mh->m_pkthdr.len = MHLEN;
4458 mh->m_flags |= M_PKTHDR;
4459 /* Get the memory mapping */
4460 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4461 rxbuf->hmap, rxbuf->m_head, hseg,
4462 &nsegs, BUS_DMA_NOWAIT);
4463 if (error != 0) /* Nothing elegant to do here */
4464 goto fail;
4465 bus_dmamap_sync(rxr->htag,
4466 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4467 /* Update descriptor */
4468 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4469
4470skip_head:
4471 /* Now the payload cluster */
4472 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4473 M_PKTHDR, adapter->rx_mbuf_sz);
4474 if (rxbuf->m_pack == NULL) {
4475 error = ENOBUFS;
4476 goto fail;
4477 }
4478 mp = rxbuf->m_pack;
4479 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4480 /* Get the memory mapping */
4481 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4482 rxbuf->pmap, mp, pseg,
4483 &nsegs, BUS_DMA_NOWAIT);
4484 if (error != 0)
4485 goto fail;
4486 bus_dmamap_sync(rxr->ptag,
4487 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4488 /* Update descriptor */
4489 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4490 }
4491
4492 /* Setup our descriptor indices */
4493 rxr->next_to_check = 0;
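	/*
	 * The whole ring was just repopulated above, so the next slot to
	 * refresh is the last descriptor, one behind next_to_check.
	 */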
4494 rxr->next_to_refresh = adapter->num_rx_desc - 1;
4495 rxr->lro_enabled = FALSE;
4496 rxr->rx_split_packets = 0;
4497 rxr->rx_bytes = 0;
4498
4499 rxr->fmp = NULL;
4500 rxr->lmp = NULL;
4501
4502 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4503 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4504
4505 /*
4506	** Now set up the LRO interface; we
4507	** also only do head split when LRO
4508	** is enabled, since so often they
4509	** are undesirable in similar setups.
4510 */
4511 if (ifp->if_capenable & IFCAP_LRO) {
4512 error = tcp_lro_init(lro);
4513 if (error) {
4514 device_printf(dev, "LRO Initialization failed!\n");
4515 goto fail;
4516 }
4517 INIT_DEBUGOUT("RX LRO Initialized\n");
4518 rxr->lro_enabled = TRUE;
4519 lro->ifp = adapter->ifp;
4520 }
4521
4522 IGB_RX_UNLOCK(rxr);
4523 return (0);
4524
4525fail:
4526 igb_free_receive_ring(rxr);
4527 IGB_RX_UNLOCK(rxr);
4528 return (error);
4529}
4530
4531
4532/*********************************************************************
4533 *
4534 * Initialize all receive rings.
4535 *
4536 **********************************************************************/
4537static int
4538igb_setup_receive_structures(struct adapter *adapter)
4539{
4540 struct rx_ring *rxr = adapter->rx_rings;
4541 int i;
4542
4543 for (i = 0; i < adapter->num_queues; i++, rxr++)
4544 if (igb_setup_receive_ring(rxr))
4545 goto fail;
4546
4547 return (0);
4548fail:
4549 /*
4550	 * Free RX buffers allocated so far; we will only handle
4551	 * the rings that completed, since the failing case will have
4552	 * cleaned up for itself. 'i' is the endpoint.
4553 */
4554 for (int j = 0; j < i; ++j) {
4555 rxr = &adapter->rx_rings[j];
4556 IGB_RX_LOCK(rxr);
4557 igb_free_receive_ring(rxr);
4558 IGB_RX_UNLOCK(rxr);
4559 }
4560
4561 return (ENOBUFS);
4562}
4563
4564/*
4565 * Initialise the RSS mapping for NICs that support multiple transmit/
4566 * receive rings.
4567 */
4568static void
4569igb_initialise_rss_mapping(struct adapter *adapter)
4570{
4571 struct e1000_hw *hw = &adapter->hw;
4572 int i;
4573 int queue_id;
4574 u32 reta;
4575 u32 rss_key[10], mrqc, shift = 0;
4576
4577 /* XXX? */
4578 if (adapter->hw.mac.type == e1000_82575)
4579 shift = 6;
4580
4581 /*
4582 * The redirection table controls which destination
4583 * queue each bucket redirects traffic to.
4584 * Each DWORD represents four queues, with the LSB
4585 * being the first queue in the DWORD.
4586 *
4587 * This just allocates buckets to queues using round-robin
4588 * allocation.
4589 *
4590 * NOTE: It Just Happens to line up with the default
4591 * RSS allocation method.
4592 */
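	/*
	 * Worked example: with four queues and no shift the buckets map
	 * 0,1,2,3,0,1,2,3,... and the loop below assembles RETA(0) as
	 * 0x03020100, i.e. one queue number per byte, lowest byte first.
	 */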
4593
4594 /* Warning FM follows */
4595 reta = 0;
4596 for (i = 0; i < 128; i++) {
4597#ifdef RSS
4598 queue_id = rss_get_indirection_to_bucket(i);
4599 /*
4600 * If we have more queues than buckets, we'll
4601 * end up mapping buckets to a subset of the
4602 * queues.
4603 *
4604 * If we have more buckets than queues, we'll
4605 * end up instead assigning multiple buckets
4606 * to queues.
4607 *
4608 * Both are suboptimal, but we need to handle
4609 * the case so we don't go out of bounds
4610 * indexing arrays and such.
4611 */
4612 queue_id = queue_id % adapter->num_queues;
4613#else
4614 queue_id = (i % adapter->num_queues);
4615#endif
4616 /* Adjust if required */
4617 queue_id = queue_id << shift;
4618
4619 /*
4620 * The low 8 bits are for hash value (n+0);
4621 * The next 8 bits are for hash value (n+1), etc.
4622 */
4623 reta = reta >> 8;
4624 reta = reta | ( ((uint32_t) queue_id) << 24);
4625 if ((i & 3) == 3) {
4626 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4627 reta = 0;
4628 }
4629 }
4630
4631 /* Now fill in hash table */
4632
4633 /*
4634 * MRQC: Multiple Receive Queues Command
4635 * Set queuing to RSS control, number depends on the device.
4636 */
4637 mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4638
4639#ifdef RSS
4640 /* XXX ew typecasting */
4641 rss_getkey((uint8_t *) &rss_key);
4642#else
4643 arc4rand(&rss_key, sizeof(rss_key), 0);
4644#endif
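	/* The 40-byte RSS key is written as ten 32-bit RSSRK registers. */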
4645 for (i = 0; i < 10; i++)
4646 E1000_WRITE_REG_ARRAY(hw,
4647 E1000_RSSRK(0), i, rss_key[i]);
4648
4649 /*
4650 * Configure the RSS fields to hash upon.
4651 */
4652 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4653 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4654 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4655 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4656 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4657 E1000_MRQC_RSS_FIELD_IPV6_UDP);
4658 mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4659 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4660
4661 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4662}
4663
4664/*********************************************************************
4665 *
4666 * Enable receive unit.
4667 *
4668 **********************************************************************/
4669static void
4670igb_initialize_receive_units(struct adapter *adapter)
4671{
4672 struct rx_ring *rxr = adapter->rx_rings;
4673 struct ifnet *ifp = adapter->ifp;
4674 struct e1000_hw *hw = &adapter->hw;
4675 u32 rctl, rxcsum, psize, srrctl = 0;
4676
4677 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4678
4679 /*
4680 * Make sure receives are disabled while setting
4681 * up the descriptor ring
4682 */
4683 rctl = E1000_READ_REG(hw, E1000_RCTL);
4684 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4685
4686 /*
4687 ** Set up for header split
4688 */
4689 if (igb_header_split) {
4690 /* Use a standard mbuf for the header */
4691 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4692 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4693 } else
4694 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4695
4696 /*
4697 ** Set up for jumbo frames
4698 */
4699 if (ifp->if_mtu > ETHERMTU) {
4700 rctl |= E1000_RCTL_LPE;
4701 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4702 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4703 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4704 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4705 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4706 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4707 }
4708 /* Set maximum packet len */
4709 psize = adapter->max_frame_size;
4710 /* are we on a vlan? */
4711 if (adapter->ifp->if_vlantrunk != NULL)
4712 psize += VLAN_TAG_SIZE;
4713 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4714 } else {
4715 rctl &= ~E1000_RCTL_LPE;
4716 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4717 rctl |= E1000_RCTL_SZ_2048;
4718 }
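	/*
	 * The SRRCTL packet buffer size programmed above is in 1 KB units
	 * (hence the >> E1000_SRRCTL_BSIZEPKT_SHIFT), so the 2048/4096/8192
	 * values become 2, 4 and 8.
	 */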
4719
4720 /*
4721 * If TX flow control is disabled and there's >1 queue defined,
4722 * enable DROP.
4723 *
4724 * This drops frames rather than hanging the RX MAC for all queues.
4725 */
4726 if ((adapter->num_queues > 1) &&
4727 (adapter->fc == e1000_fc_none ||
4728 adapter->fc == e1000_fc_rx_pause)) {
4729 srrctl |= E1000_SRRCTL_DROP_EN;
4730 }
4731
4732 /* Setup the Base and Length of the Rx Descriptor Rings */
4733 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4734 u64 bus_addr = rxr->rxdma.dma_paddr;
4735 u32 rxdctl;
4736
4737 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4738 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4739 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4740 (uint32_t)(bus_addr >> 32));
4741 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4742 (uint32_t)bus_addr);
4743 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4744 /* Enable this Queue */
4745 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4746 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4747 rxdctl &= 0xFFF00000;
4748 rxdctl |= IGB_RX_PTHRESH;
4749 rxdctl |= IGB_RX_HTHRESH << 8;
4750 rxdctl |= IGB_RX_WTHRESH << 16;
4751 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4752 }
4753
4754 /*
4755 ** Setup for RX MultiQueue
4756 */
4757 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4758 if (adapter->num_queues >1) {
4759
4760 /* rss setup */
4761 igb_initialise_rss_mapping(adapter);
4762
4763 /*
4764 ** NOTE: Receive Full-Packet Checksum Offload
4765 ** is mutually exclusive with Multiqueue. However
4766 ** this is not the same as TCP/IP checksums which
4767 ** still work.
4768 */
4769 rxcsum |= E1000_RXCSUM_PCSD;
4770#if __FreeBSD_version >= 800000
4771 /* For SCTP Offload */
4772 if ((hw->mac.type == e1000_82576)
4773 && (ifp->if_capenable & IFCAP_RXCSUM))
4774 rxcsum |= E1000_RXCSUM_CRCOFL;
4775#endif
4776 } else {
4777 /* Non RSS setup */
4778 if (ifp->if_capenable & IFCAP_RXCSUM) {
4779 rxcsum |= E1000_RXCSUM_IPPCSE;
4780#if __FreeBSD_version >= 800000
4781 if (adapter->hw.mac.type == e1000_82576)
4782 rxcsum |= E1000_RXCSUM_CRCOFL;
4783#endif
4784 } else
4785 rxcsum &= ~E1000_RXCSUM_TUOFL;
4786 }
4787 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4788
4789 /* Setup the Receive Control Register */
4790 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4791 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4792 E1000_RCTL_RDMTS_HALF |
4793 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4794 /* Strip CRC bytes. */
4795 rctl |= E1000_RCTL_SECRC;
4796 /* Make sure VLAN Filters are off */
4797 rctl &= ~E1000_RCTL_VFE;
4798 /* Don't store bad packets */
4799 rctl &= ~E1000_RCTL_SBP;
4800
4801 /* Enable Receives */
4802 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4803
4804 /*
4805 * Setup the HW Rx Head and Tail Descriptor Pointers
4806 * - needs to be after enable
4807 */
4808 for (int i = 0; i < adapter->num_queues; i++) {
4809 rxr = &adapter->rx_rings[i];
4810 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4811#ifdef DEV_NETMAP
4812 /*
4813 * an init() while a netmap client is active must
4814 * preserve the rx buffers passed to userspace.
4815 * In this driver it means we adjust RDT to
4816 * something different from next_to_refresh
4817 * (which is not used in netmap mode).
4818 */
4819 if (ifp->if_capenable & IFCAP_NETMAP) {
4820 struct netmap_adapter *na = NA(adapter->ifp);
4821 struct netmap_kring *kring = &na->rx_rings[i];
4822 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4823
4824 if (t >= adapter->num_rx_desc)
4825 t -= adapter->num_rx_desc;
4826 else if (t < 0)
4827 t += adapter->num_rx_desc;
4828 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4829 } else
4830#endif /* DEV_NETMAP */
4831 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4832 }
4833 return;
4834}
4835
4836/*********************************************************************
4837 *
4838 * Free receive rings.
4839 *
4840 **********************************************************************/
4841static void
4842igb_free_receive_structures(struct adapter *adapter)
4843{
4844 struct rx_ring *rxr = adapter->rx_rings;
4845
4846 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4847 struct lro_ctrl *lro = &rxr->lro;
4848 igb_free_receive_buffers(rxr);
4849 tcp_lro_free(lro);
4850 igb_dma_free(adapter, &rxr->rxdma);
4851 }
4852
4853 free(adapter->rx_rings, M_DEVBUF);
4854}
4855
4856/*********************************************************************
4857 *
4858 * Free receive ring data structures.
4859 *
4860 **********************************************************************/
4861static void
4862igb_free_receive_buffers(struct rx_ring *rxr)
4863{
4864 struct adapter *adapter = rxr->adapter;
4865 struct igb_rx_buf *rxbuf;
4866 int i;
4867
4868 INIT_DEBUGOUT("free_receive_structures: begin");
4869
4870 /* Cleanup any existing buffers */
4871 if (rxr->rx_buffers != NULL) {
4872 for (i = 0; i < adapter->num_rx_desc; i++) {
4873 rxbuf = &rxr->rx_buffers[i];
4874 if (rxbuf->m_head != NULL) {
4875 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4876 BUS_DMASYNC_POSTREAD);
4877 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4878 rxbuf->m_head->m_flags |= M_PKTHDR;
4879 m_freem(rxbuf->m_head);
4880 }
4881 if (rxbuf->m_pack != NULL) {
4882 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4883 BUS_DMASYNC_POSTREAD);
4884 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4885 rxbuf->m_pack->m_flags |= M_PKTHDR;
4886 m_freem(rxbuf->m_pack);
4887 }
4888 rxbuf->m_head = NULL;
4889 rxbuf->m_pack = NULL;
4890 if (rxbuf->hmap != NULL) {
4891 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4892 rxbuf->hmap = NULL;
4893 }
4894 if (rxbuf->pmap != NULL) {
4895 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4896 rxbuf->pmap = NULL;
4897 }
4898 }
4899 if (rxr->rx_buffers != NULL) {
4900 free(rxr->rx_buffers, M_DEVBUF);
4901 rxr->rx_buffers = NULL;
4902 }
4903 }
4904
4905 if (rxr->htag != NULL) {
4906 bus_dma_tag_destroy(rxr->htag);
4907 rxr->htag = NULL;
4908 }
4909 if (rxr->ptag != NULL) {
4910 bus_dma_tag_destroy(rxr->ptag);
4911 rxr->ptag = NULL;
4912 }
4913}
4914
4915static __inline void
4916igb_rx_discard(struct rx_ring *rxr, int i)
4917{
4918 struct igb_rx_buf *rbuf;
4919
4920 rbuf = &rxr->rx_buffers[i];
4921
4922 /* Partially received? Free the chain */
4923 if (rxr->fmp != NULL) {
4924 rxr->fmp->m_flags |= M_PKTHDR;
4925 m_freem(rxr->fmp);
4926 rxr->fmp = NULL;
4927 rxr->lmp = NULL;
4928 }
4929
4930 /*
4931 ** With advanced descriptors the writeback
4932	** clobbers the buffer addrs, so it's easier
4933 ** to just free the existing mbufs and take
4934 ** the normal refresh path to get new buffers
4935 ** and mapping.
4936 */
4937 if (rbuf->m_head) {
4938 m_free(rbuf->m_head);
4939 rbuf->m_head = NULL;
4940 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4941 }
4942
4943 if (rbuf->m_pack) {
4944 m_free(rbuf->m_pack);
4945 rbuf->m_pack = NULL;
4946 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4947 }
4948
4949 return;
4950}
4951
4952static __inline void
4953igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4954{
4955
4956 /*
4957	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4958	 * has been verified by hardware, and which carry no VLAN tag in the
4959	 * ethernet header.
4960 */
4961 if (rxr->lro_enabled &&
4962 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4963 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4964 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4965 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4966 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4967 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4968 /*
4969 * Send to the stack if:
4970 ** - LRO not enabled, or
4971 ** - no LRO resources, or
4972 ** - lro enqueue fails
4973 */
4974 if (rxr->lro.lro_cnt != 0)
4975 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4976 return;
4977 }
4978 IGB_RX_UNLOCK(rxr);
4979 (*ifp->if_input)(ifp, m);
4980 IGB_RX_LOCK(rxr);
4981}
4982
4983/*********************************************************************
4984 *
4985 * This routine executes in interrupt context. It replenishes
4986 * the mbufs in the descriptor ring and sends data which has been
4987 * dma'ed into host memory to the upper layer.
4988 *
4989 * We loop at most count times if count is > 0, or until done if
4990 * count < 0.
4991 *
4992 * Return TRUE if more to clean, FALSE otherwise
4993 *********************************************************************/
4994static bool
4995igb_rxeof(struct igb_queue *que, int count, int *done)
4996{
4997 struct adapter *adapter = que->adapter;
4998 struct rx_ring *rxr = que->rxr;
4999 struct ifnet *ifp = adapter->ifp;
5000 struct lro_ctrl *lro = &rxr->lro;
5001 struct lro_entry *queued;
5002 int i, processed = 0, rxdone = 0;
5003 u32 ptype, staterr = 0;
5004 union e1000_adv_rx_desc *cur;
5005
5006 IGB_RX_LOCK(rxr);
5007 /* Sync the ring. */
5008 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5009 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
5010
5011#ifdef DEV_NETMAP
5012 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
5013 IGB_RX_UNLOCK(rxr);
5014 return (FALSE);
5015 }
5016#endif /* DEV_NETMAP */
5017
5018 /* Main clean loop */
5019 for (i = rxr->next_to_check; count != 0;) {
5020 struct mbuf *sendmp, *mh, *mp;
5021 struct igb_rx_buf *rxbuf;
5022 u16 hlen, plen, hdr, vtag, pkt_info;
5023 bool eop = FALSE;
5024
5025 cur = &rxr->rx_base[i];
5026 staterr = le32toh(cur->wb.upper.status_error);
5027 if ((staterr & E1000_RXD_STAT_DD) == 0)
5028 break;
5029 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5030 break;
5031 count--;
5032 sendmp = mh = mp = NULL;
5033 cur->wb.upper.status_error = 0;
5034 rxbuf = &rxr->rx_buffers[i];
5035 plen = le16toh(cur->wb.upper.length);
5036 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5037 if (((adapter->hw.mac.type == e1000_i350) ||
5038 (adapter->hw.mac.type == e1000_i354)) &&
5039 (staterr & E1000_RXDEXT_STATERR_LB))
5040 vtag = be16toh(cur->wb.upper.vlan);
5041 else
5042 vtag = le16toh(cur->wb.upper.vlan);
5043 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5044 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5045 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5046
5047 /*
5048 * Free the frame (all segments) if we're at EOP and
5049 * it's an error.
5050 *
5051 * The datasheet states that EOP + status is only valid for
5052 * the final segment in a multi-segment frame.
5053 */
5054 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5055 adapter->dropped_pkts++;
5056 ++rxr->rx_discarded;
5057 igb_rx_discard(rxr, i);
5058 goto next_desc;
5059 }
5060
5061 /*
5062 ** The way the hardware is configured to
5063 ** split, it will ONLY use the header buffer
5064 ** when header split is enabled, otherwise we
5065	** get normal behavior, i.e., both header and
5066 ** payload are DMA'd into the payload buffer.
5067 **
5068 ** The fmp test is to catch the case where a
5069 ** packet spans multiple descriptors, in that
5070 ** case only the first header is valid.
5071 */
5072 if (rxr->hdr_split && rxr->fmp == NULL) {
5073 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5074 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5075 E1000_RXDADV_HDRBUFLEN_SHIFT;
5076 if (hlen > IGB_HDR_BUF)
5077 hlen = IGB_HDR_BUF;
5078 mh = rxr->rx_buffers[i].m_head;
5079 mh->m_len = hlen;
5080 /* clear buf pointer for refresh */
5081 rxbuf->m_head = NULL;
5082 /*
5083 ** Get the payload length, this
5084 ** could be zero if its a small
5085 ** packet.
5086 */
5087 if (plen > 0) {
5088 mp = rxr->rx_buffers[i].m_pack;
5089 mp->m_len = plen;
5090 mh->m_next = mp;
5091 /* clear buf pointer */
5092 rxbuf->m_pack = NULL;
5093 rxr->rx_split_packets++;
5094 }
5095 } else {
5096 /*
5097 ** Either no header split, or a
5098 ** secondary piece of a fragmented
5099 ** split packet.
5100 */
5101 mh = rxr->rx_buffers[i].m_pack;
5102 mh->m_len = plen;
5103 /* clear buf info for refresh */
5104 rxbuf->m_pack = NULL;
5105 }
5106 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5107
5108 ++processed; /* So we know when to refresh */
5109
5110 /* Initial frame - setup */
5111 if (rxr->fmp == NULL) {
5112 mh->m_pkthdr.len = mh->m_len;
5113 /* Save the head of the chain */
5114 rxr->fmp = mh;
5115 rxr->lmp = mh;
5116 if (mp != NULL) {
5117 /* Add payload if split */
5118 mh->m_pkthdr.len += mp->m_len;
5119 rxr->lmp = mh->m_next;
5120 }
5121 } else {
5122 /* Chain mbuf's together */
5123 rxr->lmp->m_next = mh;
5124 rxr->lmp = rxr->lmp->m_next;
5125 rxr->fmp->m_pkthdr.len += mh->m_len;
5126 }
5127
5128 if (eop) {
5129 rxr->fmp->m_pkthdr.rcvif = ifp;
5130 rxr->rx_packets++;
5131 /* capture data for AIM */
5132 rxr->packets++;
5133 rxr->bytes += rxr->fmp->m_pkthdr.len;
5134 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5135
5136 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5137 igb_rx_checksum(staterr, rxr->fmp, ptype);
5138
5139 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5140 (staterr & E1000_RXD_STAT_VP) != 0) {
5141 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5142 rxr->fmp->m_flags |= M_VLANTAG;
5143 }
5144
5145 /*
5146 * In case of multiqueue, we have RXCSUM.PCSD bit set
5147 * and never cleared. This means we have RSS hash
5148 * available to be used.
5149 */
5150 if (adapter->num_queues > 1) {
5151 rxr->fmp->m_pkthdr.flowid =
5152 le32toh(cur->wb.lower.hi_dword.rss);
5153 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5154 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5155 M_HASHTYPE_SET(rxr->fmp,
5156 M_HASHTYPE_RSS_TCP_IPV4);
5157 break;
5158 case E1000_RXDADV_RSSTYPE_IPV4:
5159 M_HASHTYPE_SET(rxr->fmp,
5160 M_HASHTYPE_RSS_IPV4);
5161 break;
5162 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5163 M_HASHTYPE_SET(rxr->fmp,
5164 M_HASHTYPE_RSS_TCP_IPV6);
5165 break;
5166 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5167 M_HASHTYPE_SET(rxr->fmp,
5168 M_HASHTYPE_RSS_IPV6_EX);
5169 break;
5170 case E1000_RXDADV_RSSTYPE_IPV6:
5171 M_HASHTYPE_SET(rxr->fmp,
5172 M_HASHTYPE_RSS_IPV6);
5173 break;
5174 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5175 M_HASHTYPE_SET(rxr->fmp,
5176 M_HASHTYPE_RSS_TCP_IPV6_EX);
5177 break;
5178 default:
5179 /* XXX fallthrough */
5180 M_HASHTYPE_SET(rxr->fmp,
5181 M_HASHTYPE_OPAQUE);
5182 }
5183 } else {
5184#ifndef IGB_LEGACY_TX
5185 rxr->fmp->m_pkthdr.flowid = que->msix;
5186 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5187#endif
5188 }
5189 sendmp = rxr->fmp;
5190 /* Make sure to set M_PKTHDR. */
5191 sendmp->m_flags |= M_PKTHDR;
5192 rxr->fmp = NULL;
5193 rxr->lmp = NULL;
5194 }
5195
5196next_desc:
5197 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5198 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5199
5200 /* Advance our pointers to the next descriptor. */
5201 if (++i == adapter->num_rx_desc)
5202 i = 0;
5203 /*
5204 ** Send to the stack or LRO
5205 */
5206 if (sendmp != NULL) {
5207 rxr->next_to_check = i;
5208 igb_rx_input(rxr, ifp, sendmp, ptype);
5209 i = rxr->next_to_check;
5210 rxdone++;
5211 }
5212
5213 /* Every 8 descriptors we go to refresh mbufs */
5214 if (processed == 8) {
5215 igb_refresh_mbufs(rxr, i);
5216 processed = 0;
5217 }
5218 }
5219
5220 /* Catch any remainders */
5221 if (igb_rx_unrefreshed(rxr))
5222 igb_refresh_mbufs(rxr, i);
5223
5224 rxr->next_to_check = i;
5225
5226 /*
5227 * Flush any outstanding LRO work
5228 */
5229 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5230 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5231 tcp_lro_flush(lro, queued);
5232 }
5233
5234 if (done != NULL)
5235 *done += rxdone;
5236
5237 IGB_RX_UNLOCK(rxr);
5238 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5239}
5240
5241/*********************************************************************
5242 *
5243 * Verify that the hardware indicated that the checksum is valid.
5244 * Inform the stack about the status of checksum so that stack
5245 * Inform the stack about the checksum status so that the stack
5246 *
5247 *********************************************************************/
5248static void
5249igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5250{
5251 u16 status = (u16)staterr;
5252 u8 errors = (u8) (staterr >> 24);
5253 int sctp;
5254
5255 /* Ignore Checksum bit is set */
5256 if (status & E1000_RXD_STAT_IXSM) {
5257 mp->m_pkthdr.csum_flags = 0;
5258 return;
5259 }
5260
5261 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5262 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5263 sctp = 1;
5264 else
5265 sctp = 0;
5266 if (status & E1000_RXD_STAT_IPCS) {
5267 /* Did it pass? */
5268 if (!(errors & E1000_RXD_ERR_IPE)) {
5269 /* IP Checksum Good */
5270 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5271 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5272 } else
5273 mp->m_pkthdr.csum_flags = 0;
5274 }
5275
5276 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5277 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5278#if __FreeBSD_version >= 800000
5279 if (sctp) /* reassign */
5280 type = CSUM_SCTP_VALID;
5281#endif
5282 /* Did it pass? */
5283 if (!(errors & E1000_RXD_ERR_TCPE)) {
5284 mp->m_pkthdr.csum_flags |= type;
5285 if (sctp == 0)
5286 mp->m_pkthdr.csum_data = htons(0xffff);
5287 }
5288 }
5289 return;
5290}
5291
5292/*
5293 * This routine is run via a vlan
5294 * config EVENT
5295 */
5296static void
5297igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5298{
5299 struct adapter *adapter = ifp->if_softc;
5300 u32 index, bit;
5301
5302 if (ifp->if_softc != arg) /* Not our event */
5303 return;
5304
5305 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5306 return;
5307
5308 IGB_CORE_LOCK(adapter);
5309 index = (vtag >> 5) & 0x7F;
5310 bit = vtag & 0x1F;
5311 adapter->shadow_vfta[index] |= (1 << bit);
5312 ++adapter->num_vlans;
5313 /* Change hw filter setting */
5314 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5315 igb_setup_vlan_hw_support(adapter);
5316 IGB_CORE_UNLOCK(adapter);
5317}
5318
5319/*
5320 * This routine is run via a vlan
5321 * unconfig EVENT
5322 */
5323static void
5324igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5325{
5326 struct adapter *adapter = ifp->if_softc;
5327 u32 index, bit;
5328
5329 if (ifp->if_softc != arg)
5330 return;
5331
5332 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5333 return;
5334
5335 IGB_CORE_LOCK(adapter);
5336 index = (vtag >> 5) & 0x7F;
5337 bit = vtag & 0x1F;
5338 adapter->shadow_vfta[index] &= ~(1 << bit);
5339 --adapter->num_vlans;
5340 /* Change hw filter setting */
5341 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5342 igb_setup_vlan_hw_support(adapter);
5343 IGB_CORE_UNLOCK(adapter);
5344}
5345
5346static void
5347igb_setup_vlan_hw_support(struct adapter *adapter)
5348{
5349 struct e1000_hw *hw = &adapter->hw;
5350 struct ifnet *ifp = adapter->ifp;
5351 u32 reg;
5352
5353 if (adapter->vf_ifp) {
5354 e1000_rlpml_set_vf(hw,
5355 adapter->max_frame_size + VLAN_TAG_SIZE);
5356 return;
5357 }
5358
5359 reg = E1000_READ_REG(hw, E1000_CTRL);
5360 reg |= E1000_CTRL_VME;
5361 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5362
5363 /* Enable the Filter Table */
5364 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5365 reg = E1000_READ_REG(hw, E1000_RCTL);
5366 reg &= ~E1000_RCTL_CFIEN;
5367 reg |= E1000_RCTL_VFE;
5368 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5369 }
5370
5371 /* Update the frame size */
5372 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5373 adapter->max_frame_size + VLAN_TAG_SIZE);
5374
5375 /* Don't bother with table if no vlans */
5376 if ((adapter->num_vlans == 0) ||
5377 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5378 return;
5379 /*
5380	** A soft reset zeroes out the VFTA, so
5381 ** we need to repopulate it now.
5382 */
5383 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5384 if (adapter->shadow_vfta[i] != 0) {
5385 if (adapter->vf_ifp)
5386 e1000_vfta_set_vf(hw,
5387 adapter->shadow_vfta[i], TRUE);
5388 else
5389 e1000_write_vfta(hw,
5390 i, adapter->shadow_vfta[i]);
5391 }
5392}
5393
5394static void
5395igb_enable_intr(struct adapter *adapter)
5396{
5397 /* With RSS set up what to auto clear */
5398 if (adapter->msix_mem) {
5399 u32 mask = (adapter->que_mask | adapter->link_mask);
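		/*
		 * EIAC selects which extended interrupt causes auto-clear,
		 * EIAM which ones auto-mask, and EIMS enables the queue and
		 * link vectors; the IMS write below adds link status change.
		 */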
5400 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5401 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5402 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5403 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5404 E1000_IMS_LSC);
5405 } else {
5406 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5407 IMS_ENABLE_MASK);
5408 }
5409 E1000_WRITE_FLUSH(&adapter->hw);
5410
5411 return;
5412}
5413
5414static void
5415igb_disable_intr(struct adapter *adapter)
5416{
5417 if (adapter->msix_mem) {
5418 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5419 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5420 }
5421 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5422 E1000_WRITE_FLUSH(&adapter->hw);
5423 return;
5424}
5425
5426/*
5427 * Bit of a misnomer: what this really means is
5428 * to enable OS management of the system... aka
5429 * to disable special hardware management features
5430 */
5431static void
5432igb_init_manageability(struct adapter *adapter)
5433{
5434 if (adapter->has_manage) {
5435 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5436 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5437
5438 /* disable hardware interception of ARP */
5439 manc &= ~(E1000_MANC_ARP_EN);
5440
5441 /* enable receiving management packets to the host */
5442 manc |= E1000_MANC_EN_MNG2HOST;
5443 manc2h |= 1 << 5; /* Mng Port 623 */
5444 manc2h |= 1 << 6; /* Mng Port 664 */
5445 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5446 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5447 }
5448}
5449
5450/*
5451 * Give control back to hardware management
5452 * controller if there is one.
5453 */
5454static void
5455igb_release_manageability(struct adapter *adapter)
5456{
5457 if (adapter->has_manage) {
5458 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5459
5460 /* re-enable hardware interception of ARP */
5461 manc |= E1000_MANC_ARP_EN;
5462 manc &= ~E1000_MANC_EN_MNG2HOST;
5463
5464 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5465 }
5466}
5467
5468/*
5469 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5470 * For ASF and Pass Through versions of f/w this means that
5471 * the driver is loaded.
5472 *
5473 */
5474static void
5475igb_get_hw_control(struct adapter *adapter)
5476{
5477 u32 ctrl_ext;
5478
5479 if (adapter->vf_ifp)
5480 return;
5481
5482 /* Let firmware know the driver has taken over */
5483 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5484 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5485 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5486}
5487
5488/*
5489 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5490 * For ASF and Pass Through versions of f/w this means that the
5491 * driver is no longer loaded.
5492 *
5493 */
5494static void
5495igb_release_hw_control(struct adapter *adapter)
5496{
5497 u32 ctrl_ext;
5498
5499 if (adapter->vf_ifp)
5500 return;
5501
5502	/* Let firmware take over control of h/w */
5503 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5504 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5505 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5506}
5507
5508static int
5509igb_is_valid_ether_addr(uint8_t *addr)
5510{
5511 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5512
5513 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5514 return (FALSE);
5515 }
5516
5517 return (TRUE);
5518}
5519
5520
5521/*
5522 * Enable PCI Wake On Lan capability
5523 */
5524static void
5525igb_enable_wakeup(device_t dev)
5526{
5527 u16 cap, status;
5528 u8 id;
5529
5530	/* First find the capabilities pointer */
5531 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5532 /* Read the PM Capabilities */
5533 id = pci_read_config(dev, cap, 1);
5534 if (id != PCIY_PMG) /* Something wrong */
5535 return;
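	/*
	 * Note: only the first entry in the capability list is examined,
	 * so this assumes power management is the first capability.
	 */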
5536 /* OK, we have the power capabilities, so
5537 now get the status register */
5538 cap += PCIR_POWER_STATUS;
5539 status = pci_read_config(dev, cap, 2);
5540 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5541 pci_write_config(dev, cap, status, 2);
5542 return;
5543}
5544
5545static void
5546igb_led_func(void *arg, int onoff)
5547{
5548 struct adapter *adapter = arg;
5549
5550 IGB_CORE_LOCK(adapter);
5551 if (onoff) {
5552 e1000_setup_led(&adapter->hw);
5553 e1000_led_on(&adapter->hw);
5554 } else {
5555 e1000_led_off(&adapter->hw);
5556 e1000_cleanup_led(&adapter->hw);
5557 }
5558 IGB_CORE_UNLOCK(adapter);
5559}
5560
5561static uint64_t
5562igb_get_vf_counter(if_t ifp, ift_counter cnt)
5563{
5564 struct adapter *adapter;
5565 struct e1000_vf_stats *stats;
5566#ifndef IGB_LEGACY_TX
5567 struct tx_ring *txr;
5568 uint64_t rv;
5569#endif
5570
5571 adapter = if_getsoftc(ifp);
5572 stats = (struct e1000_vf_stats *)adapter->stats;
5573
5574 switch (cnt) {
5575 case IFCOUNTER_IPACKETS:
5576 return (stats->gprc);
5577 case IFCOUNTER_OPACKETS:
5578 return (stats->gptc);
5579 case IFCOUNTER_IBYTES:
5580 return (stats->gorc);
5581 case IFCOUNTER_OBYTES:
5582 return (stats->gotc);
5583 case IFCOUNTER_IMCASTS:
5584 return (stats->mprc);
5585 case IFCOUNTER_IERRORS:
5586 return (adapter->dropped_pkts);
5587 case IFCOUNTER_OERRORS:
5588 return (adapter->watchdog_events);
5589#ifndef IGB_LEGACY_TX
5590 case IFCOUNTER_OQDROPS:
5591 rv = 0;
5592 txr = adapter->tx_rings;
5593 for (int i = 0; i < adapter->num_queues; i++, txr++)
5594 rv += txr->br->br_drops;
5595 return (rv);
5596#endif
5597 default:
5598 return (if_get_counter_default(ifp, cnt));
5599 }
5600}
5601
5602static uint64_t
5603igb_get_counter(if_t ifp, ift_counter cnt)
5604{
5605 struct adapter *adapter;
5606 struct e1000_hw_stats *stats;
5607#ifndef IGB_LEGACY_TX
5608 struct tx_ring *txr;
5609 uint64_t rv;
5610#endif
5611
5612 adapter = if_getsoftc(ifp);
5613 if (adapter->vf_ifp)
5614 return (igb_get_vf_counter(ifp, cnt));
5615
5616 stats = (struct e1000_hw_stats *)adapter->stats;
5617
5618 switch (cnt) {
5619 case IFCOUNTER_IPACKETS:
5620 return (stats->gprc);
5621 case IFCOUNTER_OPACKETS:
5622 return (stats->gptc);
5623 case IFCOUNTER_IBYTES:
5624 return (stats->gorc);
5625 case IFCOUNTER_OBYTES:
5626 return (stats->gotc);
5627 case IFCOUNTER_IMCASTS:
5628 return (stats->mprc);
5629 case IFCOUNTER_OMCASTS:
5630 return (stats->mptc);
5631 case IFCOUNTER_IERRORS:
5632 return (adapter->dropped_pkts + stats->rxerrc +
5633 stats->crcerrs + stats->algnerrc +
5634 stats->ruc + stats->roc + stats->cexterr);
5635 case IFCOUNTER_OERRORS:
5636 return (stats->ecol + stats->latecol +
5637 adapter->watchdog_events);
5638 case IFCOUNTER_COLLISIONS:
5639 return (stats->colc);
5640 case IFCOUNTER_IQDROPS:
5641 return (stats->mpc);
5642#ifndef IGB_LEGACY_TX
5643 case IFCOUNTER_OQDROPS:
5644 rv = 0;
5645 txr = adapter->tx_rings;
5646 for (int i = 0; i < adapter->num_queues; i++, txr++)
5647 rv += txr->br->br_drops;
5648 return (rv);
5649#endif
5650 default:
5651 return (if_get_counter_default(ifp, cnt));
5652 }
5653}
5654
5655/**********************************************************************
5656 *
5657 * Update the board statistics counters.
5658 *
5659 **********************************************************************/
5660static void
5661igb_update_stats_counters(struct adapter *adapter)
5662{
5663 struct e1000_hw *hw = &adapter->hw;
5664 struct e1000_hw_stats *stats;
5665
5666 /*
5667 ** The virtual function adapter has only a
5668	** small controlled set of stats, so update only
5669 ** those and return.
5670 */
5671 if (adapter->vf_ifp) {
5672 igb_update_vf_stats_counters(adapter);
5673 return;
5674 }
5675
5676 stats = (struct e1000_hw_stats *)adapter->stats;
5677
5678 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5679 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5680 stats->symerrs +=
5681 E1000_READ_REG(hw,E1000_SYMERRS);
5682 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5683 }
5684
5685 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5686 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5687 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5688 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5689
5690 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5691 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5692 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5693 stats->dc += E1000_READ_REG(hw, E1000_DC);
5694 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5695 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5696 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5697 /*
5698 ** For watchdog management we need to know if we have been
5699 ** paused during the last interval, so capture that here.
5700 */
5701 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5702 stats->xoffrxc += adapter->pause_frames;
5703 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5704 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5705 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5706 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5707 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5708 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5709 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5710 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5711 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5712 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5713 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5714 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5715
5716 /* For the 64-bit byte counters the low dword must be read first. */
5717 /* Both registers clear on the read of the high dword */
5718
5719 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5720 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5721 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5722 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5723
5724 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5725 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5726 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5727 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5728 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5729
5730 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5731 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5732 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5733
5734 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5735 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5736 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5737 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5738
5739 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5740 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5741 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5742 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5743 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5744 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5745 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5746 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5747 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5748 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5749
5750 /* Interrupt Counts */
5751
5752 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5753 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5754 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5755 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5756 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5757 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5758 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5759 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5760 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5761
5762 /* Host to Card Statistics */
5763
5764 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5765 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5766 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5767 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5768 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5769 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5770 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5771 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5772 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5773 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5774 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5775 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5776 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5777 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5778
5779 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5780 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5781 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5782 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5783 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5784 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5785
5786 /* Driver specific counters */
5787 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5788 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5789 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5790 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5791 adapter->packet_buf_alloc_tx =
5792 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5793 adapter->packet_buf_alloc_rx =
5794 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5795}
5796
5797
5798/**********************************************************************
5799 *
5800 * Initialize the VF board statistics counters.
5801 *
5802 **********************************************************************/
5803static void
5804igb_vf_init_stats(struct adapter *adapter)
5805{
5806 struct e1000_hw *hw = &adapter->hw;
5807 struct e1000_vf_stats *stats;
5808
5809 stats = (struct e1000_vf_stats *)adapter->stats;
5810 if (stats == NULL)
5811 return;
5812 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5813 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5814 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5815 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5816 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5817}
5818
5819/**********************************************************************
5820 *
5821 * Update the VF board statistics counters.
5822 *
5823 **********************************************************************/
5824static void
5825igb_update_vf_stats_counters(struct adapter *adapter)
5826{
5827 struct e1000_hw *hw = &adapter->hw;
5828 struct e1000_vf_stats *stats;
5829
5830 if (adapter->link_speed == 0)
5831 return;
5832
5833 stats = (struct e1000_vf_stats *)adapter->stats;
5834
5835 UPDATE_VF_REG(E1000_VFGPRC,
5836 stats->last_gprc, stats->gprc);
5837 UPDATE_VF_REG(E1000_VFGORC,
5838 stats->last_gorc, stats->gorc);
5839 UPDATE_VF_REG(E1000_VFGPTC,
5840 stats->last_gptc, stats->gptc);
5841 UPDATE_VF_REG(E1000_VFGOTC,
5842 stats->last_gotc, stats->gotc);
5843 UPDATE_VF_REG(E1000_VFMPRC,
5844 stats->last_mprc, stats->mprc);
5845}
5846
5847/* Export a single 32-bit register via a read-only sysctl. */
5848static int
5849igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5850{
5851 struct adapter *adapter;
5852 u_int val;
5853
5854 adapter = oidp->oid_arg1;
5855 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5856 return (sysctl_handle_int(oidp, &val, 0, req));
5857}
5858
5859/*
5860** Per-queue interrupt rate handler: reports the current rate derived from EITR
5861*/
5862static int
5863igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5864{
5865 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5866 int error;
5867 u32 reg, usec, rate;
5868
5869 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5870 usec = ((reg & 0x7FFC) >> 2);
5871 if (usec > 0)
5872 rate = 1000000 / usec;
5873 else
5874 rate = 0;
5875 error = sysctl_handle_int(oidp, &rate, 0, req);
5876 if (error || !req->newptr)
5877 return error;
5878 return 0;
5879}
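/*
 * For reference, the conversion above is rate = 1000000 / usec, so an
 * EITR interval of 125 usec reports as 8000 interrupts per second, the
 * same value as the hw.igb.max_interrupt_rate default later in this file.
 */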
5880
5881/*
5882 * Add sysctl variables, one per statistic, to the system.
5883 */
5884static void
5885igb_add_hw_stats(struct adapter *adapter)
5886{
5887 device_t dev = adapter->dev;
5888
5889 struct tx_ring *txr = adapter->tx_rings;
5890 struct rx_ring *rxr = adapter->rx_rings;
5891
5892 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5893 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5894 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5895 struct e1000_hw_stats *stats = adapter->stats;
5896
5897 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5898 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5899
5900#define QUEUE_NAME_LEN 32
5901 char namebuf[QUEUE_NAME_LEN];
5902
5903 /* Driver Statistics */
5904 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5905 CTLFLAG_RD, &adapter->link_irq,
5906 "Link MSIX IRQ Handled");
5907 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5908 CTLFLAG_RD, &adapter->dropped_pkts,
5909 "Driver dropped packets");
5910 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5911 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5912 "Driver tx dma failure in xmit");
5913 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5914 CTLFLAG_RD, &adapter->rx_overruns,
5915 "RX overruns");
5916 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5917 CTLFLAG_RD, &adapter->watchdog_events,
5918 "Watchdog timeouts");
5919
5920 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5921 CTLFLAG_RD, &adapter->device_control,
5922 "Device Control Register");
5923 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5924 CTLFLAG_RD, &adapter->rx_control,
5925 "Receiver Control Register");
5926 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5927 CTLFLAG_RD, &adapter->int_mask,
5928 "Interrupt Mask");
5929 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5930 CTLFLAG_RD, &adapter->eint_mask,
5931 "Extended Interrupt Mask");
5932 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5933 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5934 "Transmit Buffer Packet Allocation");
5935 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5936 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5937 "Receive Buffer Packet Allocation");
5938 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5939 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5940 "Flow Control High Watermark");
5941 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5942 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5943 "Flow Control Low Watermark");
5944
5945 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5946 struct lro_ctrl *lro = &rxr->lro;
5947
5948 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5949 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5950 CTLFLAG_RD, NULL, "Queue Name");
5951 queue_list = SYSCTL_CHILDREN(queue_node);
5952
5953 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5954 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5955 sizeof(&adapter->queues[i]),
5956 igb_sysctl_interrupt_rate_handler,
5957 "IU", "Interrupt Rate");
5958
5959 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5960 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5961 igb_sysctl_reg_handler, "IU",
5962 "Transmit Descriptor Head");
5963 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5964 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5965 igb_sysctl_reg_handler, "IU",
5966 "Transmit Descriptor Tail");
5967 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5968 CTLFLAG_RD, &txr->no_desc_avail,
5969 "Queue Descriptors Unavailable");
5970 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5971 CTLFLAG_RD, &txr->total_packets,
5972 "Queue Packets Transmitted");
5973
5974 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5975 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5976 igb_sysctl_reg_handler, "IU",
5977 "Receive Descriptor Head");
5978 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5979 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5980 igb_sysctl_reg_handler, "IU",
5981 "Receive Descriptor Tail");
5982 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5983 CTLFLAG_RD, &rxr->rx_packets,
5984 "Queue Packets Received");
5985 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5986 CTLFLAG_RD, &rxr->rx_bytes,
5987 "Queue Bytes Received");
5988 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5989 CTLFLAG_RD, &lro->lro_queued, 0,
5990 "LRO Queued");
5991 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5992 CTLFLAG_RD, &lro->lro_flushed, 0,
5993 "LRO Flushed");
5994 }
5995
5996 /* MAC stats get their own sub node */
5997
5998 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5999 CTLFLAG_RD, NULL, "MAC Statistics");
6000 stat_list = SYSCTL_CHILDREN(stat_node);
6001
6002 /*
6003 ** VF adapter has a very limited set of stats
6004	** since it's not managing the metal, so to speak.
6005 */
6006 if (adapter->vf_ifp) {
6007 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6008 CTLFLAG_RD, &stats->gprc,
6009 "Good Packets Received");
6010 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6011 CTLFLAG_RD, &stats->gptc,
6012 "Good Packets Transmitted");
6013 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6014 CTLFLAG_RD, &stats->gorc,
6015 "Good Octets Received");
6016 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6017 CTLFLAG_RD, &stats->gotc,
6018 "Good Octets Transmitted");
6019 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6020 CTLFLAG_RD, &stats->mprc,
6021 "Multicast Packets Received");
6022 return;
6023 }
6024
6025 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
6026 CTLFLAG_RD, &stats->ecol,
6027 "Excessive collisions");
6028 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
6029 CTLFLAG_RD, &stats->scc,
6030 "Single collisions");
6031 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
6032 CTLFLAG_RD, &stats->mcc,
6033 "Multiple collisions");
6034 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
6035 CTLFLAG_RD, &stats->latecol,
6036 "Late collisions");
6037 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
6038 CTLFLAG_RD, &stats->colc,
6039 "Collision Count");
6040 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6041 CTLFLAG_RD, &stats->symerrs,
6042 "Symbol Errors");
6043 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6044 CTLFLAG_RD, &stats->sec,
6045 "Sequence Errors");
6046 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6047 CTLFLAG_RD, &stats->dc,
6048 "Defer Count");
6049 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6050 CTLFLAG_RD, &stats->mpc,
6051 "Missed Packets");
6052 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6053 CTLFLAG_RD, &stats->rlec,
6054 "Receive Length Errors");
6055 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6056 CTLFLAG_RD, &stats->rnbc,
6057 "Receive No Buffers");
6058 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6059 CTLFLAG_RD, &stats->ruc,
6060 "Receive Undersize");
6061 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6062 CTLFLAG_RD, &stats->rfc,
6063 "Fragmented Packets Received");
6064 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6065 CTLFLAG_RD, &stats->roc,
6066 "Oversized Packets Received");
6067 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6068 CTLFLAG_RD, &stats->rjc,
6069	    "Received Jabber");
6070 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6071 CTLFLAG_RD, &stats->rxerrc,
6072 "Receive Errors");
6073 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6074 CTLFLAG_RD, &stats->crcerrs,
6075 "CRC errors");
6076 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6077 CTLFLAG_RD, &stats->algnerrc,
6078 "Alignment Errors");
6079 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6080 CTLFLAG_RD, &stats->tncrs,
6081 "Transmit with No CRS");
6082 /* On 82575 these are collision counts */
6083 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6084 CTLFLAG_RD, &stats->cexterr,
6085 "Collision/Carrier extension errors");
6086 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6087 CTLFLAG_RD, &stats->xonrxc,
6088 "XON Received");
6089 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6090 CTLFLAG_RD, &stats->xontxc,
6091 "XON Transmitted");
6092 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6093 CTLFLAG_RD, &stats->xoffrxc,
6094 "XOFF Received");
6095 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6096 CTLFLAG_RD, &stats->xofftxc,
6097 "XOFF Transmitted");
6098 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6099 CTLFLAG_RD, &stats->fcruc,
6100 "Unsupported Flow Control Received");
6101 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6102 CTLFLAG_RD, &stats->mgprc,
6103 "Management Packets Received");
6104 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6105 CTLFLAG_RD, &stats->mgpdc,
6106 "Management Packets Dropped");
6107 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6108 CTLFLAG_RD, &stats->mgptc,
6109 "Management Packets Transmitted");
6110 /* Packet Reception Stats */
6111 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6112 CTLFLAG_RD, &stats->tpr,
6113 "Total Packets Received");
6114 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6115 CTLFLAG_RD, &stats->gprc,
6116 "Good Packets Received");
6117 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6118 CTLFLAG_RD, &stats->bprc,
6119 "Broadcast Packets Received");
6120 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6121 CTLFLAG_RD, &stats->mprc,
6122 "Multicast Packets Received");
6123 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6124 CTLFLAG_RD, &stats->prc64,
6125 "64 byte frames received");
6126 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6127 CTLFLAG_RD, &stats->prc127,
6128 "65-127 byte frames received");
6129 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6130 CTLFLAG_RD, &stats->prc255,
6131 "128-255 byte frames received");
6132 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6133 CTLFLAG_RD, &stats->prc511,
6134 "256-511 byte frames received");
6135 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6136 CTLFLAG_RD, &stats->prc1023,
6137 "512-1023 byte frames received");
6138 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6139 CTLFLAG_RD, &stats->prc1522,
6140	    "1024-1522 byte frames received");
6141 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6142 CTLFLAG_RD, &stats->gorc,
6143 "Good Octets Received");
6144 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6145 CTLFLAG_RD, &stats->tor,
6146 "Total Octets Received");
6147
6148 /* Packet Transmission Stats */
6149 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6150 CTLFLAG_RD, &stats->gotc,
6151 "Good Octets Transmitted");
6152 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6153 CTLFLAG_RD, &stats->tot,
6154 "Total Octets Transmitted");
6155 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6156 CTLFLAG_RD, &stats->tpt,
6157 "Total Packets Transmitted");
6158 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6159 CTLFLAG_RD, &stats->gptc,
6160 "Good Packets Transmitted");
6161 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6162 CTLFLAG_RD, &stats->bptc,
6163 "Broadcast Packets Transmitted");
6164 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6165 CTLFLAG_RD, &stats->mptc,
6166 "Multicast Packets Transmitted");
6167 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6168 CTLFLAG_RD, &stats->ptc64,
6169 "64 byte frames transmitted");
6170 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6171 CTLFLAG_RD, &stats->ptc127,
6172 "65-127 byte frames transmitted");
6173 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6174 CTLFLAG_RD, &stats->ptc255,
6175 "128-255 byte frames transmitted");
6176 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6177 CTLFLAG_RD, &stats->ptc511,
6178 "256-511 byte frames transmitted");
6179 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6180 CTLFLAG_RD, &stats->ptc1023,
6181 "512-1023 byte frames transmitted");
6182 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6183 CTLFLAG_RD, &stats->ptc1522,
6184 "1024-1522 byte frames transmitted");
6185 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6186 CTLFLAG_RD, &stats->tsctc,
6187 "TSO Contexts Transmitted");
6188 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6189 CTLFLAG_RD, &stats->tsctfc,
6190 "TSO Contexts Failed");
6191
6192
6193 /* Interrupt Stats */
6194
6195 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6196 CTLFLAG_RD, NULL, "Interrupt Statistics");
6197 int_list = SYSCTL_CHILDREN(int_node);
6198
6199 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6200 CTLFLAG_RD, &stats->iac,
6201 "Interrupt Assertion Count");
6202
6203 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6204 CTLFLAG_RD, &stats->icrxptc,
6205 "Interrupt Cause Rx Pkt Timer Expire Count");
6206
6207 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6208 CTLFLAG_RD, &stats->icrxatc,
6209 "Interrupt Cause Rx Abs Timer Expire Count");
6210
6211 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6212 CTLFLAG_RD, &stats->ictxptc,
6213 "Interrupt Cause Tx Pkt Timer Expire Count");
6214
6215 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6216 CTLFLAG_RD, &stats->ictxatc,
6217 "Interrupt Cause Tx Abs Timer Expire Count");
6218
6219 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6220 CTLFLAG_RD, &stats->ictxqec,
6221 "Interrupt Cause Tx Queue Empty Count");
6222
6223 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6224 CTLFLAG_RD, &stats->ictxqmtc,
6225 "Interrupt Cause Tx Queue Min Thresh Count");
6226
6227 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6228 CTLFLAG_RD, &stats->icrxdmtc,
6229 "Interrupt Cause Rx Desc Min Thresh Count");
6230
6231 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6232 CTLFLAG_RD, &stats->icrxoc,
6233 "Interrupt Cause Receiver Overrun Count");
6234
6235 /* Host to Card Stats */
6236
6237 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6238 CTLFLAG_RD, NULL,
6239 "Host to Card Statistics");
6240
6241 host_list = SYSCTL_CHILDREN(host_node);
6242
6243 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6244 CTLFLAG_RD, &stats->cbtmpc,
6245 "Circuit Breaker Tx Packet Count");
6246
6247 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6248 CTLFLAG_RD, &stats->htdpmc,
6249 "Host Transmit Discarded Packets");
6250
6251 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6252 CTLFLAG_RD, &stats->rpthc,
6253 "Rx Packets To Host");
6254
6255 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6256 CTLFLAG_RD, &stats->cbrmpc,
6257 "Circuit Breaker Rx Packet Count");
6258
6259 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6260 CTLFLAG_RD, &stats->cbrdpc,
6261 "Circuit Breaker Rx Dropped Count");
6262
6263 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6264 CTLFLAG_RD, &stats->hgptc,
6265 "Host Good Packets Tx Count");
6266
6267 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6268 CTLFLAG_RD, &stats->htcbdpc,
6269 "Host Tx Circuit Breaker Dropped Count");
6270
6271 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6272 CTLFLAG_RD, &stats->hgorc,
6273 "Host Good Octets Received Count");
6274
6275 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6276 CTLFLAG_RD, &stats->hgotc,
6277 "Host Good Octets Transmit Count");
6278
6279 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6280 CTLFLAG_RD, &stats->lenerrs,
6281 "Length Errors");
6282
6283 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6284 CTLFLAG_RD, &stats->scvpc,
6285 "SerDes/SGMII Code Violation Pkt Count");
6286
6287 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6288 CTLFLAG_RD, &stats->hrmpc,
6289 "Header Redirection Missed Packet Count");
6290}
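/*
 * The statistics registered above appear under the device's sysctl tree,
 * e.g. dev.igb.0.mac_stats.good_pkts_recvd or dev.igb.0.queue0.rx_packets
 * for a hypothetical unit 0.
 */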
6291
6292
6293/**********************************************************************
6294 *
6295 * This routine provides a way to dump out the adapter eeprom,
6296 * often a useful debug/service tool. This only dumps the first
6297 * 32 words; the data that matters is within that range.
6298 *
6299 **********************************************************************/
6300static int
6301igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6302{
6303 struct adapter *adapter;
6304 int error;
6305 int result;
6306
6307 result = -1;
6308 error = sysctl_handle_int(oidp, &result, 0, req);
6309
6310 if (error || !req->newptr)
6311 return (error);
6312
6313 /*
6314 * This value will cause a hex dump of the
6315 * first 32 16-bit words of the EEPROM to
6316 * the screen.
6317 */
6318 if (result == 1) {
6319 adapter = (struct adapter *)arg1;
6320 igb_print_nvm_info(adapter);
6321 }
6322
6323 return (error);
6324}
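/*
 * Example (hypothetical unit 0): writing 1 to the "nvm" sysctl created in
 * igb_attach triggers the dump, e.g. sysctl dev.igb.0.nvm=1.
 */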
6325
6326static void
6327igb_print_nvm_info(struct adapter *adapter)
6328{
6329 u16 eeprom_data;
6330 int i, j, row = 0;
6331
6332	/* It's a bit crude, but it gets the job done */
6333 printf("\nInterface EEPROM Dump:\n");
6334 printf("Offset\n0x0000 ");
6335 for (i = 0, j = 0; i < 32; i++, j++) {
6336 if (j == 8) { /* Make the offset block */
6337 j = 0; ++row;
6338			printf("\n0x00%x0 ", row);
6339 }
6340 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6341 printf("%04x ", eeprom_data);
6342 }
6343 printf("\n");
6344}
6345
6346static void
6347igb_set_sysctl_value(struct adapter *adapter, const char *name,
6348 const char *description, int *limit, int value)
6349{
6350 *limit = value;
6351 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6352 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6353 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6354}
6355
6356/*
6357** Set flow control using sysctl:
6358** Flow control values:
6359** 0 - off
6360** 1 - rx pause
6361** 2 - tx pause
6362** 3 - full
6363*/
6364static int
6365igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6366{
6367 int error;
6368 static int input = 3; /* default is full */
6369 struct adapter *adapter = (struct adapter *) arg1;
6370
6371 error = sysctl_handle_int(oidp, &input, 0, req);
6372
6373 if ((error) || (req->newptr == NULL))
6374 return (error);
6375
6376 switch (input) {
6377 case e1000_fc_rx_pause:
6378 case e1000_fc_tx_pause:
6379 case e1000_fc_full:
6380 case e1000_fc_none:
6381 adapter->hw.fc.requested_mode = input;
6382 adapter->fc = input;
6383 break;
6384 default:
6385 /* Do nothing */
6386 return (error);
6387 }
6388
6389 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6390 e1000_force_mac_fc(&adapter->hw);
6391 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6392 return (error);
6393}
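/*
 * Example (hypothetical unit 0): sysctl dev.igb.0.fc=3 requests full
 * flow control; 0 disables it, per the value table above.
 */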
6394
6395/*
6396** Manage DMA Coalesce:
6397** Control values:
6398** 0/1 - off/on
6399** Legal timer values are:
6400** 250,500,1000-10000 in thousands
6401*/
6402static int
6403igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6404{
6405 struct adapter *adapter = (struct adapter *) arg1;
6406 int error;
6407
6408 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6409
6410 if ((error) || (req->newptr == NULL))
6411 return (error);
6412
6413 switch (adapter->dmac) {
6414 case 0:
6415		/* Disabling */
6416 break;
6417 case 1: /* Just enable and use default */
6418 adapter->dmac = 1000;
6419 break;
6420 case 250:
6421 case 500:
6422 case 1000:
6423 case 2000:
6424 case 3000:
6425 case 4000:
6426 case 5000:
6427 case 6000:
6428 case 7000:
6429 case 8000:
6430 case 9000:
6431 case 10000:
6432 /* Legal values - allow */
6433 break;
6434 default:
6435		/* Illegal value, reset to disabled and reject */
6436 adapter->dmac = 0;
6437 return (EINVAL);
6438 }
6439 /* Reinit the interface */
6440 igb_init(adapter);
6441 return (error);
6442}
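/*
 * Example (hypothetical unit 0, i350 or later where the sysctl exists):
 * sysctl dev.igb.0.dmac=1 enables DMA coalescing with the default value
 * of 1000, and sysctl dev.igb.0.dmac=0 disables it; the interface is
 * reinitialized either way.
 */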
6443
6444/*
6445** Manage Energy Efficient Ethernet:
6446** Control values:
6447** 0/1 - enabled/disabled
6448*/
6449static int
6450igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6451{
6452 struct adapter *adapter = (struct adapter *) arg1;
6453 int error, value;
6454
6455 value = adapter->hw.dev_spec._82575.eee_disable;
6456 error = sysctl_handle_int(oidp, &value, 0, req);
6457 if (error || req->newptr == NULL)
6458 return (error);
6459 IGB_CORE_LOCK(adapter);
6460 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6461 igb_init_locked(adapter);
6462 IGB_CORE_UNLOCK(adapter);
6463 return (0);
6464}
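/*
 * Example (hypothetical unit 0): sysctl dev.igb.0.eee_disabled=1 turns
 * EEE off and reinitializes the interface under the core lock.
 */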
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#ifndef IGB_LEGACY_TX
48#include <sys/buf_ring.h>
49#endif
50#include <sys/bus.h>
51#include <sys/endian.h>
52#include <sys/kernel.h>
53#include <sys/kthread.h>
54#include <sys/malloc.h>
55#include <sys/mbuf.h>
56#include <sys/module.h>
57#include <sys/rman.h>
58#include <sys/socket.h>
59#include <sys/sockio.h>
60#include <sys/sysctl.h>
61#include <sys/taskqueue.h>
62#include <sys/eventhandler.h>
63#include <sys/pcpu.h>
64#include <sys/smp.h>
65#include <machine/smp.h>
66#include <machine/bus.h>
67#include <machine/resource.h>
68
69#include <net/bpf.h>
70#include <net/ethernet.h>
71#include <net/if.h>
72#include <net/if_var.h>
73#include <net/if_arp.h>
74#include <net/if_dl.h>
75#include <net/if_media.h>
76#ifdef RSS
77#include <net/rss_config.h>
78#endif
79
80#include <net/if_types.h>
81#include <net/if_vlan_var.h>
82
83#include <netinet/in_systm.h>
84#include <netinet/in.h>
85#include <netinet/if_ether.h>
86#include <netinet/ip.h>
87#include <netinet/ip6.h>
88#include <netinet/tcp.h>
89#include <netinet/tcp_lro.h>
90#include <netinet/udp.h>
91
92#include <machine/in_cksum.h>
93#include <dev/led/led.h>
94#include <dev/pci/pcivar.h>
95#include <dev/pci/pcireg.h>
96
97#include "e1000_api.h"
98#include "e1000_82575.h"
99#include "if_igb.h"
100
101/*********************************************************************
102 * Set this to one to display debug statistics
103 *********************************************************************/
104int igb_display_debug_stats = 0;
105
106/*********************************************************************
107 * Driver version:
108 *********************************************************************/
109char igb_driver_version[] = "version - 2.4.0";
110
111
112/*********************************************************************
113 * PCI Device ID Table
114 *
115 * Used by probe to select devices to load on
116 * Last field stores an index into e1000_strings
117 * Last entry must be all 0s
118 *
119 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
120 *********************************************************************/
121
122static igb_vendor_info_t igb_vendor_info_array[] =
123{
124 { 0x8086, E1000_DEV_ID_82575EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
126 PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
128 PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82576_NS, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82576_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82576_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
135 PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
137 PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
139 PCI_ANY_ID, PCI_ANY_ID, 0},
140 { 0x8086, E1000_DEV_ID_82576_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_82580_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
142 { 0x8086, E1000_DEV_ID_82580_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_82580_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
144 { 0x8086, E1000_DEV_ID_82580_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
148 PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_DH89XXCC_SFP, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
153 PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_I350_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_I350_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_I350_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_I350_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_I350_VF, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_I210_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_I210_COPPER_IT, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
162 PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
164 PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
166 PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_I210_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_I210_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_I210_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_I211_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
172 PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
174 PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_I354_SGMII, PCI_ANY_ID, PCI_ANY_ID, 0},
176 /* required last entry */
177 { 0, 0, 0, 0, 0}
178};
179
180/*********************************************************************
181 * Table of branding strings for all supported NICs.
182 *********************************************************************/
183
184static char *igb_strings[] = {
185 "Intel(R) PRO/1000 Network Connection"
186};
187
188/*********************************************************************
189 * Function prototypes
190 *********************************************************************/
191static int igb_probe(device_t);
192static int igb_attach(device_t);
193static int igb_detach(device_t);
194static int igb_shutdown(device_t);
195static int igb_suspend(device_t);
196static int igb_resume(device_t);
197#ifndef IGB_LEGACY_TX
198static int igb_mq_start(struct ifnet *, struct mbuf *);
199static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
200static void igb_qflush(struct ifnet *);
201static void igb_deferred_mq_start(void *, int);
202#else
203static void igb_start(struct ifnet *);
204static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
205#endif
206static int igb_ioctl(struct ifnet *, u_long, caddr_t);
207static uint64_t igb_get_counter(if_t, ift_counter);
208static void igb_init(void *);
209static void igb_init_locked(struct adapter *);
210static void igb_stop(void *);
211static void igb_media_status(struct ifnet *, struct ifmediareq *);
212static int igb_media_change(struct ifnet *);
213static void igb_identify_hardware(struct adapter *);
214static int igb_allocate_pci_resources(struct adapter *);
215static int igb_allocate_msix(struct adapter *);
216static int igb_allocate_legacy(struct adapter *);
217static int igb_setup_msix(struct adapter *);
218static void igb_free_pci_resources(struct adapter *);
219static void igb_local_timer(void *);
220static void igb_reset(struct adapter *);
221static int igb_setup_interface(device_t, struct adapter *);
222static int igb_allocate_queues(struct adapter *);
223static void igb_configure_queues(struct adapter *);
224
225static int igb_allocate_transmit_buffers(struct tx_ring *);
226static void igb_setup_transmit_structures(struct adapter *);
227static void igb_setup_transmit_ring(struct tx_ring *);
228static void igb_initialize_transmit_units(struct adapter *);
229static void igb_free_transmit_structures(struct adapter *);
230static void igb_free_transmit_buffers(struct tx_ring *);
231
232static int igb_allocate_receive_buffers(struct rx_ring *);
233static int igb_setup_receive_structures(struct adapter *);
234static int igb_setup_receive_ring(struct rx_ring *);
235static void igb_initialize_receive_units(struct adapter *);
236static void igb_free_receive_structures(struct adapter *);
237static void igb_free_receive_buffers(struct rx_ring *);
238static void igb_free_receive_ring(struct rx_ring *);
239
240static void igb_enable_intr(struct adapter *);
241static void igb_disable_intr(struct adapter *);
242static void igb_update_stats_counters(struct adapter *);
243static bool igb_txeof(struct tx_ring *);
244
245static __inline void igb_rx_discard(struct rx_ring *, int);
246static __inline void igb_rx_input(struct rx_ring *,
247 struct ifnet *, struct mbuf *, u32);
248
249static bool igb_rxeof(struct igb_queue *, int, int *);
250static void igb_rx_checksum(u32, struct mbuf *, u32);
251static int igb_tx_ctx_setup(struct tx_ring *,
252 struct mbuf *, u32 *, u32 *);
253static int igb_tso_setup(struct tx_ring *,
254 struct mbuf *, u32 *, u32 *);
255static void igb_set_promisc(struct adapter *);
256static void igb_disable_promisc(struct adapter *);
257static void igb_set_multi(struct adapter *);
258static void igb_update_link_status(struct adapter *);
259static void igb_refresh_mbufs(struct rx_ring *, int);
260
261static void igb_register_vlan(void *, struct ifnet *, u16);
262static void igb_unregister_vlan(void *, struct ifnet *, u16);
263static void igb_setup_vlan_hw_support(struct adapter *);
264
265static int igb_xmit(struct tx_ring *, struct mbuf **);
266static int igb_dma_malloc(struct adapter *, bus_size_t,
267 struct igb_dma_alloc *, int);
268static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
269static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
270static void igb_print_nvm_info(struct adapter *);
271static int igb_is_valid_ether_addr(u8 *);
272static void igb_add_hw_stats(struct adapter *);
273
274static void igb_vf_init_stats(struct adapter *);
275static void igb_update_vf_stats_counters(struct adapter *);
276
277/* Management and WOL Support */
278static void igb_init_manageability(struct adapter *);
279static void igb_release_manageability(struct adapter *);
280static void igb_get_hw_control(struct adapter *);
281static void igb_release_hw_control(struct adapter *);
282static void igb_enable_wakeup(device_t);
283static void igb_led_func(void *, int);
284
285static int igb_irq_fast(void *);
286static void igb_msix_que(void *);
287static void igb_msix_link(void *);
288static void igb_handle_que(void *context, int pending);
289static void igb_handle_link(void *context, int pending);
290static void igb_handle_link_locked(struct adapter *);
291
292static void igb_set_sysctl_value(struct adapter *, const char *,
293 const char *, int *, int);
294static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
295static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
296static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
297
298#ifdef DEVICE_POLLING
299static poll_handler_t igb_poll;
300#endif /* POLLING */
301
302/*********************************************************************
303 * FreeBSD Device Interface Entry Points
304 *********************************************************************/
305
306static device_method_t igb_methods[] = {
307 /* Device interface */
308 DEVMETHOD(device_probe, igb_probe),
309 DEVMETHOD(device_attach, igb_attach),
310 DEVMETHOD(device_detach, igb_detach),
311 DEVMETHOD(device_shutdown, igb_shutdown),
312 DEVMETHOD(device_suspend, igb_suspend),
313 DEVMETHOD(device_resume, igb_resume),
314 DEVMETHOD_END
315};
316
317static driver_t igb_driver = {
318 "igb", igb_methods, sizeof(struct adapter),
319};
320
321static devclass_t igb_devclass;
322DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
323MODULE_DEPEND(igb, pci, 1, 1, 1);
324MODULE_DEPEND(igb, ether, 1, 1, 1);
325#ifdef DEV_NETMAP
326MODULE_DEPEND(igb, netmap, 1, 1, 1);
327#endif /* DEV_NETMAP */
328
329/*********************************************************************
330 * Tunable default values.
331 *********************************************************************/
332
333static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
334
335/* Descriptor defaults */
336static int igb_rxd = IGB_DEFAULT_RXD;
337static int igb_txd = IGB_DEFAULT_TXD;
338SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
339 "Number of receive descriptors per queue");
340SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
341 "Number of transmit descriptors per queue");
342
343/*
344** AIM: Adaptive Interrupt Moderation
345** which means that the interrupt rate
346** is varied over time based on the
347** traffic for that interrupt vector
348*/
349static int igb_enable_aim = TRUE;
350SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
351 "Enable adaptive interrupt moderation");
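/*
** Being CTLFLAG_RWTUN, this can be set either as a loader tunable
** (e.g. hw.igb.enable_aim=0 in /boot/loader.conf) or changed at
** runtime via sysctl hw.igb.enable_aim.
*/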
352
353/*
354 * MSIX should be the default for best performance,
355 * but this allows it to be forced off for testing.
356 */
357static int igb_enable_msix = 1;
358SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
359 "Enable MSI-X interrupts");
360
361/*
362** Tuneable Interrupt rate
363*/
364static int igb_max_interrupt_rate = 8000;
365SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
366 &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
367
368#ifndef IGB_LEGACY_TX
369/*
370** Tuneable number of buffers in the buf-ring (drbr_xxx)
371*/
372static int igb_buf_ring_size = IGB_BR_SIZE;
373SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
374 &igb_buf_ring_size, 0, "Size of the bufring");
375#endif
376
377/*
378** Header split causes the packet header to
379** be DMA'd to a separate mbuf from the payload.
380** This can have memory alignment benefits. Another
381** plus is that small packets often fit entirely
382** into the header mbuf and thus use no cluster. It's
383** a very workload-dependent feature.
384*/
385static int igb_header_split = FALSE;
386SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
387 "Enable receive mbuf header split");
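/*
** Being CTLFLAG_RDTUN, this is read-only at runtime and can only be set
** as a boot-time tunable, e.g. hw.igb.header_split=1 in /boot/loader.conf.
*/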
388
389/*
390** If left at 0, this will autoconfigure
391** based on the number of CPUs and the max
392** supported MSIX messages.
393*/
394static int igb_num_queues = 0;
395SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
396 "Number of queues to configure, 0 indicates autoconfigure");
397
398/*
399** Global variable to store last used CPU when binding queues
400** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
401** queue is bound to a cpu.
402*/
403static int igb_last_bind_cpu = -1;
404
405/* How many packets rxeof tries to clean at a time */
406static int igb_rx_process_limit = 100;
407SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
408 &igb_rx_process_limit, 0,
409 "Maximum number of received packets to process at a time, -1 means unlimited");
410
411#ifdef DEV_NETMAP /* see ixgbe.c for details */
412#include <dev/netmap/if_igb_netmap.h>
413#endif /* DEV_NETMAP */
414/*********************************************************************
415 * Device identification routine
416 *
417 * igb_probe determines if the driver should be loaded on an
418 * adapter, based on the PCI vendor/device ID of the adapter.
419 *
420 * return BUS_PROBE_DEFAULT on success, positive on failure
421 *********************************************************************/
422
423static int
424igb_probe(device_t dev)
425{
426 char adapter_name[60];
427 uint16_t pci_vendor_id = 0;
428 uint16_t pci_device_id = 0;
429 uint16_t pci_subvendor_id = 0;
430 uint16_t pci_subdevice_id = 0;
431 igb_vendor_info_t *ent;
432
433 INIT_DEBUGOUT("igb_probe: begin");
434
435 pci_vendor_id = pci_get_vendor(dev);
436 if (pci_vendor_id != IGB_VENDOR_ID)
437 return (ENXIO);
438
439 pci_device_id = pci_get_device(dev);
440 pci_subvendor_id = pci_get_subvendor(dev);
441 pci_subdevice_id = pci_get_subdevice(dev);
442
443 ent = igb_vendor_info_array;
444 while (ent->vendor_id != 0) {
445 if ((pci_vendor_id == ent->vendor_id) &&
446 (pci_device_id == ent->device_id) &&
447
448 ((pci_subvendor_id == ent->subvendor_id) ||
449 (ent->subvendor_id == PCI_ANY_ID)) &&
450
451 ((pci_subdevice_id == ent->subdevice_id) ||
452 (ent->subdevice_id == PCI_ANY_ID))) {
453 sprintf(adapter_name, "%s %s",
454 igb_strings[ent->index],
455 igb_driver_version);
456 device_set_desc_copy(dev, adapter_name);
457 return (BUS_PROBE_DEFAULT);
458 }
459 ent++;
460 }
461
462 return (ENXIO);
463}
464
465/*********************************************************************
466 * Device initialization routine
467 *
468 * The attach entry point is called when the driver is being loaded.
469 * This routine identifies the type of hardware, allocates all resources
470 * and initializes the hardware.
471 *
472 * return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476igb_attach(device_t dev)
477{
478 struct adapter *adapter;
479 int error = 0;
480 u16 eeprom_data;
481
482 INIT_DEBUGOUT("igb_attach: begin");
483
484 if (resource_disabled("igb", device_get_unit(dev))) {
485 device_printf(dev, "Disabled by device hint\n");
486 return (ENXIO);
487 }
488
489 adapter = device_get_softc(dev);
490 adapter->dev = adapter->osdep.dev = dev;
491 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
492
493 /* SYSCTL stuff */
494 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
495 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
496 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
497 igb_sysctl_nvm_info, "I", "NVM Information");
498
499 igb_set_sysctl_value(adapter, "enable_aim",
500 "Interrupt Moderation", &adapter->enable_aim,
501 igb_enable_aim);
502
503 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
504 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
505 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
506 adapter, 0, igb_set_flowcntl, "I", "Flow Control");
507
508 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
509
510 /* Determine hardware and mac info */
511 igb_identify_hardware(adapter);
512
513 /* Setup PCI resources */
514 if (igb_allocate_pci_resources(adapter)) {
515 device_printf(dev, "Allocation of PCI resources failed\n");
516 error = ENXIO;
517 goto err_pci;
518 }
519
520 /* Do Shared Code initialization */
521 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
522 device_printf(dev, "Setup of Shared code failed\n");
523 error = ENXIO;
524 goto err_pci;
525 }
526
527 e1000_get_bus_info(&adapter->hw);
528
529 /* Sysctl for limiting the amount of work done in the taskqueue */
530 igb_set_sysctl_value(adapter, "rx_processing_limit",
531 "max number of rx packets to process",
532 &adapter->rx_process_limit, igb_rx_process_limit);
533
534 /*
535 * Validate number of transmit and receive descriptors. It
536	 * must not exceed the hardware maximum, and the ring size in
537	 * bytes must be a multiple of IGB_DBA_ALIGN.
538 */
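	/*
	 * For example, assuming IGB_DBA_ALIGN is 128 bytes and the legacy
	 * descriptors are 16 bytes each, the ring sizes must be multiples
	 * of 8 descriptors.
	 */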
539 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
540 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
541 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
542 IGB_DEFAULT_TXD, igb_txd);
543 adapter->num_tx_desc = IGB_DEFAULT_TXD;
544 } else
545 adapter->num_tx_desc = igb_txd;
546 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
547 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
548 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
549 IGB_DEFAULT_RXD, igb_rxd);
550 adapter->num_rx_desc = IGB_DEFAULT_RXD;
551 } else
552 adapter->num_rx_desc = igb_rxd;
553
554 adapter->hw.mac.autoneg = DO_AUTO_NEG;
555 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
556 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
557
558 /* Copper options */
559 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
560 adapter->hw.phy.mdix = AUTO_ALL_MODES;
561 adapter->hw.phy.disable_polarity_correction = FALSE;
562 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
563 }
564
565 /*
566 * Set the frame limits assuming
567	 * standard Ethernet-sized frames.
568 */
569 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
570
571 /*
572 ** Allocate and Setup Queues
573 */
574 if (igb_allocate_queues(adapter)) {
575 error = ENOMEM;
576 goto err_pci;
577 }
578
579 /* Allocate the appropriate stats memory */
580 if (adapter->vf_ifp) {
581 adapter->stats =
582 (struct e1000_vf_stats *)malloc(sizeof \
583 (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
584 igb_vf_init_stats(adapter);
585 } else
586 adapter->stats =
587 (struct e1000_hw_stats *)malloc(sizeof \
588 (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
589 if (adapter->stats == NULL) {
590 device_printf(dev, "Can not allocate stats memory\n");
591 error = ENOMEM;
592 goto err_late;
593 }
594
595 /* Allocate multicast array memory. */
596 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
597 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
598 if (adapter->mta == NULL) {
599 device_printf(dev, "Can not allocate multicast setup array\n");
600 error = ENOMEM;
601 goto err_late;
602 }
603
604 /* Some adapter-specific advanced features */
605 if (adapter->hw.mac.type >= e1000_i350) {
606 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
607 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
608 OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
609 adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
610 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
611 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
612 OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
613 adapter, 0, igb_sysctl_eee, "I",
614 "Disable Energy Efficient Ethernet");
615 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
616 if (adapter->hw.mac.type == e1000_i354)
617 e1000_set_eee_i354(&adapter->hw);
618 else
619 e1000_set_eee_i350(&adapter->hw);
620 }
621 }
622
623 /*
624	** Start from a known state; this is
625	** important for reading the NVM and
626	** MAC address from it.
627 */
628 e1000_reset_hw(&adapter->hw);
629
630 /* Make sure we have a good EEPROM before we read from it */
631 if (((adapter->hw.mac.type != e1000_i210) &&
632 (adapter->hw.mac.type != e1000_i211)) &&
633 (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
634 /*
635 ** Some PCI-E parts fail the first check due to
636	** the link being in a sleep state; call it again, and
637	** if it fails a second time it's a real issue.
638 */
639 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
640 device_printf(dev,
641 "The EEPROM Checksum Is Not Valid\n");
642 error = EIO;
643 goto err_late;
644 }
645 }
646
647 /*
648 ** Copy the permanent MAC address out of the EEPROM
649 */
650 if (e1000_read_mac_addr(&adapter->hw) < 0) {
651 device_printf(dev, "EEPROM read error while reading MAC"
652 " address\n");
653 error = EIO;
654 goto err_late;
655 }
656 /* Check its sanity */
657 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
658 device_printf(dev, "Invalid MAC address\n");
659 error = EIO;
660 goto err_late;
661 }
662
663 /* Setup OS specific network interface */
664 if (igb_setup_interface(dev, adapter) != 0)
665 goto err_late;
666
667 /* Now get a good starting state */
668 igb_reset(adapter);
669
670 /* Initialize statistics */
671 igb_update_stats_counters(adapter);
672
673 adapter->hw.mac.get_link_status = 1;
674 igb_update_link_status(adapter);
675
676 /* Indicate SOL/IDER usage */
677 if (e1000_check_reset_block(&adapter->hw))
678 device_printf(dev,
679 "PHY reset is blocked due to SOL/IDER session.\n");
680
681 /* Determine if we have to control management hardware */
682 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
683
684 /*
685 * Setup Wake-on-Lan
686 */
687 /* APME bit in EEPROM is mapped to WUC.APME */
688 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
689 if (eeprom_data)
690 adapter->wol = E1000_WUFC_MAG;
691
692 /* Register for VLAN events */
693 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
694 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
695 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
696 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
697
698 igb_add_hw_stats(adapter);
699
700 /* Tell the stack that the interface is not active */
701 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
702 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
703
704 adapter->led_dev = led_create(igb_led_func, adapter,
705 device_get_nameunit(dev));
706
707 /*
708 ** Configure Interrupts
709 */
710 if ((adapter->msix > 1) && (igb_enable_msix))
711 error = igb_allocate_msix(adapter);
712 else /* MSI or Legacy */
713 error = igb_allocate_legacy(adapter);
714 if (error)
715 goto err_late;
716
717#ifdef DEV_NETMAP
718 igb_netmap_attach(adapter);
719#endif /* DEV_NETMAP */
720 INIT_DEBUGOUT("igb_attach: end");
721
722 return (0);
723
724err_late:
725 igb_detach(dev);
726 igb_free_transmit_structures(adapter);
727 igb_free_receive_structures(adapter);
728 igb_release_hw_control(adapter);
729err_pci:
730 igb_free_pci_resources(adapter);
731 if (adapter->ifp != NULL)
732 if_free(adapter->ifp);
733 free(adapter->mta, M_DEVBUF);
734 IGB_CORE_LOCK_DESTROY(adapter);
735
736 return (error);
737}
738
739/*********************************************************************
740 * Device removal routine
741 *
742 * The detach entry point is called when the driver is being removed.
743 * This routine stops the adapter and deallocates all the resources
744 * that were allocated for driver operation.
745 *
746 * return 0 on success, positive on failure
747 *********************************************************************/
748
749static int
750igb_detach(device_t dev)
751{
752 struct adapter *adapter = device_get_softc(dev);
753 struct ifnet *ifp = adapter->ifp;
754
755 INIT_DEBUGOUT("igb_detach: begin");
756
757 /* Make sure VLANS are not using driver */
758 if (adapter->ifp->if_vlantrunk != NULL) {
759 device_printf(dev,"Vlan in use, detach first\n");
760 return (EBUSY);
761 }
762
763 ether_ifdetach(adapter->ifp);
764
765 if (adapter->led_dev != NULL)
766 led_destroy(adapter->led_dev);
767
768#ifdef DEVICE_POLLING
769 if (ifp->if_capenable & IFCAP_POLLING)
770 ether_poll_deregister(ifp);
771#endif
772
773 IGB_CORE_LOCK(adapter);
774 adapter->in_detach = 1;
775 igb_stop(adapter);
776 IGB_CORE_UNLOCK(adapter);
777
778 e1000_phy_hw_reset(&adapter->hw);
779
780 /* Give control back to firmware */
781 igb_release_manageability(adapter);
782 igb_release_hw_control(adapter);
783
784 if (adapter->wol) {
785 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
786 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
787 igb_enable_wakeup(dev);
788 }
789
790 /* Unregister VLAN events */
791 if (adapter->vlan_attach != NULL)
792 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793 if (adapter->vlan_detach != NULL)
794 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796 callout_drain(&adapter->timer);
797
798#ifdef DEV_NETMAP
799 netmap_detach(adapter->ifp);
800#endif /* DEV_NETMAP */
801 igb_free_pci_resources(adapter);
802 bus_generic_detach(dev);
803 if_free(ifp);
804
805 igb_free_transmit_structures(adapter);
806 igb_free_receive_structures(adapter);
807 if (adapter->mta != NULL)
808 free(adapter->mta, M_DEVBUF);
809
810 IGB_CORE_LOCK_DESTROY(adapter);
811
812 return (0);
813}
814
815/*********************************************************************
816 *
817 * Shutdown entry point
818 *
819 **********************************************************************/
820
821static int
822igb_shutdown(device_t dev)
823{
824 return igb_suspend(dev);
825}
826
827/*
828 * Suspend/resume device methods.
829 */
830static int
831igb_suspend(device_t dev)
832{
833 struct adapter *adapter = device_get_softc(dev);
834
835 IGB_CORE_LOCK(adapter);
836
837 igb_stop(adapter);
838
839 igb_release_manageability(adapter);
840 igb_release_hw_control(adapter);
841
842 if (adapter->wol) {
843 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
844 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
845 igb_enable_wakeup(dev);
846 }
847
848 IGB_CORE_UNLOCK(adapter);
849
850 return bus_generic_suspend(dev);
851}
852
853static int
854igb_resume(device_t dev)
855{
856 struct adapter *adapter = device_get_softc(dev);
857 struct tx_ring *txr = adapter->tx_rings;
858 struct ifnet *ifp = adapter->ifp;
859
860 IGB_CORE_LOCK(adapter);
861 igb_init_locked(adapter);
862 igb_init_manageability(adapter);
863
864 if ((ifp->if_flags & IFF_UP) &&
865 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
866 for (int i = 0; i < adapter->num_queues; i++, txr++) {
867 IGB_TX_LOCK(txr);
868#ifndef IGB_LEGACY_TX
869 /* Process the stack queue only if not depleted */
870 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
871 !drbr_empty(ifp, txr->br))
872 igb_mq_start_locked(ifp, txr);
873#else
874 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
875 igb_start_locked(txr, ifp);
876#endif
877 IGB_TX_UNLOCK(txr);
878 }
879 }
880 IGB_CORE_UNLOCK(adapter);
881
882 return bus_generic_resume(dev);
883}
884
885
886#ifdef IGB_LEGACY_TX
887
888/*********************************************************************
889 * Transmit entry point
890 *
891 * igb_start is called by the stack to initiate a transmit.
892 * The driver will remain in this routine as long as there are
893 * packets to transmit and transmit resources are available.
 894 * If resources are not available, the stack is notified and
 895 * the packet is requeued.
896 **********************************************************************/
897
898static void
899igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
900{
901 struct adapter *adapter = ifp->if_softc;
902 struct mbuf *m_head;
903
904 IGB_TX_LOCK_ASSERT(txr);
905
906 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
907 IFF_DRV_RUNNING)
908 return;
909 if (!adapter->link_active)
910 return;
911
912 /* Call cleanup if number of TX descriptors low */
913 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
914 igb_txeof(txr);
915
916 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
917 if (txr->tx_avail <= IGB_MAX_SCATTER) {
918 txr->queue_status |= IGB_QUEUE_DEPLETED;
919 break;
920 }
921 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
922 if (m_head == NULL)
923 break;
924 /*
 925 * Encapsulation can modify our pointer, and/or make it
926 * NULL on failure. In that event, we can't requeue.
927 */
928 if (igb_xmit(txr, &m_head)) {
929 if (m_head != NULL)
930 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
931 if (txr->tx_avail <= IGB_MAX_SCATTER)
932 txr->queue_status |= IGB_QUEUE_DEPLETED;
933 break;
934 }
935
936 /* Send a copy of the frame to the BPF listener */
937 ETHER_BPF_MTAP(ifp, m_head);
938
939 /* Set watchdog on */
940 txr->watchdog_time = ticks;
941 txr->queue_status |= IGB_QUEUE_WORKING;
942 }
943}
944
945/*
946 * Legacy TX driver routine, called from the
947 * stack, always uses tx[0], and spins for it.
948 * Should not be used with multiqueue tx
949 */
950static void
951igb_start(struct ifnet *ifp)
952{
953 struct adapter *adapter = ifp->if_softc;
954 struct tx_ring *txr = adapter->tx_rings;
955
956 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
957 IGB_TX_LOCK(txr);
958 igb_start_locked(txr, ifp);
959 IGB_TX_UNLOCK(txr);
960 }
961 return;
962}
963
964#else /* ~IGB_LEGACY_TX */
965
966/*
967** Multiqueue Transmit Entry:
968** quick turnaround to the stack
969**
970*/
971static int
972igb_mq_start(struct ifnet *ifp, struct mbuf *m)
973{
974 struct adapter *adapter = ifp->if_softc;
975 struct igb_queue *que;
976 struct tx_ring *txr;
977 int i, err = 0;
978#ifdef RSS
979 uint32_t bucket_id;
980#endif
981
982 /* Which queue to use */
983 /*
984 * When doing RSS, map it to the same outbound queue
985 * as the incoming flow would be mapped to.
986 *
 987 * If everything is set up correctly, it should be the
 988 * same bucket as the one the CPU we're running on maps to.
989 */
990 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
991#ifdef RSS
992 if (rss_hash2bucket(m->m_pkthdr.flowid,
993 M_HASHTYPE_GET(m), &bucket_id) == 0) {
994 /* XXX TODO: spit out something if bucket_id > num_queues? */
995 i = bucket_id % adapter->num_queues;
996 } else {
997#endif
998 i = m->m_pkthdr.flowid % adapter->num_queues;
999#ifdef RSS
1000 }
1001#endif
1002 } else {
1003 i = curcpu % adapter->num_queues;
1004 }
1005 txr = &adapter->tx_rings[i];
1006 que = &adapter->queues[i];
1007
1008 err = drbr_enqueue(ifp, txr->br, m);
1009 if (err)
1010 return (err);
1011 if (IGB_TX_TRYLOCK(txr)) {
1012 igb_mq_start_locked(ifp, txr);
1013 IGB_TX_UNLOCK(txr);
1014 } else
1015 taskqueue_enqueue(que->tq, &txr->txq_task);
1016
1017 return (0);
1018}
1019
1020static int
1021igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1022{
1023 struct adapter *adapter = txr->adapter;
1024 struct mbuf *next;
1025 int err = 0, enq = 0;
1026
1027 IGB_TX_LOCK_ASSERT(txr);
1028
1029 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
1030 adapter->link_active == 0)
1031 return (ENETDOWN);
1032
1033
1034 /* Process the queue */
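 /*
 * drbr_peek() leaves the mbuf on the buf_ring; we only
 * drbr_advance() past it once igb_xmit() has taken it (or
 * freed it on error), and drbr_putback() restores an mbuf
 * we could not send, so nothing is lost if descriptors run out.
 */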
1035 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1036 if ((err = igb_xmit(txr, &next)) != 0) {
1037 if (next == NULL) {
1038 /* It was freed, move forward */
1039 drbr_advance(ifp, txr->br);
1040 } else {
1041 /*
1042 * Still have one left, it may not be
1043 * the same since the transmit function
1044 * may have changed it.
1045 */
1046 drbr_putback(ifp, txr->br, next);
1047 }
1048 break;
1049 }
1050 drbr_advance(ifp, txr->br);
1051 enq++;
1052 if (next->m_flags & M_MCAST && adapter->vf_ifp)
1053 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1054 ETHER_BPF_MTAP(ifp, next);
1055 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1056 break;
1057 }
1058 if (enq > 0) {
1059 /* Set the watchdog */
1060 txr->queue_status |= IGB_QUEUE_WORKING;
1061 txr->watchdog_time = ticks;
1062 }
1063 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1064 igb_txeof(txr);
1065 if (txr->tx_avail <= IGB_MAX_SCATTER)
1066 txr->queue_status |= IGB_QUEUE_DEPLETED;
1067 return (err);
1068}
1069
1070/*
1071 * Called from a taskqueue to drain queued transmit packets.
1072 */
1073static void
1074igb_deferred_mq_start(void *arg, int pending)
1075{
1076 struct tx_ring *txr = arg;
1077 struct adapter *adapter = txr->adapter;
1078 struct ifnet *ifp = adapter->ifp;
1079
1080 IGB_TX_LOCK(txr);
1081 if (!drbr_empty(ifp, txr->br))
1082 igb_mq_start_locked(ifp, txr);
1083 IGB_TX_UNLOCK(txr);
1084}
1085
1086/*
1087** Flush all ring buffers
1088*/
1089static void
1090igb_qflush(struct ifnet *ifp)
1091{
1092 struct adapter *adapter = ifp->if_softc;
1093 struct tx_ring *txr = adapter->tx_rings;
1094 struct mbuf *m;
1095
1096 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1097 IGB_TX_LOCK(txr);
1098 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1099 m_freem(m);
1100 IGB_TX_UNLOCK(txr);
1101 }
1102 if_qflush(ifp);
1103}
1104#endif /* ~IGB_LEGACY_TX */
1105
1106/*********************************************************************
1107 * Ioctl entry point
1108 *
1109 * igb_ioctl is called when the user wants to configure the
1110 * interface.
1111 *
1112 * return 0 on success, positive on failure
1113 **********************************************************************/
1114
1115static int
1116igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1117{
1118 struct adapter *adapter = ifp->if_softc;
1119 struct ifreq *ifr = (struct ifreq *)data;
1120#if defined(INET) || defined(INET6)
1121 struct ifaddr *ifa = (struct ifaddr *)data;
1122#endif
1123 bool avoid_reset = FALSE;
1124 int error = 0;
1125
1126 if (adapter->in_detach)
1127 return (error);
1128
1129 switch (command) {
1130 case SIOCSIFADDR:
1131#ifdef INET
1132 if (ifa->ifa_addr->sa_family == AF_INET)
1133 avoid_reset = TRUE;
1134#endif
1135#ifdef INET6
1136 if (ifa->ifa_addr->sa_family == AF_INET6)
1137 avoid_reset = TRUE;
1138#endif
1139 /*
1140 ** Calling init results in link renegotiation,
1141 ** so we avoid doing it when possible.
1142 */
1143 if (avoid_reset) {
1144 ifp->if_flags |= IFF_UP;
1145 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1146 igb_init(adapter);
1147#ifdef INET
1148 if (!(ifp->if_flags & IFF_NOARP))
1149 arp_ifinit(ifp, ifa);
1150#endif
1151 } else
1152 error = ether_ioctl(ifp, command, data);
1153 break;
1154 case SIOCSIFMTU:
1155 {
1156 int max_frame_size;
1157
1158 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1159
1160 IGB_CORE_LOCK(adapter);
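 /*
 * 9234 bytes is the largest frame accepted on the wire,
 * including the Ethernet header and CRC, hence the
 * subtraction in the MTU check below.
 */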
1161 max_frame_size = 9234;
1162 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1163 ETHER_CRC_LEN) {
1164 IGB_CORE_UNLOCK(adapter);
1165 error = EINVAL;
1166 break;
1167 }
1168
1169 ifp->if_mtu = ifr->ifr_mtu;
1170 adapter->max_frame_size =
1171 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1172 igb_init_locked(adapter);
1173 IGB_CORE_UNLOCK(adapter);
1174 break;
1175 }
1176 case SIOCSIFFLAGS:
1177 IOCTL_DEBUGOUT("ioctl rcv'd:\
1178 SIOCSIFFLAGS (Set Interface Flags)");
1179 IGB_CORE_LOCK(adapter);
1180 if (ifp->if_flags & IFF_UP) {
1181 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1182 if ((ifp->if_flags ^ adapter->if_flags) &
1183 (IFF_PROMISC | IFF_ALLMULTI)) {
1184 igb_disable_promisc(adapter);
1185 igb_set_promisc(adapter);
1186 }
1187 } else
1188 igb_init_locked(adapter);
1189 } else
1190 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1191 igb_stop(adapter);
1192 adapter->if_flags = ifp->if_flags;
1193 IGB_CORE_UNLOCK(adapter);
1194 break;
1195 case SIOCADDMULTI:
1196 case SIOCDELMULTI:
1197 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1198 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1199 IGB_CORE_LOCK(adapter);
1200 igb_disable_intr(adapter);
1201 igb_set_multi(adapter);
1202#ifdef DEVICE_POLLING
1203 if (!(ifp->if_capenable & IFCAP_POLLING))
1204#endif
1205 igb_enable_intr(adapter);
1206 IGB_CORE_UNLOCK(adapter);
1207 }
1208 break;
1209 case SIOCSIFMEDIA:
1210 /* Check SOL/IDER usage */
1211 IGB_CORE_LOCK(adapter);
1212 if (e1000_check_reset_block(&adapter->hw)) {
1213 IGB_CORE_UNLOCK(adapter);
1214 device_printf(adapter->dev, "Media change is"
1215 " blocked due to SOL/IDER session.\n");
1216 break;
1217 }
1218 IGB_CORE_UNLOCK(adapter);
1219 case SIOCGIFMEDIA:
1220 IOCTL_DEBUGOUT("ioctl rcv'd: \
1221 SIOCxIFMEDIA (Get/Set Interface Media)");
1222 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1223 break;
1224 case SIOCSIFCAP:
1225 {
1226 int mask, reinit;
1227
1228 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1229 reinit = 0;
1230 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1231#ifdef DEVICE_POLLING
1232 if (mask & IFCAP_POLLING) {
1233 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1234 error = ether_poll_register(igb_poll, ifp);
1235 if (error)
1236 return (error);
1237 IGB_CORE_LOCK(adapter);
1238 igb_disable_intr(adapter);
1239 ifp->if_capenable |= IFCAP_POLLING;
1240 IGB_CORE_UNLOCK(adapter);
1241 } else {
1242 error = ether_poll_deregister(ifp);
1243 /* Enable interrupt even in error case */
1244 IGB_CORE_LOCK(adapter);
1245 igb_enable_intr(adapter);
1246 ifp->if_capenable &= ~IFCAP_POLLING;
1247 IGB_CORE_UNLOCK(adapter);
1248 }
1249 }
1250#endif
1251 if (mask & IFCAP_HWCSUM) {
1252 ifp->if_capenable ^= IFCAP_HWCSUM;
1253 reinit = 1;
1254 }
1255 if (mask & IFCAP_TSO4) {
1256 ifp->if_capenable ^= IFCAP_TSO4;
1257 reinit = 1;
1258 }
1259 if (mask & IFCAP_TSO6) {
1260 ifp->if_capenable ^= IFCAP_TSO6;
1261 reinit = 1;
1262 }
1263 if (mask & IFCAP_VLAN_HWTAGGING) {
1264 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1265 reinit = 1;
1266 }
1267 if (mask & IFCAP_VLAN_HWFILTER) {
1268 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1269 reinit = 1;
1270 }
1271 if (mask & IFCAP_VLAN_HWTSO) {
1272 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1273 reinit = 1;
1274 }
1275 if (mask & IFCAP_LRO) {
1276 ifp->if_capenable ^= IFCAP_LRO;
1277 reinit = 1;
1278 }
1279 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1280 igb_init(adapter);
1281 VLAN_CAPABILITIES(ifp);
1282 break;
1283 }
1284
1285 default:
1286 error = ether_ioctl(ifp, command, data);
1287 break;
1288 }
1289
1290 return (error);
1291}
1292
1293
1294/*********************************************************************
1295 * Init entry point
1296 *
1297 * This routine is used in two ways. It is used by the stack as
1298 * init entry point in network interface structure. It is also used
1299 * by the driver as a hw/sw initialization routine to get to a
1300 * consistent state.
1301 *
1302 * return 0 on success, positive on failure
1303 **********************************************************************/
1304
1305static void
1306igb_init_locked(struct adapter *adapter)
1307{
1308 struct ifnet *ifp = adapter->ifp;
1309 device_t dev = adapter->dev;
1310
1311 INIT_DEBUGOUT("igb_init: begin");
1312
1313 IGB_CORE_LOCK_ASSERT(adapter);
1314
1315 igb_disable_intr(adapter);
1316 callout_stop(&adapter->timer);
1317
 1318 /* Get the latest MAC address; the user may have set a LAA */
1319 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1320 ETHER_ADDR_LEN);
1321
1322 /* Put the address into the Receive Address Array */
1323 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1324
1325 igb_reset(adapter);
1326 igb_update_link_status(adapter);
1327
1328 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1329
1330 /* Set hardware offload abilities */
1331 ifp->if_hwassist = 0;
1332 if (ifp->if_capenable & IFCAP_TXCSUM) {
1333 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1334#if __FreeBSD_version >= 800000
1335 if (adapter->hw.mac.type == e1000_82576)
1336 ifp->if_hwassist |= CSUM_SCTP;
1337#endif
1338 }
1339
1340 if (ifp->if_capenable & IFCAP_TSO)
1341 ifp->if_hwassist |= CSUM_TSO;
1342
1343 /* Configure for OS presence */
1344 igb_init_manageability(adapter);
1345
1346 /* Prepare transmit descriptors and buffers */
1347 igb_setup_transmit_structures(adapter);
1348 igb_initialize_transmit_units(adapter);
1349
1350 /* Setup Multicast table */
1351 igb_set_multi(adapter);
1352
1353 /*
1354 ** Figure out the desired mbuf pool
1355 ** for doing jumbo/packetsplit
1356 */
1357 if (adapter->max_frame_size <= 2048)
1358 adapter->rx_mbuf_sz = MCLBYTES;
1359 else if (adapter->max_frame_size <= 4096)
1360 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1361 else
1362 adapter->rx_mbuf_sz = MJUM9BYTES;
1363
1364 /* Prepare receive descriptors and buffers */
1365 if (igb_setup_receive_structures(adapter)) {
1366 device_printf(dev, "Could not setup receive structures\n");
1367 return;
1368 }
1369 igb_initialize_receive_units(adapter);
1370
1371 /* Enable VLAN support */
1372 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1373 igb_setup_vlan_hw_support(adapter);
1374
1375 /* Don't lose promiscuous settings */
1376 igb_set_promisc(adapter);
1377
1378 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1379 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1380
1381 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1382 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1383
1384 if (adapter->msix > 1) /* Set up queue routing */
1385 igb_configure_queues(adapter);
1386
1387 /* this clears any pending interrupts */
1388 E1000_READ_REG(&adapter->hw, E1000_ICR);
1389#ifdef DEVICE_POLLING
1390 /*
1391 * Only enable interrupts if we are not polling, make sure
1392 * they are off otherwise.
1393 */
1394 if (ifp->if_capenable & IFCAP_POLLING)
1395 igb_disable_intr(adapter);
1396 else
1397#endif /* DEVICE_POLLING */
1398 {
1399 igb_enable_intr(adapter);
1400 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1401 }
1402
1403 /* Set Energy Efficient Ethernet */
1404 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1405 if (adapter->hw.mac.type == e1000_i354)
1406 e1000_set_eee_i354(&adapter->hw);
1407 else
1408 e1000_set_eee_i350(&adapter->hw);
1409 }
1410}
1411
1412static void
1413igb_init(void *arg)
1414{
1415 struct adapter *adapter = arg;
1416
1417 IGB_CORE_LOCK(adapter);
1418 igb_init_locked(adapter);
1419 IGB_CORE_UNLOCK(adapter);
1420}
1421
1422
1423static void
1424igb_handle_que(void *context, int pending)
1425{
1426 struct igb_queue *que = context;
1427 struct adapter *adapter = que->adapter;
1428 struct tx_ring *txr = que->txr;
1429 struct ifnet *ifp = adapter->ifp;
1430
1431 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1432 bool more;
1433
1434 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1435
1436 IGB_TX_LOCK(txr);
1437 igb_txeof(txr);
1438#ifndef IGB_LEGACY_TX
1439 /* Process the stack queue only if not depleted */
1440 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1441 !drbr_empty(ifp, txr->br))
1442 igb_mq_start_locked(ifp, txr);
1443#else
1444 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445 igb_start_locked(txr, ifp);
1446#endif
1447 IGB_TX_UNLOCK(txr);
1448 /* Do we need another? */
1449 if (more) {
1450 taskqueue_enqueue(que->tq, &que->que_task);
1451 return;
1452 }
1453 }
1454
1455#ifdef DEVICE_POLLING
1456 if (ifp->if_capenable & IFCAP_POLLING)
1457 return;
1458#endif
1459 /* Reenable this interrupt */
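 /* que->eims is only set when MSIX is in use; otherwise re-enable globally */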
1460 if (que->eims)
1461 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1462 else
1463 igb_enable_intr(adapter);
1464}
1465
1466/* Deal with link in a sleepable context */
1467static void
1468igb_handle_link(void *context, int pending)
1469{
1470 struct adapter *adapter = context;
1471
1472 IGB_CORE_LOCK(adapter);
1473 igb_handle_link_locked(adapter);
1474 IGB_CORE_UNLOCK(adapter);
1475}
1476
1477static void
1478igb_handle_link_locked(struct adapter *adapter)
1479{
1480 struct tx_ring *txr = adapter->tx_rings;
1481 struct ifnet *ifp = adapter->ifp;
1482
1483 IGB_CORE_LOCK_ASSERT(adapter);
1484 adapter->hw.mac.get_link_status = 1;
1485 igb_update_link_status(adapter);
1486 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1487 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1488 IGB_TX_LOCK(txr);
1489#ifndef IGB_LEGACY_TX
1490 /* Process the stack queue only if not depleted */
1491 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1492 !drbr_empty(ifp, txr->br))
1493 igb_mq_start_locked(ifp, txr);
1494#else
1495 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1496 igb_start_locked(txr, ifp);
1497#endif
1498 IGB_TX_UNLOCK(txr);
1499 }
1500 }
1501}
1502
1503/*********************************************************************
1504 *
1505 * MSI/Legacy Deferred
1506 * Interrupt Service routine
1507 *
1508 *********************************************************************/
1509static int
1510igb_irq_fast(void *arg)
1511{
1512 struct adapter *adapter = arg;
1513 struct igb_queue *que = adapter->queues;
1514 u32 reg_icr;
1515
1516
1517 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1518
1519 /* Hot eject? */
1520 if (reg_icr == 0xffffffff)
1521 return FILTER_STRAY;
1522
1523 /* Definitely not our interrupt. */
1524 if (reg_icr == 0x0)
1525 return FILTER_STRAY;
1526
1527 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1528 return FILTER_STRAY;
1529
1530 /*
1531 * Mask interrupts until the taskqueue is finished running. This is
1532 * cheap, just assume that it is needed. This also works around the
1533 * MSI message reordering errata on certain systems.
1534 */
1535 igb_disable_intr(adapter);
1536 taskqueue_enqueue(que->tq, &que->que_task);
1537
1538 /* Link status change */
1539 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1540 taskqueue_enqueue(que->tq, &adapter->link_task);
1541
1542 if (reg_icr & E1000_ICR_RXO)
1543 adapter->rx_overruns++;
1544 return FILTER_HANDLED;
1545}
1546
1547#ifdef DEVICE_POLLING
1548#if __FreeBSD_version >= 800000
1549#define POLL_RETURN_COUNT(a) (a)
1550static int
1551#else
1552#define POLL_RETURN_COUNT(a)
1553static void
1554#endif
1555igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1556{
1557 struct adapter *adapter = ifp->if_softc;
1558 struct igb_queue *que;
1559 struct tx_ring *txr;
1560 u32 reg_icr, rx_done = 0;
1561 u32 loop = IGB_MAX_LOOP;
1562 bool more;
1563
1564 IGB_CORE_LOCK(adapter);
1565 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1566 IGB_CORE_UNLOCK(adapter);
1567 return POLL_RETURN_COUNT(rx_done);
1568 }
1569
1570 if (cmd == POLL_AND_CHECK_STATUS) {
1571 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1572 /* Link status change */
1573 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1574 igb_handle_link_locked(adapter);
1575
1576 if (reg_icr & E1000_ICR_RXO)
1577 adapter->rx_overruns++;
1578 }
1579 IGB_CORE_UNLOCK(adapter);
1580
1581 for (int i = 0; i < adapter->num_queues; i++) {
1582 que = &adapter->queues[i];
1583 txr = que->txr;
1584
1585 igb_rxeof(que, count, &rx_done);
1586
1587 IGB_TX_LOCK(txr);
1588 do {
1589 more = igb_txeof(txr);
1590 } while (loop-- && more);
1591#ifndef IGB_LEGACY_TX
1592 if (!drbr_empty(ifp, txr->br))
1593 igb_mq_start_locked(ifp, txr);
1594#else
1595 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1596 igb_start_locked(txr, ifp);
1597#endif
1598 IGB_TX_UNLOCK(txr);
1599 }
1600
1601 return POLL_RETURN_COUNT(rx_done);
1602}
1603#endif /* DEVICE_POLLING */
1604
1605/*********************************************************************
1606 *
1607 * MSIX Que Interrupt Service routine
1608 *
1609 **********************************************************************/
1610static void
1611igb_msix_que(void *arg)
1612{
1613 struct igb_queue *que = arg;
1614 struct adapter *adapter = que->adapter;
1615 struct ifnet *ifp = adapter->ifp;
1616 struct tx_ring *txr = que->txr;
1617 struct rx_ring *rxr = que->rxr;
1618 u32 newitr = 0;
1619 bool more_rx;
1620
1621 /* Ignore spurious interrupts */
1622 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1623 return;
1624
1625 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1626 ++que->irqs;
1627
1628 IGB_TX_LOCK(txr);
1629 igb_txeof(txr);
1630#ifndef IGB_LEGACY_TX
1631 /* Process the stack queue only if not depleted */
1632 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1633 !drbr_empty(ifp, txr->br))
1634 igb_mq_start_locked(ifp, txr);
1635#else
1636 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1637 igb_start_locked(txr, ifp);
1638#endif
1639 IGB_TX_UNLOCK(txr);
1640
1641 more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1642
1643 if (adapter->enable_aim == FALSE)
1644 goto no_calc;
1645 /*
1646 ** Do Adaptive Interrupt Moderation:
1647 ** - Write out last calculated setting
1648 ** - Calculate based on average size over
1649 ** the last interval.
1650 */
1651 if (que->eitr_setting)
1652 E1000_WRITE_REG(&adapter->hw,
1653 E1000_EITR(que->msix), que->eitr_setting);
1654
1655 que->eitr_setting = 0;
1656
1657 /* Idle, do nothing */
1658 if ((txr->bytes == 0) && (rxr->bytes == 0))
1659 goto no_calc;
1660
 1661 /* Use half of the default ITR when link is sub-gigabit */
1662 if (adapter->link_speed != 1000)
1663 newitr = IGB_DEFAULT_ITR / 2;
1664 else {
1665 if ((txr->bytes) && (txr->packets))
1666 newitr = txr->bytes/txr->packets;
1667 if ((rxr->bytes) && (rxr->packets))
1668 newitr = max(newitr,
1669 (rxr->bytes / rxr->packets));
1670 newitr += 24; /* account for hardware frame, crc */
1671 /* set an upper boundary */
1672 newitr = min(newitr, 3000);
1673 /* Be nice to the mid range */
1674 if ((newitr > 300) && (newitr < 1200))
1675 newitr = (newitr / 3);
1676 else
1677 newitr = (newitr / 2);
1678 }
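 /*
 * The 82575 expects the interval replicated in the upper
 * half of EITR, while later MACs take the CNT_IGNR flag so
 * the update does not disturb the running interval counter.
 */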
1679 newitr &= 0x7FFC; /* Mask invalid bits */
1680 if (adapter->hw.mac.type == e1000_82575)
1681 newitr |= newitr << 16;
1682 else
1683 newitr |= E1000_EITR_CNT_IGNR;
1684
1685 /* save for next interrupt */
1686 que->eitr_setting = newitr;
1687
1688 /* Reset state */
1689 txr->bytes = 0;
1690 txr->packets = 0;
1691 rxr->bytes = 0;
1692 rxr->packets = 0;
1693
1694no_calc:
 1695 /* Schedule a clean task if needed */
1696 if (more_rx)
1697 taskqueue_enqueue(que->tq, &que->que_task);
1698 else
1699 /* Reenable this interrupt */
1700 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1701 return;
1702}
1703
1704
1705/*********************************************************************
1706 *
1707 * MSIX Link Interrupt Service routine
1708 *
1709 **********************************************************************/
1710
1711static void
1712igb_msix_link(void *arg)
1713{
1714 struct adapter *adapter = arg;
1715 u32 icr;
1716
1717 ++adapter->link_irq;
1718 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1719 if (!(icr & E1000_ICR_LSC))
1720 goto spurious;
1721 igb_handle_link(adapter, 0);
1722
1723spurious:
1724 /* Rearm */
1725 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1726 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1727 return;
1728}
1729
1730
1731/*********************************************************************
1732 *
1733 * Media Ioctl callback
1734 *
1735 * This routine is called whenever the user queries the status of
1736 * the interface using ifconfig.
1737 *
1738 **********************************************************************/
1739static void
1740igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1741{
1742 struct adapter *adapter = ifp->if_softc;
1743
1744 INIT_DEBUGOUT("igb_media_status: begin");
1745
1746 IGB_CORE_LOCK(adapter);
1747 igb_update_link_status(adapter);
1748
1749 ifmr->ifm_status = IFM_AVALID;
1750 ifmr->ifm_active = IFM_ETHER;
1751
1752 if (!adapter->link_active) {
1753 IGB_CORE_UNLOCK(adapter);
1754 return;
1755 }
1756
1757 ifmr->ifm_status |= IFM_ACTIVE;
1758
1759 switch (adapter->link_speed) {
1760 case 10:
1761 ifmr->ifm_active |= IFM_10_T;
1762 break;
1763 case 100:
1764 /*
1765 ** Support for 100Mb SFP - these are Fiber
1766 ** but the media type appears as serdes
1767 */
1768 if (adapter->hw.phy.media_type ==
1769 e1000_media_type_internal_serdes)
1770 ifmr->ifm_active |= IFM_100_FX;
1771 else
1772 ifmr->ifm_active |= IFM_100_TX;
1773 break;
1774 case 1000:
1775 ifmr->ifm_active |= IFM_1000_T;
1776 break;
1777 case 2500:
1778 ifmr->ifm_active |= IFM_2500_SX;
1779 break;
1780 }
1781
1782 if (adapter->link_duplex == FULL_DUPLEX)
1783 ifmr->ifm_active |= IFM_FDX;
1784 else
1785 ifmr->ifm_active |= IFM_HDX;
1786
1787 IGB_CORE_UNLOCK(adapter);
1788}
1789
1790/*********************************************************************
1791 *
1792 * Media Ioctl callback
1793 *
1794 * This routine is called when the user changes speed/duplex using
 1795 * media/mediaopt option with ifconfig.
1796 *
1797 **********************************************************************/
1798static int
1799igb_media_change(struct ifnet *ifp)
1800{
1801 struct adapter *adapter = ifp->if_softc;
1802 struct ifmedia *ifm = &adapter->media;
1803
1804 INIT_DEBUGOUT("igb_media_change: begin");
1805
1806 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1807 return (EINVAL);
1808
1809 IGB_CORE_LOCK(adapter);
1810 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1811 case IFM_AUTO:
1812 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1813 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1814 break;
1815 case IFM_1000_LX:
1816 case IFM_1000_SX:
1817 case IFM_1000_T:
1818 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1819 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1820 break;
1821 case IFM_100_TX:
1822 adapter->hw.mac.autoneg = FALSE;
1823 adapter->hw.phy.autoneg_advertised = 0;
1824 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1825 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1826 else
1827 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1828 break;
1829 case IFM_10_T:
1830 adapter->hw.mac.autoneg = FALSE;
1831 adapter->hw.phy.autoneg_advertised = 0;
1832 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1833 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1834 else
1835 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1836 break;
1837 default:
1838 device_printf(adapter->dev, "Unsupported media type\n");
1839 }
1840
1841 igb_init_locked(adapter);
1842 IGB_CORE_UNLOCK(adapter);
1843
1844 return (0);
1845}
1846
1847
1848/*********************************************************************
1849 *
1850 * This routine maps the mbufs to Advanced TX descriptors.
1851 *
1852 **********************************************************************/
1853static int
1854igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1855{
1856 struct adapter *adapter = txr->adapter;
1857 u32 olinfo_status = 0, cmd_type_len;
1858 int i, j, error, nsegs;
1859 int first;
1860 bool remap = TRUE;
1861 struct mbuf *m_head;
1862 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1863 bus_dmamap_t map;
1864 struct igb_tx_buf *txbuf;
1865 union e1000_adv_tx_desc *txd = NULL;
1866
1867 m_head = *m_headp;
1868
1869 /* Basic descriptor defines */
1870 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1871 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1872
1873 if (m_head->m_flags & M_VLANTAG)
1874 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1875
1876 /*
1877 * Important to capture the first descriptor
1878 * used because it will contain the index of
1879 * the one we tell the hardware to report back
1880 */
1881 first = txr->next_avail_desc;
1882 txbuf = &txr->tx_buffers[first];
1883 map = txbuf->map;
1884
1885 /*
1886 * Map the packet for DMA.
1887 */
1888retry:
1889 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1890 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1891
1892 if (__predict_false(error)) {
1893 struct mbuf *m;
1894
1895 switch (error) {
1896 case EFBIG:
1897 /* Try it again? - one try */
1898 if (remap == TRUE) {
1899 remap = FALSE;
1900 m = m_defrag(*m_headp, M_NOWAIT);
1901 if (m == NULL) {
1902 adapter->mbuf_defrag_failed++;
1903 m_freem(*m_headp);
1904 *m_headp = NULL;
1905 return (ENOBUFS);
1906 }
1907 *m_headp = m;
1908 goto retry;
1909 } else
1910 return (error);
1911 default:
1912 txr->no_tx_dma_setup++;
1913 m_freem(*m_headp);
1914 *m_headp = NULL;
1915 return (error);
1916 }
1917 }
1918
1919 /* Make certain there are enough descriptors */
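 /* The headroom covers the offload context descriptor consumed by igb_tx_ctx_setup() below */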
1920 if (nsegs > txr->tx_avail - 2) {
1921 txr->no_desc_avail++;
1922 bus_dmamap_unload(txr->txtag, map);
1923 return (ENOBUFS);
1924 }
1925 m_head = *m_headp;
1926
1927 /*
 1928 ** Set up the appropriate offload context;
 1929 ** this will consume the first descriptor
1930 */
1931 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1932 if (__predict_false(error)) {
1933 m_freem(*m_headp);
1934 *m_headp = NULL;
1935 return (error);
1936 }
1937
1938 /* 82575 needs the queue index added */
1939 if (adapter->hw.mac.type == e1000_82575)
1940 olinfo_status |= txr->me << 4;
1941
1942 i = txr->next_avail_desc;
1943 for (j = 0; j < nsegs; j++) {
1944 bus_size_t seglen;
1945 bus_addr_t segaddr;
1946
1947 txbuf = &txr->tx_buffers[i];
1948 txd = &txr->tx_base[i];
1949 seglen = segs[j].ds_len;
1950 segaddr = htole64(segs[j].ds_addr);
1951
1952 txd->read.buffer_addr = segaddr;
1953 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1954 cmd_type_len | seglen);
1955 txd->read.olinfo_status = htole32(olinfo_status);
1956
1957 if (++i == txr->num_desc)
1958 i = 0;
1959 }
1960
1961 txd->read.cmd_type_len |=
1962 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1963 txr->tx_avail -= nsegs;
1964 txr->next_avail_desc = i;
1965
1966 txbuf->m_head = m_head;
1967 /*
1968 ** Here we swap the map so the last descriptor,
 1969 ** which gets the completion interrupt, has the
1970 ** real map, and the first descriptor gets the
1971 ** unused map from this descriptor.
1972 */
1973 txr->tx_buffers[first].map = txbuf->map;
1974 txbuf->map = map;
1975 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1976
1977 /* Set the EOP descriptor that will be marked done */
1978 txbuf = &txr->tx_buffers[first];
1979 txbuf->eop = txd;
1980
1981 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1982 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1983 /*
1984 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1985 * hardware that this frame is available to transmit.
1986 */
1987 ++txr->total_packets;
1988 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1989
1990 return (0);
1991}
1992static void
1993igb_set_promisc(struct adapter *adapter)
1994{
1995 struct ifnet *ifp = adapter->ifp;
1996 struct e1000_hw *hw = &adapter->hw;
1997 u32 reg;
1998
1999 if (adapter->vf_ifp) {
2000 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2001 return;
2002 }
2003
2004 reg = E1000_READ_REG(hw, E1000_RCTL);
2005 if (ifp->if_flags & IFF_PROMISC) {
2006 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2007 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2008 } else if (ifp->if_flags & IFF_ALLMULTI) {
2009 reg |= E1000_RCTL_MPE;
2010 reg &= ~E1000_RCTL_UPE;
2011 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2012 }
2013}
2014
2015static void
2016igb_disable_promisc(struct adapter *adapter)
2017{
2018 struct e1000_hw *hw = &adapter->hw;
2019 struct ifnet *ifp = adapter->ifp;
2020 u32 reg;
2021 int mcnt = 0;
2022
2023 if (adapter->vf_ifp) {
2024 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2025 return;
2026 }
2027 reg = E1000_READ_REG(hw, E1000_RCTL);
2028 reg &= (~E1000_RCTL_UPE);
2029 if (ifp->if_flags & IFF_ALLMULTI)
2030 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2031 else {
2032 struct ifmultiaddr *ifma;
2033#if __FreeBSD_version < 800000
2034 IF_ADDR_LOCK(ifp);
2035#else
2036 if_maddr_rlock(ifp);
2037#endif
2038 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2039 if (ifma->ifma_addr->sa_family != AF_LINK)
2040 continue;
2041 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2042 break;
2043 mcnt++;
2044 }
2045#if __FreeBSD_version < 800000
2046 IF_ADDR_UNLOCK(ifp);
2047#else
2048 if_maddr_runlock(ifp);
2049#endif
2050 }
 2051 /* Leave MPE set if we are at the multicast filter limit */
2052 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2053 reg &= (~E1000_RCTL_MPE);
2054 E1000_WRITE_REG(hw, E1000_RCTL, reg);
2055}
2056
2057
2058/*********************************************************************
2059 * Multicast Update
2060 *
2061 * This routine is called whenever multicast address list is updated.
2062 *
2063 **********************************************************************/
2064
2065static void
2066igb_set_multi(struct adapter *adapter)
2067{
2068 struct ifnet *ifp = adapter->ifp;
2069 struct ifmultiaddr *ifma;
2070 u32 reg_rctl = 0;
2071 u8 *mta;
2072
2073 int mcnt = 0;
2074
2075 IOCTL_DEBUGOUT("igb_set_multi: begin");
2076
2077 mta = adapter->mta;
2078 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2079 MAX_NUM_MULTICAST_ADDRESSES);
2080
2081#if __FreeBSD_version < 800000
2082 IF_ADDR_LOCK(ifp);
2083#else
2084 if_maddr_rlock(ifp);
2085#endif
2086 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2087 if (ifma->ifma_addr->sa_family != AF_LINK)
2088 continue;
2089
2090 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2091 break;
2092
2093 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2094 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2095 mcnt++;
2096 }
2097#if __FreeBSD_version < 800000
2098 IF_ADDR_UNLOCK(ifp);
2099#else
2100 if_maddr_runlock(ifp);
2101#endif
2102
2103 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2104 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2105 reg_rctl |= E1000_RCTL_MPE;
2106 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2107 } else
2108 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2109}
2110
2111
2112/*********************************************************************
2113 * Timer routine:
2114 * This routine checks for link status,
2115 * updates statistics, and does the watchdog.
2116 *
2117 **********************************************************************/
2118
2119static void
2120igb_local_timer(void *arg)
2121{
2122 struct adapter *adapter = arg;
2123 device_t dev = adapter->dev;
2124 struct ifnet *ifp = adapter->ifp;
2125 struct tx_ring *txr = adapter->tx_rings;
2126 struct igb_queue *que = adapter->queues;
2127 int hung = 0, busy = 0;
2128
2129
2130 IGB_CORE_LOCK_ASSERT(adapter);
2131
2132 igb_update_link_status(adapter);
2133 igb_update_stats_counters(adapter);
2134
2135 /*
2136 ** Check the TX queues status
2137 ** - central locked handling of OACTIVE
2138 ** - watchdog only if all queues show hung
2139 */
2140 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2141 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2142 (adapter->pause_frames == 0))
2143 ++hung;
2144 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2145 ++busy;
2146 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2147 taskqueue_enqueue(que->tq, &que->que_task);
2148 }
2149 if (hung == adapter->num_queues)
2150 goto timeout;
2151 if (busy == adapter->num_queues)
2152 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2153 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2154 (busy < adapter->num_queues))
2155 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2156
2157 adapter->pause_frames = 0;
2158 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2159#ifndef DEVICE_POLLING
2160 /* Schedule all queue interrupts - deadlock protection */
2161 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2162#endif
2163 return;
2164
2165timeout:
2166 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2167 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2168 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2169 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2170 device_printf(dev,"TX(%d) desc avail = %d,"
2171 "Next TX to Clean = %d\n",
2172 txr->me, txr->tx_avail, txr->next_to_clean);
2173 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2174 adapter->watchdog_events++;
2175 igb_init_locked(adapter);
2176}
2177
2178static void
2179igb_update_link_status(struct adapter *adapter)
2180{
2181 struct e1000_hw *hw = &adapter->hw;
2182 struct e1000_fc_info *fc = &hw->fc;
2183 struct ifnet *ifp = adapter->ifp;
2184 device_t dev = adapter->dev;
2185 struct tx_ring *txr = adapter->tx_rings;
2186 u32 link_check, thstat, ctrl;
2187 char *flowctl = NULL;
2188
2189 link_check = thstat = ctrl = 0;
2190
2191 /* Get the cached link value or read for real */
2192 switch (hw->phy.media_type) {
2193 case e1000_media_type_copper:
2194 if (hw->mac.get_link_status) {
2195 /* Do the work to read phy */
2196 e1000_check_for_link(hw);
2197 link_check = !hw->mac.get_link_status;
2198 } else
2199 link_check = TRUE;
2200 break;
2201 case e1000_media_type_fiber:
2202 e1000_check_for_link(hw);
2203 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2204 E1000_STATUS_LU);
2205 break;
2206 case e1000_media_type_internal_serdes:
2207 e1000_check_for_link(hw);
2208 link_check = adapter->hw.mac.serdes_has_link;
2209 break;
2210 /* VF device is type_unknown */
2211 case e1000_media_type_unknown:
2212 e1000_check_for_link(hw);
2213 link_check = !hw->mac.get_link_status;
2214 /* Fall thru */
2215 default:
2216 break;
2217 }
2218
2219 /* Check for thermal downshift or shutdown */
2220 if (hw->mac.type == e1000_i350) {
2221 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2222 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2223 }
2224
2225 /* Get the flow control for display */
2226 switch (fc->current_mode) {
2227 case e1000_fc_rx_pause:
2228 flowctl = "RX";
2229 break;
2230 case e1000_fc_tx_pause:
2231 flowctl = "TX";
2232 break;
2233 case e1000_fc_full:
2234 flowctl = "Full";
2235 break;
2236 case e1000_fc_none:
2237 default:
2238 flowctl = "None";
2239 break;
2240 }
2241
2242 /* Now we check if a transition has happened */
2243 if (link_check && (adapter->link_active == 0)) {
2244 e1000_get_speed_and_duplex(&adapter->hw,
2245 &adapter->link_speed, &adapter->link_duplex);
2246 if (bootverbose)
2247 device_printf(dev, "Link is up %d Mbps %s,"
2248 " Flow Control: %s\n",
2249 adapter->link_speed,
2250 ((adapter->link_duplex == FULL_DUPLEX) ?
2251 "Full Duplex" : "Half Duplex"), flowctl);
2252 adapter->link_active = 1;
2253 ifp->if_baudrate = adapter->link_speed * 1000000;
2254 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2255 (thstat & E1000_THSTAT_LINK_THROTTLE))
2256 device_printf(dev, "Link: thermal downshift\n");
2257 /* Delay Link Up for Phy update */
2258 if (((hw->mac.type == e1000_i210) ||
2259 (hw->mac.type == e1000_i211)) &&
2260 (hw->phy.id == I210_I_PHY_ID))
2261 msec_delay(I210_LINK_DELAY);
2262 /* Reset if the media type changed. */
2263 if (hw->dev_spec._82575.media_changed) {
2264 hw->dev_spec._82575.media_changed = false;
2265 adapter->flags |= IGB_MEDIA_RESET;
2266 igb_reset(adapter);
2267 }
2268 /* This can sleep */
2269 if_link_state_change(ifp, LINK_STATE_UP);
2270 } else if (!link_check && (adapter->link_active == 1)) {
2271 ifp->if_baudrate = adapter->link_speed = 0;
2272 adapter->link_duplex = 0;
2273 if (bootverbose)
2274 device_printf(dev, "Link is Down\n");
2275 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2276 (thstat & E1000_THSTAT_PWR_DOWN))
2277 device_printf(dev, "Link: thermal shutdown\n");
2278 adapter->link_active = 0;
2279 /* This can sleep */
2280 if_link_state_change(ifp, LINK_STATE_DOWN);
2281 /* Reset queue state */
2282 for (int i = 0; i < adapter->num_queues; i++, txr++)
2283 txr->queue_status = IGB_QUEUE_IDLE;
2284 }
2285}
2286
2287/*********************************************************************
2288 *
2289 * This routine disables all traffic on the adapter by issuing a
2290 * global reset on the MAC and deallocates TX/RX buffers.
2291 *
2292 **********************************************************************/
2293
2294static void
2295igb_stop(void *arg)
2296{
2297 struct adapter *adapter = arg;
2298 struct ifnet *ifp = adapter->ifp;
2299 struct tx_ring *txr = adapter->tx_rings;
2300
2301 IGB_CORE_LOCK_ASSERT(adapter);
2302
2303 INIT_DEBUGOUT("igb_stop: begin");
2304
2305 igb_disable_intr(adapter);
2306
2307 callout_stop(&adapter->timer);
2308
2309 /* Tell the stack that the interface is no longer active */
2310 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2311 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2312
2313 /* Disarm watchdog timer. */
2314 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2315 IGB_TX_LOCK(txr);
2316 txr->queue_status = IGB_QUEUE_IDLE;
2317 IGB_TX_UNLOCK(txr);
2318 }
2319
2320 e1000_reset_hw(&adapter->hw);
2321 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2322
2323 e1000_led_off(&adapter->hw);
2324 e1000_cleanup_led(&adapter->hw);
2325}
2326
2327
2328/*********************************************************************
2329 *
 2330 * Determine hardware type and revision.
2331 *
2332 **********************************************************************/
2333static void
2334igb_identify_hardware(struct adapter *adapter)
2335{
2336 device_t dev = adapter->dev;
2337
2338 /* Make sure our PCI config space has the necessary stuff set */
2339 pci_enable_busmaster(dev);
2340 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2341
2342 /* Save off the information about this board */
2343 adapter->hw.vendor_id = pci_get_vendor(dev);
2344 adapter->hw.device_id = pci_get_device(dev);
2345 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2346 adapter->hw.subsystem_vendor_id =
2347 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2348 adapter->hw.subsystem_device_id =
2349 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2350
2351 /* Set MAC type early for PCI setup */
2352 e1000_set_mac_type(&adapter->hw);
2353
2354 /* Are we a VF device? */
2355 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2356 (adapter->hw.mac.type == e1000_vfadapt_i350))
2357 adapter->vf_ifp = 1;
2358 else
2359 adapter->vf_ifp = 0;
2360}
2361
2362static int
2363igb_allocate_pci_resources(struct adapter *adapter)
2364{
2365 device_t dev = adapter->dev;
2366 int rid;
2367
2368 rid = PCIR_BAR(0);
2369 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2370 &rid, RF_ACTIVE);
2371 if (adapter->pci_mem == NULL) {
2372 device_printf(dev, "Unable to allocate bus resource: memory\n");
2373 return (ENXIO);
2374 }
2375 adapter->osdep.mem_bus_space_tag =
2376 rman_get_bustag(adapter->pci_mem);
2377 adapter->osdep.mem_bus_space_handle =
2378 rman_get_bushandle(adapter->pci_mem);
2379 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2380
2381 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2382
2383 /* This will setup either MSI/X or MSI */
2384 adapter->msix = igb_setup_msix(adapter);
2385 adapter->hw.back = &adapter->osdep;
2386
2387 return (0);
2388}
2389
2390/*********************************************************************
2391 *
2392 * Setup the Legacy or MSI Interrupt handler
2393 *
2394 **********************************************************************/
2395static int
2396igb_allocate_legacy(struct adapter *adapter)
2397{
2398 device_t dev = adapter->dev;
2399 struct igb_queue *que = adapter->queues;
2400#ifndef IGB_LEGACY_TX
2401 struct tx_ring *txr = adapter->tx_rings;
2402#endif
2403 int error, rid = 0;
2404
2405 /* Turn off all interrupts */
2406 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2407
2408 /* MSI RID is 1 */
2409 if (adapter->msix == 1)
2410 rid = 1;
2411
2412 /* We allocate a single interrupt resource */
2413 adapter->res = bus_alloc_resource_any(dev,
2414 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2415 if (adapter->res == NULL) {
2416 device_printf(dev, "Unable to allocate bus resource: "
2417 "interrupt\n");
2418 return (ENXIO);
2419 }
2420
2421#ifndef IGB_LEGACY_TX
2422 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2423#endif
2424
2425 /*
2426 * Try allocating a fast interrupt and the associated deferred
2427 * processing contexts.
2428 */
2429 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2430 /* Make tasklet for deferred link handling */
2431 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2432 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2433 taskqueue_thread_enqueue, &que->tq);
2434 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2435 device_get_nameunit(adapter->dev));
2436 if ((error = bus_setup_intr(dev, adapter->res,
2437 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2438 adapter, &adapter->tag)) != 0) {
2439 device_printf(dev, "Failed to register fast interrupt "
2440 "handler: %d\n", error);
2441 taskqueue_free(que->tq);
2442 que->tq = NULL;
2443 return (error);
2444 }
2445
2446 return (0);
2447}
2448
2449
2450/*********************************************************************
2451 *
2452 * Setup the MSIX Queue Interrupt handlers:
2453 *
2454 **********************************************************************/
2455static int
2456igb_allocate_msix(struct adapter *adapter)
2457{
2458 device_t dev = adapter->dev;
2459 struct igb_queue *que = adapter->queues;
2460 int error, rid, vector = 0;
2461 int cpu_id = 0;
2462#ifdef RSS
2463 cpuset_t cpu_mask;
2464#endif
2465
2466 /* Be sure to start with all interrupts disabled */
2467 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2468 E1000_WRITE_FLUSH(&adapter->hw);
2469
2470#ifdef RSS
2471 /*
2472 * If we're doing RSS, the number of queues needs to
2473 * match the number of RSS buckets that are configured.
2474 *
 2475 * + If there are more queues than RSS buckets, we'll end
 2476 * up with queues that get no traffic.
 2477 *
 2478 * + If there are more RSS buckets than queues, we'll end
 2479 * up having multiple RSS buckets map to the same queue,
2480 * so there'll be some contention.
2481 */
2482 if (adapter->num_queues != rss_getnumbuckets()) {
2483 device_printf(dev,
2484 "%s: number of queues (%d) != number of RSS buckets (%d)"
2485 "; performance will be impacted.\n",
2486 __func__,
2487 adapter->num_queues,
2488 rss_getnumbuckets());
2489 }
2490#endif
2491
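 /* MSIX interrupt resources use 1-based rids, hence vector + 1 */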
2492 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
 2493 rid = vector + 1;
2494 que->res = bus_alloc_resource_any(dev,
2495 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2496 if (que->res == NULL) {
2497 device_printf(dev,
2498 "Unable to allocate bus resource: "
2499 "MSIX Queue Interrupt\n");
2500 return (ENXIO);
2501 }
2502 error = bus_setup_intr(dev, que->res,
2503 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2504 igb_msix_que, que, &que->tag);
2505 if (error) {
2506 que->res = NULL;
2507 device_printf(dev, "Failed to register Queue handler");
2508 return (error);
2509 }
2510#if __FreeBSD_version >= 800504
2511 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2512#endif
2513 que->msix = vector;
2514 if (adapter->hw.mac.type == e1000_82575)
2515 que->eims = E1000_EICR_TX_QUEUE0 << i;
2516 else
2517 que->eims = 1 << vector;
2518
2519#ifdef RSS
2520 /*
2521 * The queue ID is used as the RSS layer bucket ID.
2522 * We look up the queue ID -> RSS CPU ID and select
2523 * that.
2524 */
2525 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2526#else
2527 /*
2528 * Bind the msix vector, and thus the
2529 * rings to the corresponding cpu.
2530 *
2531 * This just happens to match the default RSS round-robin
2532 * bucket -> queue -> CPU allocation.
2533 */
2534 if (adapter->num_queues > 1) {
2535 if (igb_last_bind_cpu < 0)
2536 igb_last_bind_cpu = CPU_FIRST();
2537 cpu_id = igb_last_bind_cpu;
2538 }
2539#endif
2540
2541 if (adapter->num_queues > 1) {
2542 bus_bind_intr(dev, que->res, cpu_id);
2543#ifdef RSS
2544 device_printf(dev,
2545 "Bound queue %d to RSS bucket %d\n",
2546 i, cpu_id);
2547#else
2548 device_printf(dev,
2549 "Bound queue %d to cpu %d\n",
2550 i, cpu_id);
2551#endif
2552 }
2553
2554#ifndef IGB_LEGACY_TX
2555 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2556 que->txr);
2557#endif
2558 /* Make tasklet for deferred handling */
2559 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2560 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2561 taskqueue_thread_enqueue, &que->tq);
2562 if (adapter->num_queues > 1) {
2563 /*
2564 * Only pin the taskqueue thread to a CPU if
2565 * RSS is in use.
2566 *
2567 * This again just happens to match the default RSS
2568 * round-robin bucket -> queue -> CPU allocation.
2569 */
2570#ifdef RSS
2571 CPU_SETOF(cpu_id, &cpu_mask);
2572 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2573 &cpu_mask,
2574 "%s que (bucket %d)",
2575 device_get_nameunit(adapter->dev),
2576 cpu_id);
2577#else
2578 taskqueue_start_threads(&que->tq, 1, PI_NET,
2579 "%s que (qid %d)",
2580 device_get_nameunit(adapter->dev),
2581 cpu_id);
2582#endif
2583 } else {
2584 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2585 device_get_nameunit(adapter->dev));
2586 }
2587
2588 /* Finally update the last bound CPU id */
2589 if (adapter->num_queues > 1)
2590 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2591 }
2592
2593 /* And Link */
2594 rid = vector + 1;
2595 adapter->res = bus_alloc_resource_any(dev,
2596 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2597 if (adapter->res == NULL) {
2598 device_printf(dev,
2599 "Unable to allocate bus resource: "
2600 "MSIX Link Interrupt\n");
2601 return (ENXIO);
2602 }
2603 if ((error = bus_setup_intr(dev, adapter->res,
2604 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2605 igb_msix_link, adapter, &adapter->tag)) != 0) {
2606 device_printf(dev, "Failed to register Link handler");
2607 return (error);
2608 }
2609#if __FreeBSD_version >= 800504
2610 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2611#endif
2612 adapter->linkvec = vector;
2613
2614 return (0);
2615}
2616
2617
2618static void
2619igb_configure_queues(struct adapter *adapter)
2620{
2621 struct e1000_hw *hw = &adapter->hw;
2622 struct igb_queue *que;
2623 u32 tmp, ivar = 0, newitr = 0;
2624
2625 /* First turn on RSS capability */
2626 if (adapter->hw.mac.type != e1000_82575)
2627 E1000_WRITE_REG(hw, E1000_GPIE,
2628 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2629 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2630
2631 /* Turn on MSIX */
2632 switch (adapter->hw.mac.type) {
2633 case e1000_82580:
2634 case e1000_i350:
2635 case e1000_i354:
2636 case e1000_i210:
2637 case e1000_i211:
2638 case e1000_vfadapt:
2639 case e1000_vfadapt_i350:
2640 /* RX entries */
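 /*
 * Each 32-bit IVAR covers two queues: bytes 0/1 hold the
 * RX/TX vectors for the even queue, bytes 2/3 the odd one,
 * hence index = i >> 1.
 */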
2641 for (int i = 0; i < adapter->num_queues; i++) {
2642 u32 index = i >> 1;
2643 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2644 que = &adapter->queues[i];
2645 if (i & 1) {
2646 ivar &= 0xFF00FFFF;
2647 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2648 } else {
2649 ivar &= 0xFFFFFF00;
2650 ivar |= que->msix | E1000_IVAR_VALID;
2651 }
2652 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2653 }
2654 /* TX entries */
2655 for (int i = 0; i < adapter->num_queues; i++) {
2656 u32 index = i >> 1;
2657 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2658 que = &adapter->queues[i];
2659 if (i & 1) {
2660 ivar &= 0x00FFFFFF;
2661 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2662 } else {
2663 ivar &= 0xFFFF00FF;
2664 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2665 }
2666 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2667 adapter->que_mask |= que->eims;
2668 }
2669
2670 /* And for the link interrupt */
2671 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2672 adapter->link_mask = 1 << adapter->linkvec;
2673 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2674 break;
2675 case e1000_82576:
2676 /* RX entries */
2677 for (int i = 0; i < adapter->num_queues; i++) {
2678 u32 index = i & 0x7; /* Each IVAR has two entries */
2679 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2680 que = &adapter->queues[i];
2681 if (i < 8) {
2682 ivar &= 0xFFFFFF00;
2683 ivar |= que->msix | E1000_IVAR_VALID;
2684 } else {
2685 ivar &= 0xFF00FFFF;
2686 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2687 }
2688 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2689 adapter->que_mask |= que->eims;
2690 }
2691 /* TX entries */
2692 for (int i = 0; i < adapter->num_queues; i++) {
2693 u32 index = i & 0x7; /* Each IVAR has two entries */
2694 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2695 que = &adapter->queues[i];
2696 if (i < 8) {
2697 ivar &= 0xFFFF00FF;
2698 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2699 } else {
2700 ivar &= 0x00FFFFFF;
2701 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2702 }
2703 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2704 adapter->que_mask |= que->eims;
2705 }
2706
2707 /* And for the link interrupt */
2708 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2709 adapter->link_mask = 1 << adapter->linkvec;
2710 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2711 break;
2712
2713 case e1000_82575:
 2714 /* Enable MSI-X support */
2715 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2716 tmp |= E1000_CTRL_EXT_PBA_CLR;
2717 /* Auto-Mask interrupts upon ICR read. */
2718 tmp |= E1000_CTRL_EXT_EIAME;
2719 tmp |= E1000_CTRL_EXT_IRCA;
2720 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2721
2722 /* Queues */
2723 for (int i = 0; i < adapter->num_queues; i++) {
2724 que = &adapter->queues[i];
2725 tmp = E1000_EICR_RX_QUEUE0 << i;
2726 tmp |= E1000_EICR_TX_QUEUE0 << i;
2727 que->eims = tmp;
2728 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2729 i, que->eims);
2730 adapter->que_mask |= que->eims;
2731 }
2732
2733 /* Link */
2734 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2735 E1000_EIMS_OTHER);
2736 adapter->link_mask |= E1000_EIMS_OTHER;
2737 default:
2738 break;
2739 }
2740
2741 /* Set the starting interrupt rate */
2742 if (igb_max_interrupt_rate > 0)
2743 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2744
2745 if (hw->mac.type == e1000_82575)
2746 newitr |= newitr << 16;
2747 else
2748 newitr |= E1000_EITR_CNT_IGNR;
2749
2750 for (int i = 0; i < adapter->num_queues; i++) {
2751 que = &adapter->queues[i];
2752 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2753 }
2754
2755 return;
2756}
2757
2758
2759static void
2760igb_free_pci_resources(struct adapter *adapter)
2761{
2762 struct igb_queue *que = adapter->queues;
2763 device_t dev = adapter->dev;
2764 int rid;
2765
2766 /*
2767 ** There is a slight possibility of a failure mode
2768 ** in attach that will result in entering this function
2769 ** before interrupt resources have been initialized, and
 2770 ** in that case we do not want to execute the loops below.
 2771 ** We can detect this reliably by the state of the adapter's
2772 ** res pointer.
2773 */
2774 if (adapter->res == NULL)
2775 goto mem;
2776
2777 /*
2778 * First release all the interrupt resources:
2779 */
2780 for (int i = 0; i < adapter->num_queues; i++, que++) {
2781 rid = que->msix + 1;
2782 if (que->tag != NULL) {
2783 bus_teardown_intr(dev, que->res, que->tag);
2784 que->tag = NULL;
2785 }
2786 if (que->res != NULL)
2787 bus_release_resource(dev,
2788 SYS_RES_IRQ, rid, que->res);
2789 }
2790
2791 /* Clean the Legacy or Link interrupt last */
2792 if (adapter->linkvec) /* we are doing MSIX */
2793 rid = adapter->linkvec + 1;
2794 else
2795 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2796
2797 que = adapter->queues;
2798 if (adapter->tag != NULL) {
2799 taskqueue_drain(que->tq, &adapter->link_task);
2800 bus_teardown_intr(dev, adapter->res, adapter->tag);
2801 adapter->tag = NULL;
2802 }
2803 if (adapter->res != NULL)
2804 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2805
2806 for (int i = 0; i < adapter->num_queues; i++, que++) {
2807 if (que->tq != NULL) {
2808#ifndef IGB_LEGACY_TX
2809 taskqueue_drain(que->tq, &que->txr->txq_task);
2810#endif
2811 taskqueue_drain(que->tq, &que->que_task);
2812 taskqueue_free(que->tq);
2813 }
2814 }
2815mem:
2816 if (adapter->msix)
2817 pci_release_msi(dev);
2818
2819 if (adapter->msix_mem != NULL)
2820 bus_release_resource(dev, SYS_RES_MEMORY,
2821 adapter->memrid, adapter->msix_mem);
2822
2823 if (adapter->pci_mem != NULL)
2824 bus_release_resource(dev, SYS_RES_MEMORY,
2825 PCIR_BAR(0), adapter->pci_mem);
2826
2827}
2828
2829/*
2830 * Set up either MSIX or MSI
2831 */
2832static int
2833igb_setup_msix(struct adapter *adapter)
2834{
2835 device_t dev = adapter->dev;
2836 int bar, want, queues, msgs, maxqueues;
2837
2838 /* tuneable override */
2839 if (igb_enable_msix == 0)
2840 goto msi;
2841
2842 /* First try MSI/X */
2843 msgs = pci_msix_count(dev);
2844 if (msgs == 0)
2845 goto msi;
2846 /*
2847 ** Some new devices, as with ixgbe, now may
2848 ** use a different BAR, so we need to keep
2849 ** track of which is used.
2850 */
2851 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2852 bar = pci_read_config(dev, adapter->memrid, 4);
2853 if (bar == 0) /* use next bar */
2854 adapter->memrid += 4;
2855 adapter->msix_mem = bus_alloc_resource_any(dev,
2856 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2857 if (adapter->msix_mem == NULL) {
2858 /* May not be enabled */
2859 device_printf(adapter->dev,
2860 "Unable to map MSIX table \n");
2861 goto msi;
2862 }
2863
2864 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2865
2866 /* Override via tuneable */
2867 if (igb_num_queues != 0)
2868 queues = igb_num_queues;
2869
2870#ifdef RSS
2871 /* If we're doing RSS, clamp at the number of RSS buckets */
2872 if (queues > rss_getnumbuckets())
2873 queues = rss_getnumbuckets();
2874#endif
2875
2876
2877 /* Sanity check based on HW */
2878 switch (adapter->hw.mac.type) {
2879 case e1000_82575:
2880 maxqueues = 4;
2881 break;
2882 case e1000_82576:
2883 case e1000_82580:
2884 case e1000_i350:
2885 case e1000_i354:
2886 maxqueues = 8;
2887 break;
2888 case e1000_i210:
2889 maxqueues = 4;
2890 break;
2891 case e1000_i211:
2892 maxqueues = 2;
2893 break;
2894 default: /* VF interfaces */
2895 maxqueues = 1;
2896 break;
2897 }
2898
2899 /* Final clamp on the actual hardware capability */
2900 if (queues > maxqueues)
2901 queues = maxqueues;
2902
2903 /*
2904 ** One vector (RX/TX pair) per queue
2905 ** plus an additional for Link interrupt
2906 */
2907 want = queues + 1;
2908 if (msgs >= want)
2909 msgs = want;
2910 else {
2911 device_printf(adapter->dev,
2912 "MSIX Configuration Problem, "
2913 "%d vectors configured, but %d queues wanted!\n",
2914 msgs, want);
2915 goto msi;
2916 }
2917 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2918 device_printf(adapter->dev,
2919 "Using MSIX interrupts with %d vectors\n", msgs);
2920 adapter->num_queues = queues;
2921 return (msgs);
2922 }
2923 /*
2924 ** If MSIX alloc failed or provided us with
2925 ** less than needed, free and fall through to MSI
2926 */
2927 pci_release_msi(dev);
2928
2929msi:
2930 if (adapter->msix_mem != NULL) {
2931 bus_release_resource(dev, SYS_RES_MEMORY,
2932 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2933 adapter->msix_mem = NULL;
2934 }
2935 msgs = 1;
2936 if (pci_alloc_msi(dev, &msgs) == 0) {
2937 device_printf(adapter->dev," Using an MSI interrupt\n");
2938 return (msgs);
2939 }
2940 device_printf(adapter->dev," Using a Legacy interrupt\n");
2941 return (0);
2942}
2943
2944/*********************************************************************
2945 *
2946 * Initialize the DMA Coalescing feature
2947 *
2948 **********************************************************************/
2949static void
2950igb_init_dmac(struct adapter *adapter, u32 pba)
2951{
2952 device_t dev = adapter->dev;
2953 struct e1000_hw *hw = &adapter->hw;
2954 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2955 u16 hwm;
2956
2957 if (hw->mac.type == e1000_i211)
2958 return;
2959
2960 if (hw->mac.type > e1000_82580) {
2961
2962 if (adapter->dmac == 0) { /* Disabling it */
2963 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2964 return;
2965 } else
2966 device_printf(dev, "DMA Coalescing enabled\n");
2967
2968 /* Set starting threshold */
2969 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2970
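		/* pba is in KB; 64 * pba re-expresses it in what appear
		 * to be 16-byte units for the FCRTC coalescing threshold,
		 * and the clamp keeps the mark from dropping below
		 * pba - 6KB.
		 */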
2971 hwm = 64 * pba - adapter->max_frame_size / 16;
2972 if (hwm < 64 * (pba - 6))
2973 hwm = 64 * (pba - 6);
2974 reg = E1000_READ_REG(hw, E1000_FCRTC);
2975 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2976 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2977 & E1000_FCRTC_RTH_COAL_MASK);
2978 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2979
2980
2981 dmac = pba - adapter->max_frame_size / 512;
2982 if (dmac < pba - 10)
2983 dmac = pba - 10;
2984 reg = E1000_READ_REG(hw, E1000_DMACR);
2985 reg &= ~E1000_DMACR_DMACTHR_MASK;
2986		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2987 & E1000_DMACR_DMACTHR_MASK);
2988
2989		/* transition to L0s or L1 if available */
2990 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2991
2992		/* Check for a 2.5Gb backplane connection before
2993		 * configuring the watchdog timer: the timer value is
2994		 * a msec count expressed in 12.8usec intervals on a
2995		 * 2.5Gb connection, and in 32usec intervals
2996		 * otherwise.
2997		 */
2998 if (hw->mac.type == e1000_i354) {
2999 int status = E1000_READ_REG(hw, E1000_STATUS);
3000 if ((status & E1000_STATUS_2P5_SKU) &&
3001 (!(status & E1000_STATUS_2P5_SKU_OVER)))
3002 reg |= ((adapter->dmac * 5) >> 6);
3003 else
3004 reg |= (adapter->dmac >> 5);
3005 } else {
3006 reg |= (adapter->dmac >> 5);
3007 }
3008
3009 E1000_WRITE_REG(hw, E1000_DMACR, reg);
3010
3011#ifdef I210_OBFF_SUPPORT
3012 /*
3013 * Set the OBFF Rx threshold to DMA Coalescing Rx
3014 * threshold - 2KB and enable the feature in the
3015 * hardware for I210.
3016 */
3017 if (hw->mac.type == e1000_i210) {
3018 int obff = dmac - 2;
3019 reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
3020 reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
3021 reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
3022 | E1000_DOBFFCTL_EXIT_ACT_MASK;
3023 E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
3024 }
3025#endif
3026 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3027
3028 /* Set the interval before transition */
3029 reg = E1000_READ_REG(hw, E1000_DMCTLX);
3030 if (hw->mac.type == e1000_i350)
3031 reg |= IGB_DMCTLX_DCFLUSH_DIS;
3032		/*
3033		** On a 2.5Gb connection the TTLX unit is 0.4 usec,
3034		** so the same 4 usec delay is 4 / 0.4 = 10 = 0xA.
3035		*/
3036 if (hw->mac.type == e1000_i354) {
3037 int status = E1000_READ_REG(hw, E1000_STATUS);
3038 if ((status & E1000_STATUS_2P5_SKU) &&
3039 (!(status & E1000_STATUS_2P5_SKU_OVER)))
3040 reg |= 0xA;
3041 else
3042 reg |= 0x4;
3043 } else {
3044 reg |= 0x4;
3045 }
3046
3047 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3048
3049 /* free space in tx packet buffer to wake from DMA coal */
3050 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3051 (2 * adapter->max_frame_size)) >> 6);
3052
3053 /* make low power state decision controlled by DMA coal */
3054 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3055 reg &= ~E1000_PCIEMISC_LX_DECISION;
3056 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3057
3058 } else if (hw->mac.type == e1000_82580) {
3059 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3060 E1000_WRITE_REG(hw, E1000_PCIEMISC,
3061 reg & ~E1000_PCIEMISC_LX_DECISION);
3062 E1000_WRITE_REG(hw, E1000_DMACR, 0);
3063 }
3064}
3065
3066
3067/*********************************************************************
3068 *
3069 *  Set up a fresh starting state
3070 *
3071 **********************************************************************/
3072static void
3073igb_reset(struct adapter *adapter)
3074{
3075 device_t dev = adapter->dev;
3076 struct e1000_hw *hw = &adapter->hw;
3077 struct e1000_fc_info *fc = &hw->fc;
3078 struct ifnet *ifp = adapter->ifp;
3079 u32 pba = 0;
3080 u16 hwm;
3081
3082 INIT_DEBUGOUT("igb_reset: begin");
3083
3084 /* Let the firmware know the OS is in control */
3085 igb_get_hw_control(adapter);
3086
3087 /*
3088 * Packet Buffer Allocation (PBA)
3089	 * Writing PBA sets the receive portion of the buffer;
3090	 * the remainder is used for the transmit buffer.
3091 */
3092 switch (hw->mac.type) {
3093 case e1000_82575:
3094 pba = E1000_PBA_32K;
3095 break;
3096 case e1000_82576:
3097 case e1000_vfadapt:
3098 pba = E1000_READ_REG(hw, E1000_RXPBS);
3099 pba &= E1000_RXPBS_SIZE_MASK_82576;
3100 break;
3101 case e1000_82580:
3102 case e1000_i350:
3103 case e1000_i354:
3104 case e1000_vfadapt_i350:
3105 pba = E1000_READ_REG(hw, E1000_RXPBS);
3106 pba = e1000_rxpbs_adjust_82580(pba);
3107 break;
3108 case e1000_i210:
3109 case e1000_i211:
3110 pba = E1000_PBA_34K;
3111 default:
3112 break;
3113 }
3114
3115 /* Special needs in case of Jumbo frames */
3116 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3117 u32 tx_space, min_tx, min_rx;
3118 pba = E1000_READ_REG(hw, E1000_PBA);
3119 tx_space = pba >> 16;
3120 pba &= 0xffff;
3121 min_tx = (adapter->max_frame_size +
3122 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3123 min_tx = roundup2(min_tx, 1024);
3124 min_tx >>= 10;
3125 min_rx = adapter->max_frame_size;
3126 min_rx = roundup2(min_rx, 1024);
3127 min_rx >>= 10;
3128 if (tx_space < min_tx &&
3129 ((min_tx - tx_space) < pba)) {
3130 pba = pba - (min_tx - tx_space);
3131 /*
3132 * if short on rx space, rx wins
3133 * and must trump tx adjustment
3134 */
3135 if (pba < min_rx)
3136 pba = min_rx;
3137 }
3138 E1000_WRITE_REG(hw, E1000_PBA, pba);
3139 }
3140
3141 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3142
3143 /*
3144 * These parameters control the automatic generation (Tx) and
3145 * response (Rx) to Ethernet PAUSE frames.
3146 * - High water mark should allow for at least two frames to be
3147 * received after sending an XOFF.
3148 * - Low water mark works best when it is very near the high water mark.
3149 * This allows the receiver to restart by sending XON when it has
3150 * drained a bit.
3151 */
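	/* pba is in KB, so (pba << 10) is the Rx buffer size in bytes;
	 * take the lesser of 90% of the buffer and the buffer minus
	 * two maximum-sized frames.
	 */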
3152 hwm = min(((pba << 10) * 9 / 10),
3153 ((pba << 10) - 2 * adapter->max_frame_size));
3154
3155 if (hw->mac.type < e1000_82576) {
3156 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3157 fc->low_water = fc->high_water - 8;
3158 } else {
3159 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3160 fc->low_water = fc->high_water - 16;
3161 }
3162
3163 fc->pause_time = IGB_FC_PAUSE_TIME;
3164 fc->send_xon = TRUE;
3165 if (adapter->fc)
3166 fc->requested_mode = adapter->fc;
3167 else
3168 fc->requested_mode = e1000_fc_default;
3169
3170 /* Issue a global reset */
3171 e1000_reset_hw(hw);
3172 E1000_WRITE_REG(hw, E1000_WUC, 0);
3173
3174 /* Reset for AutoMediaDetect */
3175 if (adapter->flags & IGB_MEDIA_RESET) {
3176 e1000_setup_init_funcs(hw, TRUE);
3177 e1000_get_bus_info(hw);
3178 adapter->flags &= ~IGB_MEDIA_RESET;
3179 }
3180
3181 if (e1000_init_hw(hw) < 0)
3182 device_printf(dev, "Hardware Initialization Failed\n");
3183
3184 /* Setup DMA Coalescing */
3185 igb_init_dmac(adapter, pba);
3186
3187 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3188 e1000_get_phy_info(hw);
3189 e1000_check_for_link(hw);
3190 return;
3191}
3192
3193/*********************************************************************
3194 *
3195 * Setup networking device structure and register an interface.
3196 *
3197 **********************************************************************/
3198static int
3199igb_setup_interface(device_t dev, struct adapter *adapter)
3200{
3201 struct ifnet *ifp;
3202
3203 INIT_DEBUGOUT("igb_setup_interface: begin");
3204
3205 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3206 if (ifp == NULL) {
3207 device_printf(dev, "can not allocate ifnet structure\n");
3208 return (-1);
3209 }
3210 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3211 ifp->if_init = igb_init;
3212 ifp->if_softc = adapter;
3213 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3214 ifp->if_ioctl = igb_ioctl;
3215 ifp->if_get_counter = igb_get_counter;
3216#ifndef IGB_LEGACY_TX
3217 ifp->if_transmit = igb_mq_start;
3218 ifp->if_qflush = igb_qflush;
3219#else
3220 ifp->if_start = igb_start;
3221 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3222 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3223 IFQ_SET_READY(&ifp->if_snd);
3224#endif
3225
3226 ether_ifattach(ifp, adapter->hw.mac.addr);
3227
3228 ifp->if_capabilities = ifp->if_capenable = 0;
3229
3230 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3231 ifp->if_capabilities |= IFCAP_TSO;
3232 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3233 ifp->if_capenable = ifp->if_capabilities;
3234
3235 /* Don't enable LRO by default */
3236 ifp->if_capabilities |= IFCAP_LRO;
3237
3238#ifdef DEVICE_POLLING
3239 ifp->if_capabilities |= IFCAP_POLLING;
3240#endif
3241
3242 /*
3243 * Tell the upper layer(s) we
3244 * support full VLAN capability.
3245 */
3246 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3247 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3248 | IFCAP_VLAN_HWTSO
3249 | IFCAP_VLAN_MTU;
3250 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3251 | IFCAP_VLAN_HWTSO
3252 | IFCAP_VLAN_MTU;
3253
3254	/*
3255	** Don't turn this on by default: if vlans are
3256	** created on another pseudo device (e.g. lagg)
3257	** then vlan events are not passed through, breaking
3258	** operation, but with HW FILTER off it works. If
3259	** using vlans directly on the igb driver you can
3260	** enable this and get full hardware tag filtering.
3261	*/
3262 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3263
3264 /*
3265 * Specify the media types supported by this adapter and register
3266 * callbacks to update media and link information
3267 */
3268 ifmedia_init(&adapter->media, IFM_IMASK,
3269 igb_media_change, igb_media_status);
3270 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3271 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3272 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3273 0, NULL);
3274 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3275 } else {
3276 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3277 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3278 0, NULL);
3279 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3280 0, NULL);
3281 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3282 0, NULL);
3283 if (adapter->hw.phy.type != e1000_phy_ife) {
3284 ifmedia_add(&adapter->media,
3285 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3286 ifmedia_add(&adapter->media,
3287 IFM_ETHER | IFM_1000_T, 0, NULL);
3288 }
3289 }
3290 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3291 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3292 return (0);
3293}
3294
3295
3296/*
3297 * Manage DMA'able memory.
3298 */
3299static void
3300igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3301{
3302 if (error)
3303 return;
3304 *(bus_addr_t *) arg = segs[0].ds_addr;
3305}
3306
3307static int
3308igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3309 struct igb_dma_alloc *dma, int mapflags)
3310{
3311 int error;
3312
3313 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3314 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3315 BUS_SPACE_MAXADDR, /* lowaddr */
3316 BUS_SPACE_MAXADDR, /* highaddr */
3317 NULL, NULL, /* filter, filterarg */
3318 size, /* maxsize */
3319 1, /* nsegments */
3320 size, /* maxsegsize */
3321 0, /* flags */
3322 NULL, /* lockfunc */
3323 NULL, /* lockarg */
3324 &dma->dma_tag);
3325 if (error) {
3326 device_printf(adapter->dev,
3327 "%s: bus_dma_tag_create failed: %d\n",
3328 __func__, error);
3329 goto fail_0;
3330 }
3331
3332 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3333 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3334 if (error) {
3335 device_printf(adapter->dev,
3336 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3337 __func__, (uintmax_t)size, error);
3338 goto fail_2;
3339 }
3340
3341 dma->dma_paddr = 0;
3342 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3343 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3344 if (error || dma->dma_paddr == 0) {
3345 device_printf(adapter->dev,
3346 "%s: bus_dmamap_load failed: %d\n",
3347 __func__, error);
3348 goto fail_3;
3349 }
3350
3351 return (0);
3352
3353fail_3:
3354 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3355fail_2:
3356 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3357 bus_dma_tag_destroy(dma->dma_tag);
3358fail_0:
3359 dma->dma_tag = NULL;
3360
3361 return (error);
3362}
3363
3364static void
3365igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3366{
3367 if (dma->dma_tag == NULL)
3368 return;
3369 if (dma->dma_paddr != 0) {
3370 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3371 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3372 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3373 dma->dma_paddr = 0;
3374 }
3375 if (dma->dma_vaddr != NULL) {
3376 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3377 dma->dma_vaddr = NULL;
3378 }
3379 bus_dma_tag_destroy(dma->dma_tag);
3380 dma->dma_tag = NULL;
3381}
3382
3383
3384/*********************************************************************
3385 *
3386 * Allocate memory for the transmit and receive rings, and then
3387 * the descriptors associated with each, called only once at attach.
3388 *
3389 **********************************************************************/
3390static int
3391igb_allocate_queues(struct adapter *adapter)
3392{
3393 device_t dev = adapter->dev;
3394 struct igb_queue *que = NULL;
3395 struct tx_ring *txr = NULL;
3396 struct rx_ring *rxr = NULL;
3397 int rsize, tsize, error = E1000_SUCCESS;
3398 int txconf = 0, rxconf = 0;
3399
3400 /* First allocate the top level queue structs */
3401 if (!(adapter->queues =
3402 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3403 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3404 device_printf(dev, "Unable to allocate queue memory\n");
3405 error = ENOMEM;
3406 goto fail;
3407 }
3408
3409 /* Next allocate the TX ring struct memory */
3410 if (!(adapter->tx_rings =
3411 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3412 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3413 device_printf(dev, "Unable to allocate TX ring memory\n");
3414 error = ENOMEM;
3415 goto tx_fail;
3416 }
3417
3418 /* Now allocate the RX */
3419 if (!(adapter->rx_rings =
3420 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3421 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3422 device_printf(dev, "Unable to allocate RX ring memory\n");
3423 error = ENOMEM;
3424 goto rx_fail;
3425 }
3426
3427 tsize = roundup2(adapter->num_tx_desc *
3428 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3429 /*
3430 * Now set up the TX queues, txconf is needed to handle the
3431 * possibility that things fail midcourse and we need to
3432 * undo memory gracefully
3433 */
3434 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3435 /* Set up some basics */
3436 txr = &adapter->tx_rings[i];
3437 txr->adapter = adapter;
3438 txr->me = i;
3439 txr->num_desc = adapter->num_tx_desc;
3440
3441 /* Initialize the TX lock */
3442 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3443 device_get_nameunit(dev), txr->me);
3444 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3445
3446 if (igb_dma_malloc(adapter, tsize,
3447 &txr->txdma, BUS_DMA_NOWAIT)) {
3448 device_printf(dev,
3449 "Unable to allocate TX Descriptor memory\n");
3450 error = ENOMEM;
3451 goto err_tx_desc;
3452 }
3453 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3454 bzero((void *)txr->tx_base, tsize);
3455
3456 /* Now allocate transmit buffers for the ring */
3457 if (igb_allocate_transmit_buffers(txr)) {
3458 device_printf(dev,
3459 "Critical Failure setting up transmit buffers\n");
3460 error = ENOMEM;
3461 goto err_tx_desc;
3462 }
3463#ifndef IGB_LEGACY_TX
3464 /* Allocate a buf ring */
3465 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3466 M_WAITOK, &txr->tx_mtx);
3467#endif
3468 }
3469
3470 /*
3471 * Next the RX queues...
3472 */
3473 rsize = roundup2(adapter->num_rx_desc *
3474 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3475 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3476 rxr = &adapter->rx_rings[i];
3477 rxr->adapter = adapter;
3478 rxr->me = i;
3479
3480 /* Initialize the RX lock */
3481 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3482		    device_get_nameunit(dev), rxr->me);
3483 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3484
3485 if (igb_dma_malloc(adapter, rsize,
3486 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3487 device_printf(dev,
3488			    "Unable to allocate RX Descriptor memory\n");
3489 error = ENOMEM;
3490 goto err_rx_desc;
3491 }
3492 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3493 bzero((void *)rxr->rx_base, rsize);
3494
3495 /* Allocate receive buffers for the ring*/
3496 if (igb_allocate_receive_buffers(rxr)) {
3497 device_printf(dev,
3498 "Critical Failure setting up receive buffers\n");
3499 error = ENOMEM;
3500 goto err_rx_desc;
3501 }
3502 }
3503
3504 /*
3505 ** Finally set up the queue holding structs
3506 */
3507 for (int i = 0; i < adapter->num_queues; i++) {
3508 que = &adapter->queues[i];
3509 que->adapter = adapter;
3510 que->txr = &adapter->tx_rings[i];
3511 que->rxr = &adapter->rx_rings[i];
3512 }
3513
3514 return (0);
3515
3516err_rx_desc:
3517 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3518 igb_dma_free(adapter, &rxr->rxdma);
3519err_tx_desc:
3520 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3521 igb_dma_free(adapter, &txr->txdma);
3522 free(adapter->rx_rings, M_DEVBUF);
3523rx_fail:
3524#ifndef IGB_LEGACY_TX
3525 buf_ring_free(txr->br, M_DEVBUF);
3526#endif
3527 free(adapter->tx_rings, M_DEVBUF);
3528tx_fail:
3529 free(adapter->queues, M_DEVBUF);
3530fail:
3531 return (error);
3532}
3533
3534/*********************************************************************
3535 *
3536 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3537 * the information needed to transmit a packet on the wire. This is
3538 *  called only once at attach; setup is done on every reset.
3539 *
3540 **********************************************************************/
3541static int
3542igb_allocate_transmit_buffers(struct tx_ring *txr)
3543{
3544 struct adapter *adapter = txr->adapter;
3545 device_t dev = adapter->dev;
3546 struct igb_tx_buf *txbuf;
3547 int error, i;
3548
3549 /*
3550 * Setup DMA descriptor areas.
3551 */
3552 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3553 1, 0, /* alignment, bounds */
3554 BUS_SPACE_MAXADDR, /* lowaddr */
3555 BUS_SPACE_MAXADDR, /* highaddr */
3556 NULL, NULL, /* filter, filterarg */
3557 IGB_TSO_SIZE, /* maxsize */
3558 IGB_MAX_SCATTER, /* nsegments */
3559 PAGE_SIZE, /* maxsegsize */
3560 0, /* flags */
3561 NULL, /* lockfunc */
3562 NULL, /* lockfuncarg */
3563 &txr->txtag))) {
3564 device_printf(dev,"Unable to allocate TX DMA tag\n");
3565 goto fail;
3566 }
3567
3568 if (!(txr->tx_buffers =
3569 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3570 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3571 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3572 error = ENOMEM;
3573 goto fail;
3574 }
3575
3576 /* Create the descriptor buffer dma maps */
3577 txbuf = txr->tx_buffers;
3578 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3579 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3580 if (error != 0) {
3581 device_printf(dev, "Unable to create TX DMA map\n");
3582 goto fail;
3583 }
3584 }
3585
3586 return 0;
3587fail:
3588	/* Free everything; this handles the case where we failed partway through */
3589 igb_free_transmit_structures(adapter);
3590 return (error);
3591}
3592
3593/*********************************************************************
3594 *
3595 * Initialize a transmit ring.
3596 *
3597 **********************************************************************/
3598static void
3599igb_setup_transmit_ring(struct tx_ring *txr)
3600{
3601 struct adapter *adapter = txr->adapter;
3602 struct igb_tx_buf *txbuf;
3603 int i;
3604#ifdef DEV_NETMAP
3605 struct netmap_adapter *na = NA(adapter->ifp);
3606 struct netmap_slot *slot;
3607#endif /* DEV_NETMAP */
3608
3609 /* Clear the old descriptor contents */
3610 IGB_TX_LOCK(txr);
3611#ifdef DEV_NETMAP
3612 slot = netmap_reset(na, NR_TX, txr->me, 0);
3613#endif /* DEV_NETMAP */
3614 bzero((void *)txr->tx_base,
3615 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3616 /* Reset indices */
3617 txr->next_avail_desc = 0;
3618 txr->next_to_clean = 0;
3619
3620 /* Free any existing tx buffers. */
3621 txbuf = txr->tx_buffers;
3622 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3623 if (txbuf->m_head != NULL) {
3624 bus_dmamap_sync(txr->txtag, txbuf->map,
3625 BUS_DMASYNC_POSTWRITE);
3626 bus_dmamap_unload(txr->txtag, txbuf->map);
3627 m_freem(txbuf->m_head);
3628 txbuf->m_head = NULL;
3629 }
3630#ifdef DEV_NETMAP
3631 if (slot) {
3632 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3633 /* no need to set the address */
3634 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3635 }
3636#endif /* DEV_NETMAP */
3637 /* clear the watch index */
3638 txbuf->eop = NULL;
3639 }
3640
3641 /* Set number of descriptors available */
3642 txr->tx_avail = adapter->num_tx_desc;
3643
3644 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3645 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3646 IGB_TX_UNLOCK(txr);
3647}
3648
3649/*********************************************************************
3650 *
3651 * Initialize all transmit rings.
3652 *
3653 **********************************************************************/
3654static void
3655igb_setup_transmit_structures(struct adapter *adapter)
3656{
3657 struct tx_ring *txr = adapter->tx_rings;
3658
3659 for (int i = 0; i < adapter->num_queues; i++, txr++)
3660 igb_setup_transmit_ring(txr);
3661
3662 return;
3663}
3664
3665/*********************************************************************
3666 *
3667 * Enable transmit unit.
3668 *
3669 **********************************************************************/
3670static void
3671igb_initialize_transmit_units(struct adapter *adapter)
3672{
3673 struct tx_ring *txr = adapter->tx_rings;
3674 struct e1000_hw *hw = &adapter->hw;
3675 u32 tctl, txdctl;
3676
3677 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3678 tctl = txdctl = 0;
3679
3680 /* Setup the Tx Descriptor Rings */
3681 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3682 u64 bus_addr = txr->txdma.dma_paddr;
3683
3684 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3685 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3686 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3687 (uint32_t)(bus_addr >> 32));
3688 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3689 (uint32_t)bus_addr);
3690
3691 /* Setup the HW Tx Head and Tail descriptor pointers */
3692 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3693 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3694
3695 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3696 E1000_READ_REG(hw, E1000_TDBAL(i)),
3697 E1000_READ_REG(hw, E1000_TDLEN(i)));
3698
3699 txr->queue_status = IGB_QUEUE_IDLE;
3700
3701 txdctl |= IGB_TX_PTHRESH;
3702 txdctl |= IGB_TX_HTHRESH << 8;
3703 txdctl |= IGB_TX_WTHRESH << 16;
3704 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3705 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3706 }
3707
3708 if (adapter->vf_ifp)
3709 return;
3710
3711 e1000_config_collision_dist(hw);
3712
3713 /* Program the Transmit Control Register */
3714 tctl = E1000_READ_REG(hw, E1000_TCTL);
3715 tctl &= ~E1000_TCTL_CT;
3716 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3717 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3718
3719 /* This write will effectively turn on the transmit unit. */
3720 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3721}
3722
3723/*********************************************************************
3724 *
3725 * Free all transmit rings.
3726 *
3727 **********************************************************************/
3728static void
3729igb_free_transmit_structures(struct adapter *adapter)
3730{
3731 struct tx_ring *txr = adapter->tx_rings;
3732
3733 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3734 IGB_TX_LOCK(txr);
3735 igb_free_transmit_buffers(txr);
3736 igb_dma_free(adapter, &txr->txdma);
3737 IGB_TX_UNLOCK(txr);
3738 IGB_TX_LOCK_DESTROY(txr);
3739 }
3740 free(adapter->tx_rings, M_DEVBUF);
3741}
3742
3743/*********************************************************************
3744 *
3745 * Free transmit ring related data structures.
3746 *
3747 **********************************************************************/
3748static void
3749igb_free_transmit_buffers(struct tx_ring *txr)
3750{
3751 struct adapter *adapter = txr->adapter;
3752 struct igb_tx_buf *tx_buffer;
3753 int i;
3754
3755 INIT_DEBUGOUT("free_transmit_ring: begin");
3756
3757 if (txr->tx_buffers == NULL)
3758 return;
3759
3760 tx_buffer = txr->tx_buffers;
3761 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3762 if (tx_buffer->m_head != NULL) {
3763 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3764 BUS_DMASYNC_POSTWRITE);
3765 bus_dmamap_unload(txr->txtag,
3766 tx_buffer->map);
3767 m_freem(tx_buffer->m_head);
3768 tx_buffer->m_head = NULL;
3769 if (tx_buffer->map != NULL) {
3770 bus_dmamap_destroy(txr->txtag,
3771 tx_buffer->map);
3772 tx_buffer->map = NULL;
3773 }
3774 } else if (tx_buffer->map != NULL) {
3775 bus_dmamap_unload(txr->txtag,
3776 tx_buffer->map);
3777 bus_dmamap_destroy(txr->txtag,
3778 tx_buffer->map);
3779 tx_buffer->map = NULL;
3780 }
3781 }
3782#ifndef IGB_LEGACY_TX
3783 if (txr->br != NULL)
3784 buf_ring_free(txr->br, M_DEVBUF);
3785#endif
3786 if (txr->tx_buffers != NULL) {
3787 free(txr->tx_buffers, M_DEVBUF);
3788 txr->tx_buffers = NULL;
3789 }
3790 if (txr->txtag != NULL) {
3791 bus_dma_tag_destroy(txr->txtag);
3792 txr->txtag = NULL;
3793 }
3794 return;
3795}
3796
3797/**********************************************************************
3798 *
3799 * Setup work for hardware segmentation offload (TSO) on
3800 * adapters using advanced tx descriptors
3801 *
3802 **********************************************************************/
3803static int
3804igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3805 u32 *cmd_type_len, u32 *olinfo_status)
3806{
3807 struct adapter *adapter = txr->adapter;
3808 struct e1000_adv_tx_context_desc *TXD;
3809 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3810 u32 mss_l4len_idx = 0, paylen;
3811 u16 vtag = 0, eh_type;
3812 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3813 struct ether_vlan_header *eh;
3814#ifdef INET6
3815 struct ip6_hdr *ip6;
3816#endif
3817#ifdef INET
3818 struct ip *ip;
3819#endif
3820 struct tcphdr *th;
3821
3822
3823 /*
3824 * Determine where frame payload starts.
3825 * Jump over vlan headers if already present
3826 */
3827 eh = mtod(mp, struct ether_vlan_header *);
3828 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3829 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3830 eh_type = eh->evl_proto;
3831 } else {
3832 ehdrlen = ETHER_HDR_LEN;
3833 eh_type = eh->evl_encap_proto;
3834 }
3835
3836 switch (ntohs(eh_type)) {
3837#ifdef INET6
3838 case ETHERTYPE_IPV6:
3839 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3840 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3841 if (ip6->ip6_nxt != IPPROTO_TCP)
3842 return (ENXIO);
3843 ip_hlen = sizeof(struct ip6_hdr);
3844 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3845 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3846 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3847 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3848 break;
3849#endif
3850#ifdef INET
3851 case ETHERTYPE_IP:
3852 ip = (struct ip *)(mp->m_data + ehdrlen);
3853 if (ip->ip_p != IPPROTO_TCP)
3854 return (ENXIO);
3855 ip->ip_sum = 0;
3856 ip_hlen = ip->ip_hl << 2;
3857 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3858 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3859 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3860 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3861 /* Tell transmit desc to also do IPv4 checksum. */
3862 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3863 break;
3864#endif
3865 default:
3866 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3867 __func__, ntohs(eh_type));
3868 break;
3869 }
3870
3871 ctxd = txr->next_avail_desc;
3872 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3873
3874 tcp_hlen = th->th_off << 2;
3875
3876 /* This is used in the transmit desc in encap */
3877 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3878
3879 /* VLAN MACLEN IPLEN */
3880 if (mp->m_flags & M_VLANTAG) {
3881 vtag = htole16(mp->m_pkthdr.ether_vtag);
3882 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3883 }
3884
3885 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3886 vlan_macip_lens |= ip_hlen;
3887 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3888
3889 /* ADV DTYPE TUCMD */
3890 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3891 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3892 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3893
3894 /* MSS L4LEN IDX */
3895 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3896 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3897 /* 82575 needs the queue index added */
3898 if (adapter->hw.mac.type == e1000_82575)
3899 mss_l4len_idx |= txr->me << 4;
3900 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3901
3902 TXD->seqnum_seed = htole32(0);
3903
3904 if (++ctxd == txr->num_desc)
3905 ctxd = 0;
3906
3907 txr->tx_avail--;
3908 txr->next_avail_desc = ctxd;
3909 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3910 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3911 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3912 ++txr->tso_tx;
3913 return (0);
3914}
3915
3916/*********************************************************************
3917 *
3918 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3919 *
3920 **********************************************************************/
3921
3922static int
3923igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3924 u32 *cmd_type_len, u32 *olinfo_status)
3925{
3926 struct e1000_adv_tx_context_desc *TXD;
3927 struct adapter *adapter = txr->adapter;
3928 struct ether_vlan_header *eh;
3929 struct ip *ip;
3930 struct ip6_hdr *ip6;
3931 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3932 int ehdrlen, ip_hlen = 0;
3933 u16 etype;
3934 u8 ipproto = 0;
3935 int offload = TRUE;
3936 int ctxd = txr->next_avail_desc;
3937 u16 vtag = 0;
3938
3939 /* First check if TSO is to be used */
3940 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3941 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3942
3943 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3944 offload = FALSE;
3945
3946 /* Indicate the whole packet as payload when not doing TSO */
3947 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3948
3949 /* Now ready a context descriptor */
3950 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3951
3952 /*
3953 ** In advanced descriptors the vlan tag must
3954 ** be placed into the context descriptor. Hence
3955 ** we need to make one even if not doing offloads.
3956 */
3957 if (mp->m_flags & M_VLANTAG) {
3958 vtag = htole16(mp->m_pkthdr.ether_vtag);
3959 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3960 } else if (offload == FALSE) /* ... no offload to do */
3961 return (0);
3962
3963 /*
3964 * Determine where frame payload starts.
3965 * Jump over vlan headers if already present,
3966 * helpful for QinQ too.
3967 */
3968 eh = mtod(mp, struct ether_vlan_header *);
3969 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3970 etype = ntohs(eh->evl_proto);
3971 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3972 } else {
3973 etype = ntohs(eh->evl_encap_proto);
3974 ehdrlen = ETHER_HDR_LEN;
3975 }
3976
3977 /* Set the ether header length */
3978 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3979
3980 switch (etype) {
3981 case ETHERTYPE_IP:
3982 ip = (struct ip *)(mp->m_data + ehdrlen);
3983 ip_hlen = ip->ip_hl << 2;
3984 ipproto = ip->ip_p;
3985 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3986 break;
3987 case ETHERTYPE_IPV6:
3988 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3989 ip_hlen = sizeof(struct ip6_hdr);
3990 /* XXX-BZ this will go badly in case of ext hdrs. */
3991 ipproto = ip6->ip6_nxt;
3992 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3993 break;
3994 default:
3995 offload = FALSE;
3996 break;
3997 }
3998
3999 vlan_macip_lens |= ip_hlen;
4000 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4001
4002 switch (ipproto) {
4003 case IPPROTO_TCP:
4004 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
4005 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
4006 break;
4007 case IPPROTO_UDP:
4008 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
4009 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
4010 break;
4011
4012#if __FreeBSD_version >= 800000
4013 case IPPROTO_SCTP:
4014 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
4015 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4016 break;
4017#endif
4018 default:
4019 offload = FALSE;
4020 break;
4021 }
4022
4023 if (offload) /* For the TX descriptor setup */
4024 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4025
4026 /* 82575 needs the queue index added */
4027 if (adapter->hw.mac.type == e1000_82575)
4028 mss_l4len_idx = txr->me << 4;
4029
4030 /* Now copy bits into descriptor */
4031 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4032 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4033 TXD->seqnum_seed = htole32(0);
4034 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4035
4036 /* We've consumed the first desc, adjust counters */
4037 if (++ctxd == txr->num_desc)
4038 ctxd = 0;
4039 txr->next_avail_desc = ctxd;
4040 --txr->tx_avail;
4041
4042 return (0);
4043}
4044
4045/**********************************************************************
4046 *
4047 * Examine each tx_buffer in the used queue. If the hardware is done
4048 * processing the packet then free associated resources. The
4049 * tx_buffer is put back on the free queue.
4050 *
4051 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
4052 **********************************************************************/
4053static bool
4054igb_txeof(struct tx_ring *txr)
4055{
4056 struct adapter *adapter = txr->adapter;
4057#ifdef DEV_NETMAP
4058 struct ifnet *ifp = adapter->ifp;
4059#endif /* DEV_NETMAP */
4060 u32 work, processed = 0;
4061 u16 limit = txr->process_limit;
4062 struct igb_tx_buf *buf;
4063 union e1000_adv_tx_desc *txd;
4064
4065 mtx_assert(&txr->tx_mtx, MA_OWNED);
4066
4067#ifdef DEV_NETMAP
4068 if (netmap_tx_irq(ifp, txr->me))
4069 return (FALSE);
4070#endif /* DEV_NETMAP */
4071
4072 if (txr->tx_avail == txr->num_desc) {
4073 txr->queue_status = IGB_QUEUE_IDLE;
4074 return FALSE;
4075 }
4076
4077 /* Get work starting point */
4078 work = txr->next_to_clean;
4079 buf = &txr->tx_buffers[work];
4080 txd = &txr->tx_base[work];
4081 work -= txr->num_desc; /* The distance to ring end */
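	/*
	 * 'work' is kept as a negative offset from the end of the ring,
	 * so it reaches zero exactly when the index wraps; the (!work)
	 * tests below rely on that to reset to the start of the ring.
	 */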
4082 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4083 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4084 do {
4085 union e1000_adv_tx_desc *eop = buf->eop;
4086 if (eop == NULL) /* No work */
4087 break;
4088
4089 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4090 break; /* I/O not complete */
4091
4092 if (buf->m_head) {
4093 txr->bytes +=
4094 buf->m_head->m_pkthdr.len;
4095 bus_dmamap_sync(txr->txtag,
4096 buf->map,
4097 BUS_DMASYNC_POSTWRITE);
4098 bus_dmamap_unload(txr->txtag,
4099 buf->map);
4100 m_freem(buf->m_head);
4101 buf->m_head = NULL;
4102 }
4103 buf->eop = NULL;
4104 ++txr->tx_avail;
4105
4106 /* We clean the range if multi segment */
4107 while (txd != eop) {
4108 ++txd;
4109 ++buf;
4110 ++work;
4111 /* wrap the ring? */
4112 if (__predict_false(!work)) {
4113 work -= txr->num_desc;
4114 buf = txr->tx_buffers;
4115 txd = txr->tx_base;
4116 }
4117 if (buf->m_head) {
4118 txr->bytes +=
4119 buf->m_head->m_pkthdr.len;
4120 bus_dmamap_sync(txr->txtag,
4121 buf->map,
4122 BUS_DMASYNC_POSTWRITE);
4123 bus_dmamap_unload(txr->txtag,
4124 buf->map);
4125 m_freem(buf->m_head);
4126 buf->m_head = NULL;
4127 }
4128 ++txr->tx_avail;
4129 buf->eop = NULL;
4130
4131 }
4132 ++txr->packets;
4133 ++processed;
4134 txr->watchdog_time = ticks;
4135
4136 /* Try the next packet */
4137 ++txd;
4138 ++buf;
4139 ++work;
4140 /* reset with a wrap */
4141 if (__predict_false(!work)) {
4142 work -= txr->num_desc;
4143 buf = txr->tx_buffers;
4144 txd = txr->tx_base;
4145 }
4146 prefetch(txd);
4147 } while (__predict_true(--limit));
4148
4149 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4150 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4151
4152 work += txr->num_desc;
4153 txr->next_to_clean = work;
4154
4155	/*
4156	** Watchdog calculation: we know there's
4157	** work outstanding or the first return
4158	** would have been taken, so nothing processed
4159	** for too long indicates a hang.
4160	*/
4161 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4162 txr->queue_status |= IGB_QUEUE_HUNG;
4163
4164 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4165 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4166
4167 if (txr->tx_avail == txr->num_desc) {
4168 txr->queue_status = IGB_QUEUE_IDLE;
4169 return (FALSE);
4170 }
4171
4172 return (TRUE);
4173}
4174
4175/*********************************************************************
4176 *
4177 * Refresh mbuf buffers for RX descriptor rings
4178 *   - now keeps its own state so discards due to resource
4179 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4180 *     it just returns, keeping its placeholder, so it can simply
4181 *     be recalled to try again.
4182 *
4183 **********************************************************************/
4184static void
4185igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4186{
4187 struct adapter *adapter = rxr->adapter;
4188 bus_dma_segment_t hseg[1];
4189 bus_dma_segment_t pseg[1];
4190 struct igb_rx_buf *rxbuf;
4191 struct mbuf *mh, *mp;
4192 int i, j, nsegs, error;
4193 bool refreshed = FALSE;
4194
4195 i = j = rxr->next_to_refresh;
4196 /*
4197 ** Get one descriptor beyond
4198 ** our work mark to control
4199 ** the loop.
4200 */
4201 if (++j == adapter->num_rx_desc)
4202 j = 0;
4203
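	/*
	 * 'j' runs one slot ahead of 'i'; the loop stops once this
	 * lookahead reaches 'limit', so next_to_refresh is left at the
	 * last descriptor actually refreshed.
	 */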
4204 while (j != limit) {
4205 rxbuf = &rxr->rx_buffers[i];
4206 /* No hdr mbuf used with header split off */
4207 if (rxr->hdr_split == FALSE)
4208 goto no_split;
4209 if (rxbuf->m_head == NULL) {
4210 mh = m_gethdr(M_NOWAIT, MT_DATA);
4211 if (mh == NULL)
4212 goto update;
4213 } else
4214 mh = rxbuf->m_head;
4215
4216		mh->m_pkthdr.len = mh->m_len = MHLEN;
4218 mh->m_flags |= M_PKTHDR;
4219 /* Get the memory mapping */
4220 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4221 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4222 if (error != 0) {
4223 printf("Refresh mbufs: hdr dmamap load"
4224 " failure - %d\n", error);
4225 m_free(mh);
4226 rxbuf->m_head = NULL;
4227 goto update;
4228 }
4229 rxbuf->m_head = mh;
4230 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4231 BUS_DMASYNC_PREREAD);
4232 rxr->rx_base[i].read.hdr_addr =
4233 htole64(hseg[0].ds_addr);
4234no_split:
4235 if (rxbuf->m_pack == NULL) {
4236 mp = m_getjcl(M_NOWAIT, MT_DATA,
4237 M_PKTHDR, adapter->rx_mbuf_sz);
4238 if (mp == NULL)
4239 goto update;
4240 } else
4241 mp = rxbuf->m_pack;
4242
4243 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4244 /* Get the memory mapping */
4245 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4246 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4247 if (error != 0) {
4248 printf("Refresh mbufs: payload dmamap load"
4249 " failure - %d\n", error);
4250 m_free(mp);
4251 rxbuf->m_pack = NULL;
4252 goto update;
4253 }
4254 rxbuf->m_pack = mp;
4255 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4256 BUS_DMASYNC_PREREAD);
4257 rxr->rx_base[i].read.pkt_addr =
4258 htole64(pseg[0].ds_addr);
4259 refreshed = TRUE; /* I feel wefreshed :) */
4260
4261 i = j; /* our next is precalculated */
4262 rxr->next_to_refresh = i;
4263 if (++j == adapter->num_rx_desc)
4264 j = 0;
4265 }
4266update:
4267 if (refreshed) /* update tail */
4268 E1000_WRITE_REG(&adapter->hw,
4269 E1000_RDT(rxr->me), rxr->next_to_refresh);
4270 return;
4271}
4272
4273
4274/*********************************************************************
4275 *
4276 * Allocate memory for rx_buffer structures. Since we use one
4277 *  rx_buffer per received packet, the maximum number of rx_buffers
4278 * that we'll need is equal to the number of receive descriptors
4279 * that we've allocated.
4280 *
4281 **********************************************************************/
4282static int
4283igb_allocate_receive_buffers(struct rx_ring *rxr)
4284{
4285 struct adapter *adapter = rxr->adapter;
4286 device_t dev = adapter->dev;
4287 struct igb_rx_buf *rxbuf;
4288 int i, bsize, error;
4289
4290 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4291 if (!(rxr->rx_buffers =
4292 (struct igb_rx_buf *) malloc(bsize,
4293 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4294 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4295 error = ENOMEM;
4296 goto fail;
4297 }
4298
4299 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4300 1, 0, /* alignment, bounds */
4301 BUS_SPACE_MAXADDR, /* lowaddr */
4302 BUS_SPACE_MAXADDR, /* highaddr */
4303 NULL, NULL, /* filter, filterarg */
4304 MSIZE, /* maxsize */
4305 1, /* nsegments */
4306 MSIZE, /* maxsegsize */
4307 0, /* flags */
4308 NULL, /* lockfunc */
4309 NULL, /* lockfuncarg */
4310 &rxr->htag))) {
4311 device_printf(dev, "Unable to create RX DMA tag\n");
4312 goto fail;
4313 }
4314
4315 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4316 1, 0, /* alignment, bounds */
4317 BUS_SPACE_MAXADDR, /* lowaddr */
4318 BUS_SPACE_MAXADDR, /* highaddr */
4319 NULL, NULL, /* filter, filterarg */
4320 MJUM9BYTES, /* maxsize */
4321 1, /* nsegments */
4322 MJUM9BYTES, /* maxsegsize */
4323 0, /* flags */
4324 NULL, /* lockfunc */
4325 NULL, /* lockfuncarg */
4326 &rxr->ptag))) {
4327 device_printf(dev, "Unable to create RX payload DMA tag\n");
4328 goto fail;
4329 }
4330
4331 for (i = 0; i < adapter->num_rx_desc; i++) {
4332 rxbuf = &rxr->rx_buffers[i];
4333 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4334 if (error) {
4335 device_printf(dev,
4336 "Unable to create RX head DMA maps\n");
4337 goto fail;
4338 }
4339 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4340 if (error) {
4341 device_printf(dev,
4342 "Unable to create RX packet DMA maps\n");
4343 goto fail;
4344 }
4345 }
4346
4347 return (0);
4348
4349fail:
4350 /* Frees all, but can handle partial completion */
4351 igb_free_receive_structures(adapter);
4352 return (error);
4353}
4354
4355
4356static void
4357igb_free_receive_ring(struct rx_ring *rxr)
4358{
4359 struct adapter *adapter = rxr->adapter;
4360 struct igb_rx_buf *rxbuf;
4361
4362
4363 for (int i = 0; i < adapter->num_rx_desc; i++) {
4364 rxbuf = &rxr->rx_buffers[i];
4365 if (rxbuf->m_head != NULL) {
4366 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4367 BUS_DMASYNC_POSTREAD);
4368 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4369 rxbuf->m_head->m_flags |= M_PKTHDR;
4370 m_freem(rxbuf->m_head);
4371 }
4372 if (rxbuf->m_pack != NULL) {
4373 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4374 BUS_DMASYNC_POSTREAD);
4375 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4376 rxbuf->m_pack->m_flags |= M_PKTHDR;
4377 m_freem(rxbuf->m_pack);
4378 }
4379 rxbuf->m_head = NULL;
4380 rxbuf->m_pack = NULL;
4381 }
4382}
4383
4384
4385/*********************************************************************
4386 *
4387 * Initialize a receive ring and its buffers.
4388 *
4389 **********************************************************************/
4390static int
4391igb_setup_receive_ring(struct rx_ring *rxr)
4392{
4393 struct adapter *adapter;
4394 struct ifnet *ifp;
4395 device_t dev;
4396 struct igb_rx_buf *rxbuf;
4397 bus_dma_segment_t pseg[1], hseg[1];
4398 struct lro_ctrl *lro = &rxr->lro;
4399 int rsize, nsegs, error = 0;
4400#ifdef DEV_NETMAP
4401 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4402 struct netmap_slot *slot;
4403#endif /* DEV_NETMAP */
4404
4405 adapter = rxr->adapter;
4406 dev = adapter->dev;
4407 ifp = adapter->ifp;
4408
4409 /* Clear the ring contents */
4410 IGB_RX_LOCK(rxr);
4411#ifdef DEV_NETMAP
4412 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4413#endif /* DEV_NETMAP */
4414 rsize = roundup2(adapter->num_rx_desc *
4415 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4416 bzero((void *)rxr->rx_base, rsize);
4417
4418 /*
4419 ** Free current RX buffer structures and their mbufs
4420 */
4421 igb_free_receive_ring(rxr);
4422
4423 /* Configure for header split? */
4424 if (igb_header_split)
4425 rxr->hdr_split = TRUE;
4426
4427 /* Now replenish the ring mbufs */
4428 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4429 struct mbuf *mh, *mp;
4430
4431 rxbuf = &rxr->rx_buffers[j];
4432#ifdef DEV_NETMAP
4433 if (slot) {
4434 /* slot sj is mapped to the j-th NIC-ring entry */
4435 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4436 uint64_t paddr;
4437 void *addr;
4438
4439 addr = PNMB(na, slot + sj, &paddr);
4440 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4441 /* Update descriptor */
4442 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4443 continue;
4444 }
4445#endif /* DEV_NETMAP */
4446 if (rxr->hdr_split == FALSE)
4447 goto skip_head;
4448
4449 /* First the header */
4450 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4451 if (rxbuf->m_head == NULL) {
4452 error = ENOBUFS;
4453 goto fail;
4454 }
4455 m_adj(rxbuf->m_head, ETHER_ALIGN);
4456 mh = rxbuf->m_head;
4457 mh->m_len = mh->m_pkthdr.len = MHLEN;
4458 mh->m_flags |= M_PKTHDR;
4459 /* Get the memory mapping */
4460 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4461 rxbuf->hmap, rxbuf->m_head, hseg,
4462 &nsegs, BUS_DMA_NOWAIT);
4463 if (error != 0) /* Nothing elegant to do here */
4464 goto fail;
4465 bus_dmamap_sync(rxr->htag,
4466 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4467 /* Update descriptor */
4468 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4469
4470skip_head:
4471 /* Now the payload cluster */
4472 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4473 M_PKTHDR, adapter->rx_mbuf_sz);
4474 if (rxbuf->m_pack == NULL) {
4475 error = ENOBUFS;
4476 goto fail;
4477 }
4478 mp = rxbuf->m_pack;
4479 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4480 /* Get the memory mapping */
4481 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4482 rxbuf->pmap, mp, pseg,
4483 &nsegs, BUS_DMA_NOWAIT);
4484 if (error != 0)
4485 goto fail;
4486 bus_dmamap_sync(rxr->ptag,
4487 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4488 /* Update descriptor */
4489 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4490 }
4491
4492 /* Setup our descriptor indices */
4493 rxr->next_to_check = 0;
4494 rxr->next_to_refresh = adapter->num_rx_desc - 1;
4495 rxr->lro_enabled = FALSE;
4496 rxr->rx_split_packets = 0;
4497 rxr->rx_bytes = 0;
4498
4499 rxr->fmp = NULL;
4500 rxr->lmp = NULL;
4501
4502 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4503 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4504
4505 /*
4506	** Now set up the LRO interface; we
4507	** also only do header split when LRO
4508	** is enabled, since so often the two
4509	** are undesirable in similar setups.
4510 */
4511 if (ifp->if_capenable & IFCAP_LRO) {
4512 error = tcp_lro_init(lro);
4513 if (error) {
4514 device_printf(dev, "LRO Initialization failed!\n");
4515 goto fail;
4516 }
4517 INIT_DEBUGOUT("RX LRO Initialized\n");
4518 rxr->lro_enabled = TRUE;
4519 lro->ifp = adapter->ifp;
4520 }
4521
4522 IGB_RX_UNLOCK(rxr);
4523 return (0);
4524
4525fail:
4526 igb_free_receive_ring(rxr);
4527 IGB_RX_UNLOCK(rxr);
4528 return (error);
4529}
4530
4531
4532/*********************************************************************
4533 *
4534 * Initialize all receive rings.
4535 *
4536 **********************************************************************/
4537static int
4538igb_setup_receive_structures(struct adapter *adapter)
4539{
4540 struct rx_ring *rxr = adapter->rx_rings;
4541 int i;
4542
4543 for (i = 0; i < adapter->num_queues; i++, rxr++)
4544 if (igb_setup_receive_ring(rxr))
4545 goto fail;
4546
4547 return (0);
4548fail:
4549 /*
4550	 * Free RX buffers allocated so far; we will only handle
4551	 * the rings that completed, since the failing case will have
4552	 * cleaned up for itself. 'i' is the endpoint.
4553 */
4554 for (int j = 0; j < i; ++j) {
4555 rxr = &adapter->rx_rings[j];
4556 IGB_RX_LOCK(rxr);
4557 igb_free_receive_ring(rxr);
4558 IGB_RX_UNLOCK(rxr);
4559 }
4560
4561 return (ENOBUFS);
4562}
4563
4564/*
4565 * Initialise the RSS mapping for NICs that support multiple transmit/
4566 * receive rings.
4567 */
4568static void
4569igb_initialise_rss_mapping(struct adapter *adapter)
4570{
4571 struct e1000_hw *hw = &adapter->hw;
4572 int i;
4573 int queue_id;
4574 u32 reta;
4575 u32 rss_key[10], mrqc, shift = 0;
4576
4577 /* XXX? */
4578 if (adapter->hw.mac.type == e1000_82575)
4579 shift = 6;
4580
4581 /*
4582 * The redirection table controls which destination
4583 * queue each bucket redirects traffic to.
4584 * Each DWORD represents four queues, with the LSB
4585 * being the first queue in the DWORD.
4586 *
4587 * This just allocates buckets to queues using round-robin
4588 * allocation.
4589 *
4590 * NOTE: It Just Happens to line up with the default
4591 * RSS allocation method.
4592 */
4593
4594 /* Warning FM follows */
4595 reta = 0;
4596 for (i = 0; i < 128; i++) {
4597#ifdef RSS
4598 queue_id = rss_get_indirection_to_bucket(i);
4599 /*
4600 * If we have more queues than buckets, we'll
4601 * end up mapping buckets to a subset of the
4602 * queues.
4603 *
4604 * If we have more buckets than queues, we'll
4605 * end up instead assigning multiple buckets
4606 * to queues.
4607 *
4608 * Both are suboptimal, but we need to handle
4609 * the case so we don't go out of bounds
4610 * indexing arrays and such.
4611 */
4612 queue_id = queue_id % adapter->num_queues;
4613#else
4614 queue_id = (i % adapter->num_queues);
4615#endif
4616 /* Adjust if required */
4617 queue_id = queue_id << shift;
4618
4619 /*
4620 * The low 8 bits are for hash value (n+0);
4621 * The next 8 bits are for hash value (n+1), etc.
4622 */
4623 reta = reta >> 8;
4624 reta = reta | ( ((uint32_t) queue_id) << 24);
4625 if ((i & 3) == 3) {
4626 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4627 reta = 0;
4628 }
4629 }
4630
4631 /* Now fill in hash table */
4632
4633 /*
4634 * MRQC: Multiple Receive Queues Command
4635 * Set queuing to RSS control, number depends on the device.
4636 */
4637 mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4638
4639#ifdef RSS
4640 /* XXX ew typecasting */
4641 rss_getkey((uint8_t *) &rss_key);
4642#else
4643 arc4rand(&rss_key, sizeof(rss_key), 0);
4644#endif
4645 for (i = 0; i < 10; i++)
4646 E1000_WRITE_REG_ARRAY(hw,
4647 E1000_RSSRK(0), i, rss_key[i]);
4648
4649 /*
4650 * Configure the RSS fields to hash upon.
4651 */
4652 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4653 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4654 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4655 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4656 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4657 E1000_MRQC_RSS_FIELD_IPV6_UDP);
4658 mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4659 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4660
4661 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4662}
4663
4664/*********************************************************************
4665 *
4666 * Enable receive unit.
4667 *
4668 **********************************************************************/
4669static void
4670igb_initialize_receive_units(struct adapter *adapter)
4671{
4672 struct rx_ring *rxr = adapter->rx_rings;
4673 struct ifnet *ifp = adapter->ifp;
4674 struct e1000_hw *hw = &adapter->hw;
4675 u32 rctl, rxcsum, psize, srrctl = 0;
4676
4677	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4678
4679 /*
4680 * Make sure receives are disabled while setting
4681 * up the descriptor ring
4682 */
4683 rctl = E1000_READ_REG(hw, E1000_RCTL);
4684 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4685
4686 /*
4687 ** Set up for header split
4688 */
4689 if (igb_header_split) {
4690 /* Use a standard mbuf for the header */
4691 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4692 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4693 } else
4694 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4695
4696 /*
4697 ** Set up for jumbo frames
4698 */
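	/* The SRRCTL BSIZEPKT field appears to be in 1KB units, hence
	 * the right-shift of the buffer size below; RCTL gets a
	 * matching SZ/BSEX setting.
	 */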
4699 if (ifp->if_mtu > ETHERMTU) {
4700 rctl |= E1000_RCTL_LPE;
4701 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4702 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4703 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4704 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4705 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4706 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4707 }
4708 /* Set maximum packet len */
4709 psize = adapter->max_frame_size;
4710 /* are we on a vlan? */
4711 if (adapter->ifp->if_vlantrunk != NULL)
4712 psize += VLAN_TAG_SIZE;
4713 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4714 } else {
4715 rctl &= ~E1000_RCTL_LPE;
4716 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4717 rctl |= E1000_RCTL_SZ_2048;
4718 }
4719
4720 /*
4721 * If TX flow control is disabled and there's >1 queue defined,
4722 * enable DROP.
4723 *
4724 * This drops frames rather than hanging the RX MAC for all queues.
4725 */
4726 if ((adapter->num_queues > 1) &&
4727 (adapter->fc == e1000_fc_none ||
4728 adapter->fc == e1000_fc_rx_pause)) {
4729 srrctl |= E1000_SRRCTL_DROP_EN;
4730 }
4731
4732 /* Setup the Base and Length of the Rx Descriptor Rings */
4733 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4734 u64 bus_addr = rxr->rxdma.dma_paddr;
4735 u32 rxdctl;
4736
4737 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4738 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4739 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4740 (uint32_t)(bus_addr >> 32));
4741 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4742 (uint32_t)bus_addr);
4743 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4744 /* Enable this Queue */
4745 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4746 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4747 rxdctl &= 0xFFF00000;
4748 rxdctl |= IGB_RX_PTHRESH;
4749 rxdctl |= IGB_RX_HTHRESH << 8;
4750 rxdctl |= IGB_RX_WTHRESH << 16;
4751 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4752 }
4753
4754 /*
4755 ** Setup for RX MultiQueue
4756 */
4757 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4758	if (adapter->num_queues > 1) {
4759
4760 /* rss setup */
4761 igb_initialise_rss_mapping(adapter);
4762
4763 /*
4764 ** NOTE: Receive Full-Packet Checksum Offload
4765 ** is mutually exclusive with Multiqueue. However
4766 ** this is not the same as TCP/IP checksums which
4767 ** still work.
4768 */
4769 rxcsum |= E1000_RXCSUM_PCSD;
4770#if __FreeBSD_version >= 800000
4771 /* For SCTP Offload */
4772 if ((hw->mac.type == e1000_82576)
4773 && (ifp->if_capenable & IFCAP_RXCSUM))
4774 rxcsum |= E1000_RXCSUM_CRCOFL;
4775#endif
4776 } else {
4777 /* Non RSS setup */
4778 if (ifp->if_capenable & IFCAP_RXCSUM) {
4779 rxcsum |= E1000_RXCSUM_IPPCSE;
4780#if __FreeBSD_version >= 800000
4781 if (adapter->hw.mac.type == e1000_82576)
4782 rxcsum |= E1000_RXCSUM_CRCOFL;
4783#endif
4784 } else
4785 rxcsum &= ~E1000_RXCSUM_TUOFL;
4786 }
4787 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4788
4789 /* Setup the Receive Control Register */
4790 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4791 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4792 E1000_RCTL_RDMTS_HALF |
4793 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4794 /* Strip CRC bytes. */
4795 rctl |= E1000_RCTL_SECRC;
4796 /* Make sure VLAN Filters are off */
4797 rctl &= ~E1000_RCTL_VFE;
4798 /* Don't store bad packets */
4799 rctl &= ~E1000_RCTL_SBP;
4800
4801 /* Enable Receives */
4802 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4803
4804 /*
4805 * Setup the HW Rx Head and Tail Descriptor Pointers
4806 * - needs to be after enable
4807 */
4808 for (int i = 0; i < adapter->num_queues; i++) {
4809 rxr = &adapter->rx_rings[i];
4810 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4811#ifdef DEV_NETMAP
4812 /*
4813 * an init() while a netmap client is active must
4814 * preserve the rx buffers passed to userspace.
4815 * In this driver it means we adjust RDT to
4816 * something different from next_to_refresh
4817 * (which is not used in netmap mode).
4818 */
4819 if (ifp->if_capenable & IFCAP_NETMAP) {
4820 struct netmap_adapter *na = NA(adapter->ifp);
4821 struct netmap_kring *kring = &na->rx_rings[i];
4822 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4823
4824 if (t >= adapter->num_rx_desc)
4825 t -= adapter->num_rx_desc;
4826 else if (t < 0)
4827 t += adapter->num_rx_desc;
4828 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4829 } else
4830#endif /* DEV_NETMAP */
4831 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4832 }
4833 return;
4834}
4835
4836/*********************************************************************
4837 *
4838 * Free receive rings.
4839 *
4840 **********************************************************************/
4841static void
4842igb_free_receive_structures(struct adapter *adapter)
4843{
4844 struct rx_ring *rxr = adapter->rx_rings;
4845
4846 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4847 struct lro_ctrl *lro = &rxr->lro;
4848 igb_free_receive_buffers(rxr);
4849 tcp_lro_free(lro);
4850 igb_dma_free(adapter, &rxr->rxdma);
4851 }
4852
4853 free(adapter->rx_rings, M_DEVBUF);
4854}
4855
4856/*********************************************************************
4857 *
4858 * Free receive ring data structures.
4859 *
4860 **********************************************************************/
4861static void
4862igb_free_receive_buffers(struct rx_ring *rxr)
4863{
4864 struct adapter *adapter = rxr->adapter;
4865 struct igb_rx_buf *rxbuf;
4866 int i;
4867
4868	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4869
4870 /* Cleanup any existing buffers */
4871 if (rxr->rx_buffers != NULL) {
4872 for (i = 0; i < adapter->num_rx_desc; i++) {
4873 rxbuf = &rxr->rx_buffers[i];
4874 if (rxbuf->m_head != NULL) {
4875 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4876 BUS_DMASYNC_POSTREAD);
4877 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4878 rxbuf->m_head->m_flags |= M_PKTHDR;
4879 m_freem(rxbuf->m_head);
4880 }
4881 if (rxbuf->m_pack != NULL) {
4882 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4883 BUS_DMASYNC_POSTREAD);
4884 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4885 rxbuf->m_pack->m_flags |= M_PKTHDR;
4886 m_freem(rxbuf->m_pack);
4887 }
4888 rxbuf->m_head = NULL;
4889 rxbuf->m_pack = NULL;
4890 if (rxbuf->hmap != NULL) {
4891 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4892 rxbuf->hmap = NULL;
4893 }
4894 if (rxbuf->pmap != NULL) {
4895 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4896 rxbuf->pmap = NULL;
4897 }
4898 }
4899 if (rxr->rx_buffers != NULL) {
4900 free(rxr->rx_buffers, M_DEVBUF);
4901 rxr->rx_buffers = NULL;
4902 }
4903 }
4904
4905 if (rxr->htag != NULL) {
4906 bus_dma_tag_destroy(rxr->htag);
4907 rxr->htag = NULL;
4908 }
4909 if (rxr->ptag != NULL) {
4910 bus_dma_tag_destroy(rxr->ptag);
4911 rxr->ptag = NULL;
4912 }
4913}
4914
4915static __inline void
4916igb_rx_discard(struct rx_ring *rxr, int i)
4917{
4918 struct igb_rx_buf *rbuf;
4919
4920 rbuf = &rxr->rx_buffers[i];
4921
4922 /* Partially received? Free the chain */
4923 if (rxr->fmp != NULL) {
4924 rxr->fmp->m_flags |= M_PKTHDR;
4925 m_freem(rxr->fmp);
4926 rxr->fmp = NULL;
4927 rxr->lmp = NULL;
4928 }
4929
4930 /*
4931 ** With advanced descriptors the writeback
4932	** clobbers the buffer addrs, so it's easier
4933 ** to just free the existing mbufs and take
4934 ** the normal refresh path to get new buffers
4935 ** and mapping.
4936 */
4937 if (rbuf->m_head) {
4938 m_free(rbuf->m_head);
4939 rbuf->m_head = NULL;
4940 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4941 }
4942
4943 if (rbuf->m_pack) {
4944 m_free(rbuf->m_pack);
4945 rbuf->m_pack = NULL;
4946 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4947 }
4948
4949 return;
4950}
4951
4952static __inline void
4953igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4954{
4955
4956 /*
4957 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4958 * should be computed by hardware. Also it should not have VLAN tag in
4959 * ethernet header.
4960 */
4961 if (rxr->lro_enabled &&
4962 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4963 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4964 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4965 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4966 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4967 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4968 /*
4969 * Send to the stack if:
4970 ** - LRO not enabled, or
4971 ** - no LRO resources, or
4972 ** - lro enqueue fails
4973 */
4974 if (rxr->lro.lro_cnt != 0)
4975 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4976 return;
4977 }
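	/*
	 * Hand the packet to the stack with the RX ring lock dropped, so
	 * the lock is not held while the frame traverses the stack (which
	 * may call back into the driver, e.g. to transmit a response).
	 */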
4978 IGB_RX_UNLOCK(rxr);
4979 (*ifp->if_input)(ifp, m);
4980 IGB_RX_LOCK(rxr);
4981}
4982
4983/*********************************************************************
4984 *
4985 * This routine executes in interrupt context. It replenishes
4986 * the mbufs in the descriptor ring and sends data which has been
4987 * DMA'd into host memory to the upper layer.
4988 *
4989 * We loop at most count times if count is > 0, or until done if
4990 * count < 0.
4991 *
4992 * Return TRUE if more to clean, FALSE otherwise
4993 *********************************************************************/
4994static bool
4995igb_rxeof(struct igb_queue *que, int count, int *done)
4996{
4997 struct adapter *adapter = que->adapter;
4998 struct rx_ring *rxr = que->rxr;
4999 struct ifnet *ifp = adapter->ifp;
5000 struct lro_ctrl *lro = &rxr->lro;
5001 struct lro_entry *queued;
5002 int i, processed = 0, rxdone = 0;
5003 u32 ptype, staterr = 0;
5004 union e1000_adv_rx_desc *cur;
5005
5006 IGB_RX_LOCK(rxr);
5007 /* Sync the ring. */
5008 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5009 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
5010
5011#ifdef DEV_NETMAP
5012 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
5013 IGB_RX_UNLOCK(rxr);
5014 return (FALSE);
5015 }
5016#endif /* DEV_NETMAP */
5017
5018 /* Main clean loop */
5019 for (i = rxr->next_to_check; count != 0;) {
5020 struct mbuf *sendmp, *mh, *mp;
5021 struct igb_rx_buf *rxbuf;
5022 u16 hlen, plen, hdr, vtag, pkt_info;
5023 bool eop = FALSE;
5024
5025 cur = &rxr->rx_base[i];
5026 staterr = le32toh(cur->wb.upper.status_error);
5027 if ((staterr & E1000_RXD_STAT_DD) == 0)
5028 break;
5029 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5030 break;
5031 count--;
5032 sendmp = mh = mp = NULL;
5033 cur->wb.upper.status_error = 0;
5034 rxbuf = &rxr->rx_buffers[i];
5035 plen = le16toh(cur->wb.upper.length);
5036 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5037 if (((adapter->hw.mac.type == e1000_i350) ||
5038 (adapter->hw.mac.type == e1000_i354)) &&
5039 (staterr & E1000_RXDEXT_STATERR_LB))
5040 vtag = be16toh(cur->wb.upper.vlan);
5041 else
5042 vtag = le16toh(cur->wb.upper.vlan);
5043 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5044 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5045 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5046
5047 /*
5048 * Free the frame (all segments) if we're at EOP and
5049 * it's an error.
5050 *
5051 * The datasheet states that EOP + status is only valid for
5052 * the final segment in a multi-segment frame.
5053 */
5054 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5055 adapter->dropped_pkts++;
5056 ++rxr->rx_discarded;
5057 igb_rx_discard(rxr, i);
5058 goto next_desc;
5059 }
5060
5061 /*
5062 ** The way the hardware is configured to
5063 ** split, it will ONLY use the header buffer
5064 ** when header split is enabled, otherwise we
5065	** get normal behavior, i.e., both header and
5066 ** payload are DMA'd into the payload buffer.
5067 **
5068 ** The fmp test is to catch the case where a
5069 ** packet spans multiple descriptors, in that
5070 ** case only the first header is valid.
5071 */
5072 if (rxr->hdr_split && rxr->fmp == NULL) {
5073 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5074 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5075 E1000_RXDADV_HDRBUFLEN_SHIFT;
5076 if (hlen > IGB_HDR_BUF)
5077 hlen = IGB_HDR_BUF;
5078 mh = rxr->rx_buffers[i].m_head;
5079 mh->m_len = hlen;
5080 /* clear buf pointer for refresh */
5081 rxbuf->m_head = NULL;
5082 /*
5083			** Get the payload length; this
5084			** could be zero if it's a small
5085 ** packet.
5086 */
5087 if (plen > 0) {
5088 mp = rxr->rx_buffers[i].m_pack;
5089 mp->m_len = plen;
5090 mh->m_next = mp;
5091 /* clear buf pointer */
5092 rxbuf->m_pack = NULL;
5093 rxr->rx_split_packets++;
5094 }
5095 } else {
5096 /*
5097 ** Either no header split, or a
5098 ** secondary piece of a fragmented
5099 ** split packet.
5100 */
5101 mh = rxr->rx_buffers[i].m_pack;
5102 mh->m_len = plen;
5103 /* clear buf info for refresh */
5104 rxbuf->m_pack = NULL;
5105 }
5106 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5107
5108 ++processed; /* So we know when to refresh */
5109
5110 /* Initial frame - setup */
5111 if (rxr->fmp == NULL) {
5112 mh->m_pkthdr.len = mh->m_len;
5113 /* Save the head of the chain */
5114 rxr->fmp = mh;
5115 rxr->lmp = mh;
5116 if (mp != NULL) {
5117 /* Add payload if split */
5118 mh->m_pkthdr.len += mp->m_len;
5119 rxr->lmp = mh->m_next;
5120 }
5121 } else {
5122 /* Chain mbuf's together */
5123 rxr->lmp->m_next = mh;
5124 rxr->lmp = rxr->lmp->m_next;
5125 rxr->fmp->m_pkthdr.len += mh->m_len;
5126 }
5127
5128 if (eop) {
5129 rxr->fmp->m_pkthdr.rcvif = ifp;
5130 rxr->rx_packets++;
5131 /* capture data for AIM */
5132 rxr->packets++;
5133 rxr->bytes += rxr->fmp->m_pkthdr.len;
5134 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5135
5136 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5137 igb_rx_checksum(staterr, rxr->fmp, ptype);
5138
5139 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5140 (staterr & E1000_RXD_STAT_VP) != 0) {
5141 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5142 rxr->fmp->m_flags |= M_VLANTAG;
5143 }
5144
5145 /*
5146 * In case of multiqueue, we have RXCSUM.PCSD bit set
5147 * and never cleared. This means we have RSS hash
5148 * available to be used.
5149 */
5150 if (adapter->num_queues > 1) {
5151 rxr->fmp->m_pkthdr.flowid =
5152 le32toh(cur->wb.lower.hi_dword.rss);
5153 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5154 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5155 M_HASHTYPE_SET(rxr->fmp,
5156 M_HASHTYPE_RSS_TCP_IPV4);
5157 break;
5158 case E1000_RXDADV_RSSTYPE_IPV4:
5159 M_HASHTYPE_SET(rxr->fmp,
5160 M_HASHTYPE_RSS_IPV4);
5161 break;
5162 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5163 M_HASHTYPE_SET(rxr->fmp,
5164 M_HASHTYPE_RSS_TCP_IPV6);
5165 break;
5166 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5167 M_HASHTYPE_SET(rxr->fmp,
5168 M_HASHTYPE_RSS_IPV6_EX);
5169 break;
5170 case E1000_RXDADV_RSSTYPE_IPV6:
5171 M_HASHTYPE_SET(rxr->fmp,
5172 M_HASHTYPE_RSS_IPV6);
5173 break;
5174 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5175 M_HASHTYPE_SET(rxr->fmp,
5176 M_HASHTYPE_RSS_TCP_IPV6_EX);
5177 break;
5178 default:
5179 /* XXX fallthrough */
5180 M_HASHTYPE_SET(rxr->fmp,
5181 M_HASHTYPE_OPAQUE);
5182 }
5183 } else {
5184#ifndef IGB_LEGACY_TX
5185 rxr->fmp->m_pkthdr.flowid = que->msix;
5186 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5187#endif
5188 }
5189 sendmp = rxr->fmp;
5190 /* Make sure to set M_PKTHDR. */
5191 sendmp->m_flags |= M_PKTHDR;
5192 rxr->fmp = NULL;
5193 rxr->lmp = NULL;
5194 }
5195
5196next_desc:
5197 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5198 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5199
5200 /* Advance our pointers to the next descriptor. */
5201 if (++i == adapter->num_rx_desc)
5202 i = 0;
5203 /*
5204 ** Send to the stack or LRO
5205 */
5206 if (sendmp != NULL) {
5207 rxr->next_to_check = i;
5208 igb_rx_input(rxr, ifp, sendmp, ptype);
5209 i = rxr->next_to_check;
5210 rxdone++;
5211 }
5212
5213 /* Every 8 descriptors we go to refresh mbufs */
5214 if (processed == 8) {
5215 igb_refresh_mbufs(rxr, i);
5216 processed = 0;
5217 }
5218 }
5219
5220 /* Catch any remainders */
5221 if (igb_rx_unrefreshed(rxr))
5222 igb_refresh_mbufs(rxr, i);
5223
5224 rxr->next_to_check = i;
5225
5226 /*
5227 * Flush any outstanding LRO work
5228 */
5229 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5230 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5231 tcp_lro_flush(lro, queued);
5232 }
5233
5234 if (done != NULL)
5235 *done += rxdone;
5236
5237 IGB_RX_UNLOCK(rxr);
5238 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5239}
5240
5241/*********************************************************************
5242 *
5243 * Verify that the hardware indicated that the checksum is valid.
5244 * Inform the stack about the status of checksum so that stack
5245 * Inform the stack about the checksum status so that the stack
5246 * doesn't spend time verifying it again.
5247 *********************************************************************/
5248static void
5249igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5250{
5251 u16 status = (u16)staterr;
5252 u8 errors = (u8) (staterr >> 24);
5253 int sctp;
5254
5255 /* Ignore Checksum bit is set */
5256 if (status & E1000_RXD_STAT_IXSM) {
5257 mp->m_pkthdr.csum_flags = 0;
5258 return;
5259 }
5260
5261 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5262 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5263 sctp = 1;
5264 else
5265 sctp = 0;
5266 if (status & E1000_RXD_STAT_IPCS) {
5267 /* Did it pass? */
5268 if (!(errors & E1000_RXD_ERR_IPE)) {
5269 /* IP Checksum Good */
5270 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5271 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5272 } else
5273 mp->m_pkthdr.csum_flags = 0;
5274 }
5275
5276 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5277 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
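		/*
		 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with
		 * csum_data == 0xffff tells the stack that the TCP/UDP
		 * checksum (including pseudo-header) was already verified,
		 * so it is not recomputed in software.
		 */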
5278#if __FreeBSD_version >= 800000
5279 if (sctp) /* reassign */
5280 type = CSUM_SCTP_VALID;
5281#endif
5282 /* Did it pass? */
5283 if (!(errors & E1000_RXD_ERR_TCPE)) {
5284 mp->m_pkthdr.csum_flags |= type;
5285 if (sctp == 0)
5286 mp->m_pkthdr.csum_data = htons(0xffff);
5287 }
5288 }
5289 return;
5290}
5291
5292/*
5293 * This routine is run via a vlan
5294 * config EVENT
5295 */
5296static void
5297igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5298{
5299 struct adapter *adapter = ifp->if_softc;
5300 u32 index, bit;
5301
5302 if (ifp->if_softc != arg) /* Not our event */
5303 return;
5304
5305 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5306 return;
5307
5308 IGB_CORE_LOCK(adapter);
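	/*
	 * The VFTA is 128 32-bit entries (4096 bits, one per VLAN ID):
	 * bits 11:5 of the tag select the dword, bits 4:0 the bit within it.
	 */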
5309 index = (vtag >> 5) & 0x7F;
5310 bit = vtag & 0x1F;
5311 adapter->shadow_vfta[index] |= (1 << bit);
5312 ++adapter->num_vlans;
5313 /* Change hw filter setting */
5314 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5315 igb_setup_vlan_hw_support(adapter);
5316 IGB_CORE_UNLOCK(adapter);
5317}
5318
5319/*
5320 * This routine is run via a vlan
5321 * unconfig EVENT
5322 */
5323static void
5324igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5325{
5326 struct adapter *adapter = ifp->if_softc;
5327 u32 index, bit;
5328
5329 if (ifp->if_softc != arg)
5330 return;
5331
5332 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5333 return;
5334
5335 IGB_CORE_LOCK(adapter);
5336 index = (vtag >> 5) & 0x7F;
5337 bit = vtag & 0x1F;
5338 adapter->shadow_vfta[index] &= ~(1 << bit);
5339 --adapter->num_vlans;
5340 /* Change hw filter setting */
5341 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5342 igb_setup_vlan_hw_support(adapter);
5343 IGB_CORE_UNLOCK(adapter);
5344}
5345
5346static void
5347igb_setup_vlan_hw_support(struct adapter *adapter)
5348{
5349 struct e1000_hw *hw = &adapter->hw;
5350 struct ifnet *ifp = adapter->ifp;
5351 u32 reg;
5352
5353 if (adapter->vf_ifp) {
5354 e1000_rlpml_set_vf(hw,
5355 adapter->max_frame_size + VLAN_TAG_SIZE);
5356 return;
5357 }
5358
5359 reg = E1000_READ_REG(hw, E1000_CTRL);
5360 reg |= E1000_CTRL_VME;
5361 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5362
5363 /* Enable the Filter Table */
5364 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5365 reg = E1000_READ_REG(hw, E1000_RCTL);
5366 reg &= ~E1000_RCTL_CFIEN;
5367 reg |= E1000_RCTL_VFE;
5368 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5369 }
5370
5371 /* Update the frame size */
5372 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5373 adapter->max_frame_size + VLAN_TAG_SIZE);
5374
5375 /* Don't bother with table if no vlans */
5376 if ((adapter->num_vlans == 0) ||
5377 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5378 return;
5379 /*
5380	** A soft reset zeroes out the VFTA, so
5381 ** we need to repopulate it now.
5382 */
5383 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5384 if (adapter->shadow_vfta[i] != 0) {
5385 if (adapter->vf_ifp)
5386 e1000_vfta_set_vf(hw,
5387 adapter->shadow_vfta[i], TRUE);
5388 else
5389 e1000_write_vfta(hw,
5390 i, adapter->shadow_vfta[i]);
5391 }
5392}
5393
5394static void
5395igb_enable_intr(struct adapter *adapter)
5396{
5397 /* With RSS set up what to auto clear */
5398 if (adapter->msix_mem) {
5399 u32 mask = (adapter->que_mask | adapter->link_mask);
5400 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5401 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5402 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5403 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5404 E1000_IMS_LSC);
5405 } else {
5406 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5407 IMS_ENABLE_MASK);
5408 }
5409 E1000_WRITE_FLUSH(&adapter->hw);
5410
5411 return;
5412}
5413
5414static void
5415igb_disable_intr(struct adapter *adapter)
5416{
5417 if (adapter->msix_mem) {
5418 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5419 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5420 }
5421 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5422 E1000_WRITE_FLUSH(&adapter->hw);
5423 return;
5424}
5425
5426/*
5427 * Bit of a misnomer: what this really means is
5428 * to enable OS management of the system, i.e.,
5429 * to disable special hardware management features.
5430 */
5431static void
5432igb_init_manageability(struct adapter *adapter)
5433{
5434 if (adapter->has_manage) {
5435 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5436 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5437
5438 /* disable hardware interception of ARP */
5439 manc &= ~(E1000_MANC_ARP_EN);
5440
5441 /* enable receiving management packets to the host */
5442 manc |= E1000_MANC_EN_MNG2HOST;
5443 manc2h |= 1 << 5; /* Mng Port 623 */
5444 manc2h |= 1 << 6; /* Mng Port 664 */
5445 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5446 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5447 }
5448}
5449
5450/*
5451 * Give control back to hardware management
5452 * controller if there is one.
5453 */
5454static void
5455igb_release_manageability(struct adapter *adapter)
5456{
5457 if (adapter->has_manage) {
5458 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5459
5460 /* re-enable hardware interception of ARP */
5461 manc |= E1000_MANC_ARP_EN;
5462 manc &= ~E1000_MANC_EN_MNG2HOST;
5463
5464 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5465 }
5466}
5467
5468/*
5469 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5470 * For ASF and Pass Through versions of f/w this means that
5471 * the driver is loaded.
5472 *
5473 */
5474static void
5475igb_get_hw_control(struct adapter *adapter)
5476{
5477 u32 ctrl_ext;
5478
5479 if (adapter->vf_ifp)
5480 return;
5481
5482 /* Let firmware know the driver has taken over */
5483 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5484 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5485 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5486}
5487
5488/*
5489 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5490 * For ASF and Pass Through versions of f/w this means that the
5491 * driver is no longer loaded.
5492 *
5493 */
5494static void
5495igb_release_hw_control(struct adapter *adapter)
5496{
5497 u32 ctrl_ext;
5498
5499 if (adapter->vf_ifp)
5500 return;
5501
5502	/* Let firmware take over control of h/w */
5503 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5504 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5505 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5506}
5507
5508static int
5509igb_is_valid_ether_addr(uint8_t *addr)
5510{
5511 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5512
5513 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5514 return (FALSE);
5515 }
5516
5517 return (TRUE);
5518}
5519
5520
5521/*
5522 * Enable PCI Wake On Lan capability
5523 */
5524static void
5525igb_enable_wakeup(device_t dev)
5526{
5527 u16 cap, status;
5528 u8 id;
5529
5530	/* First find the capabilities pointer */
5531 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5532 /* Read the PM Capabilities */
5533 id = pci_read_config(dev, cap, 1);
5534 if (id != PCIY_PMG) /* Something wrong */
5535 return;
5536 /* OK, we have the power capabilities, so
5537 now get the status register */
5538 cap += PCIR_POWER_STATUS;
5539 status = pci_read_config(dev, cap, 2);
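	/* Set the PME status and PME enable bits so the device can wake the system. */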
5540 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5541 pci_write_config(dev, cap, status, 2);
5542 return;
5543}
5544
5545static void
5546igb_led_func(void *arg, int onoff)
5547{
5548 struct adapter *adapter = arg;
5549
5550 IGB_CORE_LOCK(adapter);
5551 if (onoff) {
5552 e1000_setup_led(&adapter->hw);
5553 e1000_led_on(&adapter->hw);
5554 } else {
5555 e1000_led_off(&adapter->hw);
5556 e1000_cleanup_led(&adapter->hw);
5557 }
5558 IGB_CORE_UNLOCK(adapter);
5559}
5560
5561static uint64_t
5562igb_get_vf_counter(if_t ifp, ift_counter cnt)
5563{
5564 struct adapter *adapter;
5565 struct e1000_vf_stats *stats;
5566#ifndef IGB_LEGACY_TX
5567 struct tx_ring *txr;
5568 uint64_t rv;
5569#endif
5570
5571 adapter = if_getsoftc(ifp);
5572 stats = (struct e1000_vf_stats *)adapter->stats;
5573
5574 switch (cnt) {
5575 case IFCOUNTER_IPACKETS:
5576 return (stats->gprc);
5577 case IFCOUNTER_OPACKETS:
5578 return (stats->gptc);
5579 case IFCOUNTER_IBYTES:
5580 return (stats->gorc);
5581 case IFCOUNTER_OBYTES:
5582 return (stats->gotc);
5583 case IFCOUNTER_IMCASTS:
5584 return (stats->mprc);
5585 case IFCOUNTER_IERRORS:
5586 return (adapter->dropped_pkts);
5587 case IFCOUNTER_OERRORS:
5588 return (adapter->watchdog_events);
5589#ifndef IGB_LEGACY_TX
5590 case IFCOUNTER_OQDROPS:
5591 rv = 0;
5592 txr = adapter->tx_rings;
5593 for (int i = 0; i < adapter->num_queues; i++, txr++)
5594 rv += txr->br->br_drops;
5595 return (rv);
5596#endif
5597 default:
5598 return (if_get_counter_default(ifp, cnt));
5599 }
5600}
5601
5602static uint64_t
5603igb_get_counter(if_t ifp, ift_counter cnt)
5604{
5605 struct adapter *adapter;
5606 struct e1000_hw_stats *stats;
5607#ifndef IGB_LEGACY_TX
5608 struct tx_ring *txr;
5609 uint64_t rv;
5610#endif
5611
5612 adapter = if_getsoftc(ifp);
5613 if (adapter->vf_ifp)
5614 return (igb_get_vf_counter(ifp, cnt));
5615
5616 stats = (struct e1000_hw_stats *)adapter->stats;
5617
5618 switch (cnt) {
5619 case IFCOUNTER_IPACKETS:
5620 return (stats->gprc);
5621 case IFCOUNTER_OPACKETS:
5622 return (stats->gptc);
5623 case IFCOUNTER_IBYTES:
5624 return (stats->gorc);
5625 case IFCOUNTER_OBYTES:
5626 return (stats->gotc);
5627 case IFCOUNTER_IMCASTS:
5628 return (stats->mprc);
5629 case IFCOUNTER_OMCASTS:
5630 return (stats->mptc);
5631 case IFCOUNTER_IERRORS:
5632 return (adapter->dropped_pkts + stats->rxerrc +
5633 stats->crcerrs + stats->algnerrc +
5634 stats->ruc + stats->roc + stats->cexterr);
5635 case IFCOUNTER_OERRORS:
5636 return (stats->ecol + stats->latecol +
5637 adapter->watchdog_events);
5638 case IFCOUNTER_COLLISIONS:
5639 return (stats->colc);
5640 case IFCOUNTER_IQDROPS:
5641 return (stats->mpc);
5642#ifndef IGB_LEGACY_TX
5643 case IFCOUNTER_OQDROPS:
5644 rv = 0;
5645 txr = adapter->tx_rings;
5646 for (int i = 0; i < adapter->num_queues; i++, txr++)
5647 rv += txr->br->br_drops;
5648 return (rv);
5649#endif
5650 default:
5651 return (if_get_counter_default(ifp, cnt));
5652 }
5653}
5654
5655/**********************************************************************
5656 *
5657 * Update the board statistics counters.
5658 *
5659 **********************************************************************/
5660static void
5661igb_update_stats_counters(struct adapter *adapter)
5662{
5663 struct e1000_hw *hw = &adapter->hw;
5664 struct e1000_hw_stats *stats;
5665
5666 /*
5667 ** The virtual function adapter has only a
5668	** small controlled set of stats; update only
5669 ** those and return.
5670 */
5671 if (adapter->vf_ifp) {
5672 igb_update_vf_stats_counters(adapter);
5673 return;
5674 }
5675
5676 stats = (struct e1000_hw_stats *)adapter->stats;
5677
5678	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5679 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5680 stats->symerrs +=
5681		    E1000_READ_REG(hw, E1000_SYMERRS);
5682 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5683 }
5684
5685 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5686 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5687 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5688 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5689
5690 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5691 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5692 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5693 stats->dc += E1000_READ_REG(hw, E1000_DC);
5694 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5695 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5696 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5697 /*
5698 ** For watchdog management we need to know if we have been
5699 ** paused during the last interval, so capture that here.
5700 */
5701 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5702 stats->xoffrxc += adapter->pause_frames;
5703 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5704 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5705 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5706 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5707 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5708 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5709 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5710 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5711 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5712 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5713 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5714 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5715
5716 /* For the 64-bit byte counters the low dword must be read first. */
5717 /* Both registers clear on the read of the high dword */
5718
5719 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5720 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5721 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5722 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5723
5724 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5725 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5726 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5727 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5728 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5729
5730 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5731 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5732 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5733
5734 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5735 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5736 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5737 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5738
5739 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5740 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5741 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5742 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5743 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5744 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5745 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5746 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5747 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5748 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5749
5750 /* Interrupt Counts */
5751
5752 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5753 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5754 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5755 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5756 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5757 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5758 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5759 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5760 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5761
5762 /* Host to Card Statistics */
5763
5764 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5765 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5766 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5767 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5768 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5769 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5770 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5771 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5772 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5773 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5774 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5775 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5776 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5777 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5778
5779 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5780 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5781 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5782 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5783 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5784 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5785
5786 /* Driver specific counters */
5787 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5788 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5789 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5790 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5791 adapter->packet_buf_alloc_tx =
5792 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5793 adapter->packet_buf_alloc_rx =
5794 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5795}
5796
5797
5798/**********************************************************************
5799 *
5800 * Initialize the VF board statistics counters.
5801 *
5802 **********************************************************************/
5803static void
5804igb_vf_init_stats(struct adapter *adapter)
5805{
5806 struct e1000_hw *hw = &adapter->hw;
5807 struct e1000_vf_stats *stats;
5808
5809 stats = (struct e1000_vf_stats *)adapter->stats;
5810 if (stats == NULL)
5811 return;
5812 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5813 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5814 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5815 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5816 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5817}
5818
5819/**********************************************************************
5820 *
5821 * Update the VF board statistics counters.
5822 *
5823 **********************************************************************/
5824static void
5825igb_update_vf_stats_counters(struct adapter *adapter)
5826{
5827 struct e1000_hw *hw = &adapter->hw;
5828 struct e1000_vf_stats *stats;
5829
5830 if (adapter->link_speed == 0)
5831 return;
5832
5833 stats = (struct e1000_vf_stats *)adapter->stats;
5834
5835 UPDATE_VF_REG(E1000_VFGPRC,
5836 stats->last_gprc, stats->gprc);
5837 UPDATE_VF_REG(E1000_VFGORC,
5838 stats->last_gorc, stats->gorc);
5839 UPDATE_VF_REG(E1000_VFGPTC,
5840 stats->last_gptc, stats->gptc);
5841 UPDATE_VF_REG(E1000_VFGOTC,
5842 stats->last_gotc, stats->gotc);
5843 UPDATE_VF_REG(E1000_VFMPRC,
5844 stats->last_mprc, stats->mprc);
5845}
5846
5847/* Export a single 32-bit register via a read-only sysctl. */
5848static int
5849igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5850{
5851 struct adapter *adapter;
5852 u_int val;
5853
5854 adapter = oidp->oid_arg1;
5855 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5856 return (sysctl_handle_int(oidp, &val, 0, req));
5857}
5858
5859/*
5860** Tuneable interrupt rate handler
5861*/
5862static int
5863igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5864{
5865 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5866 int error;
5867 u32 reg, usec, rate;
5868
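	/*
	 * The throttle interval lives in bits 14:2 of EITR; the extracted
	 * value is treated as microseconds, so the reported rate is
	 * 1,000,000 / interval, or 0 when throttling is disabled.
	 */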
5869 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5870 usec = ((reg & 0x7FFC) >> 2);
5871 if (usec > 0)
5872 rate = 1000000 / usec;
5873 else
5874 rate = 0;
5875 error = sysctl_handle_int(oidp, &rate, 0, req);
5876 if (error || !req->newptr)
5877 return error;
5878 return 0;
5879}
5880
5881/*
5882 * Add sysctl variables, one per statistic, to the system.
5883 */
5884static void
5885igb_add_hw_stats(struct adapter *adapter)
5886{
5887 device_t dev = adapter->dev;
5888
5889 struct tx_ring *txr = adapter->tx_rings;
5890 struct rx_ring *rxr = adapter->rx_rings;
5891
5892 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5893 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5894 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5895 struct e1000_hw_stats *stats = adapter->stats;
5896
5897 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5898 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5899
5900#define QUEUE_NAME_LEN 32
5901 char namebuf[QUEUE_NAME_LEN];
5902
5903 /* Driver Statistics */
5904 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5905 CTLFLAG_RD, &adapter->link_irq,
5906 "Link MSIX IRQ Handled");
5907 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5908 CTLFLAG_RD, &adapter->dropped_pkts,
5909 "Driver dropped packets");
5910 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5911 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5912 "Driver tx dma failure in xmit");
5913 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5914 CTLFLAG_RD, &adapter->rx_overruns,
5915 "RX overruns");
5916 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5917 CTLFLAG_RD, &adapter->watchdog_events,
5918 "Watchdog timeouts");
5919
5920 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5921 CTLFLAG_RD, &adapter->device_control,
5922 "Device Control Register");
5923 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5924 CTLFLAG_RD, &adapter->rx_control,
5925 "Receiver Control Register");
5926 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5927 CTLFLAG_RD, &adapter->int_mask,
5928 "Interrupt Mask");
5929 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5930 CTLFLAG_RD, &adapter->eint_mask,
5931 "Extended Interrupt Mask");
5932 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5933 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5934 "Transmit Buffer Packet Allocation");
5935 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5936 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5937 "Receive Buffer Packet Allocation");
5938 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5939 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5940 "Flow Control High Watermark");
5941 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5942 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5943 "Flow Control Low Watermark");
5944
5945 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5946 struct lro_ctrl *lro = &rxr->lro;
5947
5948 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5949 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5950 CTLFLAG_RD, NULL, "Queue Name");
5951 queue_list = SYSCTL_CHILDREN(queue_node);
5952
5953 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5954 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5955 sizeof(&adapter->queues[i]),
5956 igb_sysctl_interrupt_rate_handler,
5957 "IU", "Interrupt Rate");
5958
5959 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5960 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5961 igb_sysctl_reg_handler, "IU",
5962 "Transmit Descriptor Head");
5963 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5964 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5965 igb_sysctl_reg_handler, "IU",
5966 "Transmit Descriptor Tail");
5967 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5968 CTLFLAG_RD, &txr->no_desc_avail,
5969 "Queue Descriptors Unavailable");
5970 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5971 CTLFLAG_RD, &txr->total_packets,
5972 "Queue Packets Transmitted");
5973
5974 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5975 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5976 igb_sysctl_reg_handler, "IU",
5977 "Receive Descriptor Head");
5978 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5979 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5980 igb_sysctl_reg_handler, "IU",
5981 "Receive Descriptor Tail");
5982 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5983 CTLFLAG_RD, &rxr->rx_packets,
5984 "Queue Packets Received");
5985 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5986 CTLFLAG_RD, &rxr->rx_bytes,
5987 "Queue Bytes Received");
5988 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5989 CTLFLAG_RD, &lro->lro_queued, 0,
5990 "LRO Queued");
5991 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5992 CTLFLAG_RD, &lro->lro_flushed, 0,
5993 "LRO Flushed");
5994 }
5995
5996 /* MAC stats get their own sub node */
5997
5998 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5999 CTLFLAG_RD, NULL, "MAC Statistics");
6000 stat_list = SYSCTL_CHILDREN(stat_node);
6001
6002 /*
6003 ** VF adapter has a very limited set of stats
6004	** since it's not managing the metal, so to speak.
6005 */
6006 if (adapter->vf_ifp) {
6007 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6008 CTLFLAG_RD, &stats->gprc,
6009 "Good Packets Received");
6010 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6011 CTLFLAG_RD, &stats->gptc,
6012 "Good Packets Transmitted");
6013 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6014 CTLFLAG_RD, &stats->gorc,
6015 "Good Octets Received");
6016 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6017 CTLFLAG_RD, &stats->gotc,
6018 "Good Octets Transmitted");
6019 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6020 CTLFLAG_RD, &stats->mprc,
6021 "Multicast Packets Received");
6022 return;
6023 }
6024
6025 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
6026 CTLFLAG_RD, &stats->ecol,
6027 "Excessive collisions");
6028 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
6029 CTLFLAG_RD, &stats->scc,
6030 "Single collisions");
6031 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
6032 CTLFLAG_RD, &stats->mcc,
6033 "Multiple collisions");
6034 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
6035 CTLFLAG_RD, &stats->latecol,
6036 "Late collisions");
6037 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
6038 CTLFLAG_RD, &stats->colc,
6039 "Collision Count");
6040 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6041 CTLFLAG_RD, &stats->symerrs,
6042 "Symbol Errors");
6043 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6044 CTLFLAG_RD, &stats->sec,
6045 "Sequence Errors");
6046 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6047 CTLFLAG_RD, &stats->dc,
6048 "Defer Count");
6049 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6050 CTLFLAG_RD, &stats->mpc,
6051 "Missed Packets");
6052 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6053 CTLFLAG_RD, &stats->rlec,
6054 "Receive Length Errors");
6055 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6056 CTLFLAG_RD, &stats->rnbc,
6057 "Receive No Buffers");
6058 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6059 CTLFLAG_RD, &stats->ruc,
6060 "Receive Undersize");
6061 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6062 CTLFLAG_RD, &stats->rfc,
6063 "Fragmented Packets Received");
6064 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6065 CTLFLAG_RD, &stats->roc,
6066 "Oversized Packets Received");
6067 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6068 CTLFLAG_RD, &stats->rjc,
6069			"Received Jabber");
6070 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6071 CTLFLAG_RD, &stats->rxerrc,
6072 "Receive Errors");
6073 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6074 CTLFLAG_RD, &stats->crcerrs,
6075 "CRC errors");
6076 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6077 CTLFLAG_RD, &stats->algnerrc,
6078 "Alignment Errors");
6079 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6080 CTLFLAG_RD, &stats->tncrs,
6081 "Transmit with No CRS");
6082 /* On 82575 these are collision counts */
6083 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6084 CTLFLAG_RD, &stats->cexterr,
6085 "Collision/Carrier extension errors");
6086 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6087 CTLFLAG_RD, &stats->xonrxc,
6088 "XON Received");
6089 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6090 CTLFLAG_RD, &stats->xontxc,
6091 "XON Transmitted");
6092 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6093 CTLFLAG_RD, &stats->xoffrxc,
6094 "XOFF Received");
6095 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6096 CTLFLAG_RD, &stats->xofftxc,
6097 "XOFF Transmitted");
6098 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6099 CTLFLAG_RD, &stats->fcruc,
6100 "Unsupported Flow Control Received");
6101 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6102 CTLFLAG_RD, &stats->mgprc,
6103 "Management Packets Received");
6104 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6105 CTLFLAG_RD, &stats->mgpdc,
6106 "Management Packets Dropped");
6107 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6108 CTLFLAG_RD, &stats->mgptc,
6109 "Management Packets Transmitted");
6110 /* Packet Reception Stats */
6111 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6112 CTLFLAG_RD, &stats->tpr,
6113 "Total Packets Received");
6114 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6115 CTLFLAG_RD, &stats->gprc,
6116 "Good Packets Received");
6117 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6118 CTLFLAG_RD, &stats->bprc,
6119 "Broadcast Packets Received");
6120 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6121 CTLFLAG_RD, &stats->mprc,
6122 "Multicast Packets Received");
6123 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6124 CTLFLAG_RD, &stats->prc64,
6125 "64 byte frames received");
6126 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6127 CTLFLAG_RD, &stats->prc127,
6128 "65-127 byte frames received");
6129 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6130 CTLFLAG_RD, &stats->prc255,
6131 "128-255 byte frames received");
6132 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6133 CTLFLAG_RD, &stats->prc511,
6134 "256-511 byte frames received");
6135 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6136 CTLFLAG_RD, &stats->prc1023,
6137 "512-1023 byte frames received");
6138 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6139 CTLFLAG_RD, &stats->prc1522,
6140			"1024-1522 byte frames received");
6141 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6142 CTLFLAG_RD, &stats->gorc,
6143 "Good Octets Received");
6144 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6145 CTLFLAG_RD, &stats->tor,
6146 "Total Octets Received");
6147
6148 /* Packet Transmission Stats */
6149 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6150 CTLFLAG_RD, &stats->gotc,
6151 "Good Octets Transmitted");
6152 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6153 CTLFLAG_RD, &stats->tot,
6154 "Total Octets Transmitted");
6155 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6156 CTLFLAG_RD, &stats->tpt,
6157 "Total Packets Transmitted");
6158 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6159 CTLFLAG_RD, &stats->gptc,
6160 "Good Packets Transmitted");
6161 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6162 CTLFLAG_RD, &stats->bptc,
6163 "Broadcast Packets Transmitted");
6164 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6165 CTLFLAG_RD, &stats->mptc,
6166 "Multicast Packets Transmitted");
6167 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6168 CTLFLAG_RD, &stats->ptc64,
6169 "64 byte frames transmitted");
6170 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6171 CTLFLAG_RD, &stats->ptc127,
6172 "65-127 byte frames transmitted");
6173 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6174 CTLFLAG_RD, &stats->ptc255,
6175 "128-255 byte frames transmitted");
6176 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6177 CTLFLAG_RD, &stats->ptc511,
6178 "256-511 byte frames transmitted");
6179 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6180 CTLFLAG_RD, &stats->ptc1023,
6181 "512-1023 byte frames transmitted");
6182 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6183 CTLFLAG_RD, &stats->ptc1522,
6184 "1024-1522 byte frames transmitted");
6185 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6186 CTLFLAG_RD, &stats->tsctc,
6187 "TSO Contexts Transmitted");
6188 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6189 CTLFLAG_RD, &stats->tsctfc,
6190 "TSO Contexts Failed");
6191
6192
6193 /* Interrupt Stats */
6194
6195 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6196 CTLFLAG_RD, NULL, "Interrupt Statistics");
6197 int_list = SYSCTL_CHILDREN(int_node);
6198
6199 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6200 CTLFLAG_RD, &stats->iac,
6201 "Interrupt Assertion Count");
6202
6203 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6204 CTLFLAG_RD, &stats->icrxptc,
6205 "Interrupt Cause Rx Pkt Timer Expire Count");
6206
6207 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6208 CTLFLAG_RD, &stats->icrxatc,
6209 "Interrupt Cause Rx Abs Timer Expire Count");
6210
6211 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6212 CTLFLAG_RD, &stats->ictxptc,
6213 "Interrupt Cause Tx Pkt Timer Expire Count");
6214
6215 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6216 CTLFLAG_RD, &stats->ictxatc,
6217 "Interrupt Cause Tx Abs Timer Expire Count");
6218
6219 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6220 CTLFLAG_RD, &stats->ictxqec,
6221 "Interrupt Cause Tx Queue Empty Count");
6222
6223 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6224 CTLFLAG_RD, &stats->ictxqmtc,
6225 "Interrupt Cause Tx Queue Min Thresh Count");
6226
6227 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6228 CTLFLAG_RD, &stats->icrxdmtc,
6229 "Interrupt Cause Rx Desc Min Thresh Count");
6230
6231 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6232 CTLFLAG_RD, &stats->icrxoc,
6233 "Interrupt Cause Receiver Overrun Count");
6234
6235 /* Host to Card Stats */
6236
6237 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6238 CTLFLAG_RD, NULL,
6239 "Host to Card Statistics");
6240
6241 host_list = SYSCTL_CHILDREN(host_node);
6242
6243 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6244 CTLFLAG_RD, &stats->cbtmpc,
6245 "Circuit Breaker Tx Packet Count");
6246
6247 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6248 CTLFLAG_RD, &stats->htdpmc,
6249 "Host Transmit Discarded Packets");
6250
6251 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6252 CTLFLAG_RD, &stats->rpthc,
6253 "Rx Packets To Host");
6254
6255 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6256 CTLFLAG_RD, &stats->cbrmpc,
6257 "Circuit Breaker Rx Packet Count");
6258
6259 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6260 CTLFLAG_RD, &stats->cbrdpc,
6261 "Circuit Breaker Rx Dropped Count");
6262
6263 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6264 CTLFLAG_RD, &stats->hgptc,
6265 "Host Good Packets Tx Count");
6266
6267 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6268 CTLFLAG_RD, &stats->htcbdpc,
6269 "Host Tx Circuit Breaker Dropped Count");
6270
6271 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6272 CTLFLAG_RD, &stats->hgorc,
6273 "Host Good Octets Received Count");
6274
6275 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6276 CTLFLAG_RD, &stats->hgotc,
6277 "Host Good Octets Transmit Count");
6278
6279 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6280 CTLFLAG_RD, &stats->lenerrs,
6281 "Length Errors");
6282
6283 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6284 CTLFLAG_RD, &stats->scvpc,
6285 "SerDes/SGMII Code Violation Pkt Count");
6286
6287 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6288 CTLFLAG_RD, &stats->hrmpc,
6289 "Header Redirection Missed Packet Count");
6290}
6291
6292
6293/**********************************************************************
6294 *
6295 * This routine provides a way to dump out the adapter eeprom,
6296 * often a useful debug/service tool. This only dumps the first
6297 * 32 words; the data that matters is within that extent.
6298 *
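 * Assuming the handler is attached as dev.igb.<unit>.nvm, setting it
 * to 1 (e.g. "sysctl dev.igb.0.nvm=1") triggers the dump.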
6299 **********************************************************************/
6300static int
6301igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6302{
6303 struct adapter *adapter;
6304 int error;
6305 int result;
6306
6307 result = -1;
6308 error = sysctl_handle_int(oidp, &result, 0, req);
6309
6310 if (error || !req->newptr)
6311 return (error);
6312
6313 /*
6314 * This value will cause a hex dump of the
6315 * first 32 16-bit words of the EEPROM to
6316 * the screen.
6317 */
6318 if (result == 1) {
6319 adapter = (struct adapter *)arg1;
6320 igb_print_nvm_info(adapter);
6321 }
6322
6323 return (error);
6324}
6325
6326static void
6327igb_print_nvm_info(struct adapter *adapter)
6328{
6329 u16 eeprom_data;
6330 int i, j, row = 0;
6331
6332	/* It's a bit crude, but it gets the job done */
6333 printf("\nInterface EEPROM Dump:\n");
6334 printf("Offset\n0x0000 ");
6335 for (i = 0, j = 0; i < 32; i++, j++) {
6336 if (j == 8) { /* Make the offset block */
6337 j = 0; ++row;
6338 printf("\n0x00%x0 ",row);
6339 }
6340 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6341 printf("%04x ", eeprom_data);
6342 }
6343 printf("\n");
6344}
6345
6346static void
6347igb_set_sysctl_value(struct adapter *adapter, const char *name,
6348 const char *description, int *limit, int value)
6349{
6350 *limit = value;
6351 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6352 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6353 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6354}
6355
6356/*
6357** Set flow control using sysctl:
6358** Flow control values:
6359** 0 - off
6360** 1 - rx pause
6361** 2 - tx pause
6362** 3 - full
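**
** For example, assuming the node is attached as dev.igb.<unit>.fc,
** "sysctl dev.igb.0.fc=3" requests full flow control.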
6363*/
6364static int
6365igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6366{
6367 int error;
6368 static int input = 3; /* default is full */
6369 struct adapter *adapter = (struct adapter *) arg1;
6370
6371 error = sysctl_handle_int(oidp, &input, 0, req);
6372
6373 if ((error) || (req->newptr == NULL))
6374 return (error);
6375
6376 switch (input) {
6377 case e1000_fc_rx_pause:
6378 case e1000_fc_tx_pause:
6379 case e1000_fc_full:
6380 case e1000_fc_none:
6381 adapter->hw.fc.requested_mode = input;
6382 adapter->fc = input;
6383 break;
6384 default:
6385 /* Do nothing */
6386 return (error);
6387 }
6388
6389 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6390 e1000_force_mac_fc(&adapter->hw);
6391 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6392 return (error);
6393}
6394
6395/*
6396** Manage DMA Coalesce:
6397** Control values:
6398** 0/1 - off/on
6399** Legal timer values are:
6400** 250, 500, 1000-10000 in thousands
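**
** For example, assuming the node is attached as dev.igb.<unit>.dmac,
** "sysctl dev.igb.0.dmac=1000" enables coalescing with a timer value
** of 1000.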
6401*/
6402static int
6403igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6404{
6405 struct adapter *adapter = (struct adapter *) arg1;
6406 int error;
6407
6408 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6409
6410 if ((error) || (req->newptr == NULL))
6411 return (error);
6412
6413 switch (adapter->dmac) {
6414 case 0:
6415		/* Disabling */
6416 break;
6417 case 1: /* Just enable and use default */
6418 adapter->dmac = 1000;
6419 break;
6420 case 250:
6421 case 500:
6422 case 1000:
6423 case 2000:
6424 case 3000:
6425 case 4000:
6426 case 5000:
6427 case 6000:
6428 case 7000:
6429 case 8000:
6430 case 9000:
6431 case 10000:
6432 /* Legal values - allow */
6433 break;
6434 default:
6435 /* Do nothing, illegal value */
6436 adapter->dmac = 0;
6437 return (EINVAL);
6438 }
6439 /* Reinit the interface */
6440 igb_init(adapter);
6441 return (error);
6442}
6443
6444/*
6445** Manage Energy Efficient Ethernet:
6446** Control values:
6447** 0/1 - enabled/disabled
6448*/
6449static int
6450igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6451{
6452 struct adapter *adapter = (struct adapter *) arg1;
6453 int error, value;
6454
6455 value = adapter->hw.dev_spec._82575.eee_disable;
6456 error = sysctl_handle_int(oidp, &value, 0, req);
6457 if (error || req->newptr == NULL)
6458 return (error);
6459 IGB_CORE_LOCK(adapter);
6460 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6461 igb_init_locked(adapter);
6462 IGB_CORE_UNLOCK(adapter);
6463 return (0);
6464}