if_igb.c (294327) → if_igb.c (295323)
1/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 294327 2016-01-19 15:33:28Z hselasky $*/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 295323 2016-02-05 17:14:37Z erj $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include "if_igb.h"
46
47/*********************************************************************
48 * Driver version:
49 *********************************************************************/
50char igb_driver_version[] = "2.5.2";
50char igb_driver_version[] = "2.5.3-k";
51
52
53/*********************************************************************
54 * PCI Device ID Table
55 *
56 * Used by probe to select devices to load on
57 * Last field stores an index into e1000_strings
58 * Last entry must be all 0s
59 *
60 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61 *********************************************************************/
62
63static igb_vendor_info_t igb_vendor_info_array[] =
64{
65 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
72 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
79 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
81 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
88 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
89 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
90 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
91 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
93 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
98 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
99 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
100 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
101 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
104 /* required last entry */
105 {0, 0, 0, 0, 0}
106};
107
108/*********************************************************************
109 * Table of branding strings for all supported NICs.
110 *********************************************************************/
111
112static char *igb_strings[] = {
113 "Intel(R) PRO/1000 Network Connection"
114};
115
116/*********************************************************************
117 * Function prototypes
118 *********************************************************************/
119static int igb_probe(device_t);
120static int igb_attach(device_t);
121static int igb_detach(device_t);
122static int igb_shutdown(device_t);
123static int igb_suspend(device_t);
124static int igb_resume(device_t);
125#ifndef IGB_LEGACY_TX
126static int igb_mq_start(struct ifnet *, struct mbuf *);
127static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128static void igb_qflush(struct ifnet *);
129static void igb_deferred_mq_start(void *, int);
130#else
131static void igb_start(struct ifnet *);
132static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133#endif
134static int igb_ioctl(struct ifnet *, u_long, caddr_t);
135static uint64_t igb_get_counter(if_t, ift_counter);
136static void igb_init(void *);
137static void igb_init_locked(struct adapter *);
138static void igb_stop(void *);
139static void igb_media_status(struct ifnet *, struct ifmediareq *);
140static int igb_media_change(struct ifnet *);
141static void igb_identify_hardware(struct adapter *);
142static int igb_allocate_pci_resources(struct adapter *);
143static int igb_allocate_msix(struct adapter *);
144static int igb_allocate_legacy(struct adapter *);
145static int igb_setup_msix(struct adapter *);
146static void igb_free_pci_resources(struct adapter *);
147static void igb_local_timer(void *);
148static void igb_reset(struct adapter *);
149static int igb_setup_interface(device_t, struct adapter *);
150static int igb_allocate_queues(struct adapter *);
151static void igb_configure_queues(struct adapter *);
152
153static int igb_allocate_transmit_buffers(struct tx_ring *);
154static void igb_setup_transmit_structures(struct adapter *);
155static void igb_setup_transmit_ring(struct tx_ring *);
156static void igb_initialize_transmit_units(struct adapter *);
157static void igb_free_transmit_structures(struct adapter *);
158static void igb_free_transmit_buffers(struct tx_ring *);
159
160static int igb_allocate_receive_buffers(struct rx_ring *);
161static int igb_setup_receive_structures(struct adapter *);
162static int igb_setup_receive_ring(struct rx_ring *);
163static void igb_initialize_receive_units(struct adapter *);
164static void igb_free_receive_structures(struct adapter *);
165static void igb_free_receive_buffers(struct rx_ring *);
166static void igb_free_receive_ring(struct rx_ring *);
167
168static void igb_enable_intr(struct adapter *);
169static void igb_disable_intr(struct adapter *);
170static void igb_update_stats_counters(struct adapter *);
171static bool igb_txeof(struct tx_ring *);
172
173static __inline void igb_rx_discard(struct rx_ring *, int);
174static __inline void igb_rx_input(struct rx_ring *,
175 struct ifnet *, struct mbuf *, u32);
176
177static bool igb_rxeof(struct igb_queue *, int, int *);
178static void igb_rx_checksum(u32, struct mbuf *, u32);
179static int igb_tx_ctx_setup(struct tx_ring *,
180 struct mbuf *, u32 *, u32 *);
181static int igb_tso_setup(struct tx_ring *,
182 struct mbuf *, u32 *, u32 *);
183static void igb_set_promisc(struct adapter *);
184static void igb_disable_promisc(struct adapter *);
185static void igb_set_multi(struct adapter *);
186static void igb_update_link_status(struct adapter *);
187static void igb_refresh_mbufs(struct rx_ring *, int);
188
189static void igb_register_vlan(void *, struct ifnet *, u16);
190static void igb_unregister_vlan(void *, struct ifnet *, u16);
191static void igb_setup_vlan_hw_support(struct adapter *);
192
193static int igb_xmit(struct tx_ring *, struct mbuf **);
194static int igb_dma_malloc(struct adapter *, bus_size_t,
195 struct igb_dma_alloc *, int);
196static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198static void igb_print_nvm_info(struct adapter *);
199static int igb_is_valid_ether_addr(u8 *);
200static void igb_add_hw_stats(struct adapter *);
201
202static void igb_vf_init_stats(struct adapter *);
203static void igb_update_vf_stats_counters(struct adapter *);
204
205/* Management and WOL Support */
206static void igb_init_manageability(struct adapter *);
207static void igb_release_manageability(struct adapter *);
208static void igb_get_hw_control(struct adapter *);
209static void igb_release_hw_control(struct adapter *);
210static void igb_enable_wakeup(device_t);
211static void igb_led_func(void *, int);
212
213static int igb_irq_fast(void *);
214static void igb_msix_que(void *);
215static void igb_msix_link(void *);
216static void igb_handle_que(void *context, int pending);
217static void igb_handle_link(void *context, int pending);
218static void igb_handle_link_locked(struct adapter *);
219
220static void igb_set_sysctl_value(struct adapter *, const char *,
221 const char *, int *, int);
222static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225
226#ifdef DEVICE_POLLING
227static poll_handler_t igb_poll;
228#endif /* POLLING */
229
230/*********************************************************************
231 * FreeBSD Device Interface Entry Points
232 *********************************************************************/
233
234static device_method_t igb_methods[] = {
235 /* Device interface */
236 DEVMETHOD(device_probe, igb_probe),
237 DEVMETHOD(device_attach, igb_attach),
238 DEVMETHOD(device_detach, igb_detach),
239 DEVMETHOD(device_shutdown, igb_shutdown),
240 DEVMETHOD(device_suspend, igb_suspend),
241 DEVMETHOD(device_resume, igb_resume),
242 DEVMETHOD_END
243};
244
245static driver_t igb_driver = {
246 "igb", igb_methods, sizeof(struct adapter),
247};
248
249static devclass_t igb_devclass;
250DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251MODULE_DEPEND(igb, pci, 1, 1, 1);
252MODULE_DEPEND(igb, ether, 1, 1, 1);
253#ifdef DEV_NETMAP
254MODULE_DEPEND(igb, netmap, 1, 1, 1);
255#endif /* DEV_NETMAP */
256
257/*********************************************************************
258 * Tunable default values.
259 *********************************************************************/
260
261static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262
263/* Descriptor defaults */
264static int igb_rxd = IGB_DEFAULT_RXD;
265static int igb_txd = IGB_DEFAULT_TXD;
266SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267 "Number of receive descriptors per queue");
268SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269 "Number of transmit descriptors per queue");
270
271/*
272** AIM: Adaptive Interrupt Moderation
273** which means that the interrupt rate
274** is varied over time based on the
275** traffic for that interrupt vector
276*/
277static int igb_enable_aim = TRUE;
278SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279 "Enable adaptive interrupt moderation");
280
281/*
282 * MSIX should be the default for best performance,
283 * but this allows it to be forced off for testing.
284 */
285static int igb_enable_msix = 1;
286SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287 "Enable MSI-X interrupts");
288
289/*
290** Tuneable Interrupt rate
291*/
292static int igb_max_interrupt_rate = 8000;
293SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294 &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295
296#ifndef IGB_LEGACY_TX
297/*
298** Tuneable number of buffers in the buf-ring (drbr_xxx)
299*/
300static int igb_buf_ring_size = IGB_BR_SIZE;
301SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302 &igb_buf_ring_size, 0, "Size of the bufring");
303#endif
304
305/*
306** Header split causes the packet header to
 307** be dma'd to a separate mbuf from the payload.
 308** This can have memory alignment benefits. But
 309** another plus is that small packets often fit
 310** into the header and thus use no cluster. It's
 311** a very workload-dependent type of feature.
312*/
313static int igb_header_split = FALSE;
314SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315 "Enable receive mbuf header split");
316
317/*
318** This will autoconfigure based on the
319** number of CPUs and max supported
320** MSIX messages if left at 0.
321*/
322static int igb_num_queues = 0;
323SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324 "Number of queues to configure, 0 indicates autoconfigure");
325
326/*
327** Global variable to store last used CPU when binding queues
328** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
329** queue is bound to a cpu.
330*/
331static int igb_last_bind_cpu = -1;
332
333/* How many packets rxeof tries to clean at a time */
334static int igb_rx_process_limit = 100;
335SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336 &igb_rx_process_limit, 0,
337 "Maximum number of received packets to process at a time, -1 means unlimited");
338
339/* How many packets txeof tries to clean at a time */
340static int igb_tx_process_limit = -1;
341SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342 &igb_tx_process_limit, 0,
343 "Maximum number of sent packets to process at a time, -1 means unlimited");
344
345#ifdef DEV_NETMAP /* see ixgbe.c for details */
346#include <dev/netmap/if_igb_netmap.h>
347#endif /* DEV_NETMAP */
348/*********************************************************************
349 * Device identification routine
350 *
351 * igb_probe determines if the driver should be loaded on
352 * adapter based on PCI vendor/device id of the adapter.
353 *
354 * return BUS_PROBE_DEFAULT on success, positive on failure
355 *********************************************************************/
356
357static int
358igb_probe(device_t dev)
359{
360 char adapter_name[256];
361 uint16_t pci_vendor_id = 0;
362 uint16_t pci_device_id = 0;
363 uint16_t pci_subvendor_id = 0;
364 uint16_t pci_subdevice_id = 0;
365 igb_vendor_info_t *ent;
366
367 INIT_DEBUGOUT("igb_probe: begin");
368
369 pci_vendor_id = pci_get_vendor(dev);
370 if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371 return (ENXIO);
372
373 pci_device_id = pci_get_device(dev);
374 pci_subvendor_id = pci_get_subvendor(dev);
375 pci_subdevice_id = pci_get_subdevice(dev);
376
377 ent = igb_vendor_info_array;
378 while (ent->vendor_id != 0) {
379 if ((pci_vendor_id == ent->vendor_id) &&
380 (pci_device_id == ent->device_id) &&
381
382 ((pci_subvendor_id == ent->subvendor_id) ||
383 (ent->subvendor_id == 0)) &&
384
385 ((pci_subdevice_id == ent->subdevice_id) ||
386 (ent->subdevice_id == 0))) {
387 sprintf(adapter_name, "%s, Version - %s",
388 igb_strings[ent->index],
389 igb_driver_version);
390 device_set_desc_copy(dev, adapter_name);
391 return (BUS_PROBE_DEFAULT);
392 }
393 ent++;
394 }
395 return (ENXIO);
396}
397
398/*********************************************************************
399 * Device initialization routine
400 *
401 * The attach entry point is called when the driver is being loaded.
402 * This routine identifies the type of hardware, allocates all resources
403 * and initializes the hardware.
404 *
405 * return 0 on success, positive on failure
406 *********************************************************************/
407
408static int
409igb_attach(device_t dev)
410{
411 struct adapter *adapter;
412 int error = 0;
413 u16 eeprom_data;
414
415 INIT_DEBUGOUT("igb_attach: begin");
416
417 if (resource_disabled("igb", device_get_unit(dev))) {
418 device_printf(dev, "Disabled by device hint\n");
419 return (ENXIO);
420 }
421
422 adapter = device_get_softc(dev);
423 adapter->dev = adapter->osdep.dev = dev;
424 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425
426 /* SYSCTLs */
427 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430 igb_sysctl_nvm_info, "I", "NVM Information");
431
432 igb_set_sysctl_value(adapter, "enable_aim",
433 "Interrupt Moderation", &adapter->enable_aim,
434 igb_enable_aim);
435
436 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439 adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440
441 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442
443 /* Determine hardware and mac info */
444 igb_identify_hardware(adapter);
445
446 /* Setup PCI resources */
447 if (igb_allocate_pci_resources(adapter)) {
448 device_printf(dev, "Allocation of PCI resources failed\n");
449 error = ENXIO;
450 goto err_pci;
451 }
452
453 /* Do Shared Code initialization */
454 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455 device_printf(dev, "Setup of Shared code failed\n");
456 error = ENXIO;
457 goto err_pci;
458 }
459
460 e1000_get_bus_info(&adapter->hw);
461
462 /* Sysctls for limiting the amount of work done in the taskqueues */
463 igb_set_sysctl_value(adapter, "rx_processing_limit",
464 "max number of rx packets to process",
465 &adapter->rx_process_limit, igb_rx_process_limit);
466
467 igb_set_sysctl_value(adapter, "tx_processing_limit",
468 "max number of tx packets to process",
469 &adapter->tx_process_limit, igb_tx_process_limit);
470
471 /*
472 * Validate number of transmit and receive descriptors. It
473 * must not exceed hardware maximum, and must be multiple
474 * of E1000_DBA_ALIGN.
475 */
476 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479 IGB_DEFAULT_TXD, igb_txd);
480 adapter->num_tx_desc = IGB_DEFAULT_TXD;
481 } else
482 adapter->num_tx_desc = igb_txd;
483 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486 IGB_DEFAULT_RXD, igb_rxd);
487 adapter->num_rx_desc = IGB_DEFAULT_RXD;
488 } else
489 adapter->num_rx_desc = igb_rxd;
490
491 adapter->hw.mac.autoneg = DO_AUTO_NEG;
492 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494
495 /* Copper options */
496 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497 adapter->hw.phy.mdix = AUTO_ALL_MODES;
498 adapter->hw.phy.disable_polarity_correction = FALSE;
499 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500 }
501
502 /*
503 * Set the frame limits assuming
504 * standard ethernet sized frames.
505 */
506 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
507
508 /*
509 ** Allocate and Setup Queues
510 */
511 if (igb_allocate_queues(adapter)) {
512 error = ENOMEM;
513 goto err_pci;
514 }
515
516 /* Allocate the appropriate stats memory */
517 if (adapter->vf_ifp) {
518 adapter->stats =
519 (struct e1000_vf_stats *)malloc(sizeof \
520 (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
521 igb_vf_init_stats(adapter);
522 } else
523 adapter->stats =
524 (struct e1000_hw_stats *)malloc(sizeof \
525 (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526 if (adapter->stats == NULL) {
527 device_printf(dev, "Can not allocate stats memory\n");
528 error = ENOMEM;
529 goto err_late;
530 }
531
532 /* Allocate multicast array memory. */
533 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535 if (adapter->mta == NULL) {
536 device_printf(dev, "Can not allocate multicast setup array\n");
537 error = ENOMEM;
538 goto err_late;
539 }
540
541 /* Some adapter-specific advanced features */
542 if (adapter->hw.mac.type >= e1000_i350) {
543 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545 OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546 adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549 OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550 adapter, 0, igb_sysctl_eee, "I",
551 "Disable Energy Efficient Ethernet");
552 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553 if (adapter->hw.mac.type == e1000_i354)
 554 e1000_set_eee_i354(&adapter->hw);
 555 else
 556 e1000_set_eee_i350(&adapter->hw);
 554 e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
 555 else
 556 e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
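The only functional change in the hunk above is that the two shared-code EEE setup calls now take a pair of boolean arguments, which igb_attach() passes as TRUE, TRUE. A minimal sketch of what the post-295323 prototypes presumably look like follows; the return type and parameter names are assumptions inferred from this call site, since the diff does not include the e1000 shared-code headers:

	/*
	 * Assumed shared-code prototypes after r295323 (hypothetical names;
	 * the diff shows only the calls, not the declarations).  The two
	 * booleans presumably select which EEE link speeds are advertised.
	 */
	s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M);
	s32 e1000_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M);

Passing TRUE for both presumably preserves the earlier single-argument behavior of enabling EEE on i350/i354 copper ports at attach time, still subject to the eee_disabled sysctl registered a few lines earlier.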
557 }
558 }
559
560 /*
561 ** Start from a known state, this is
562 ** important in reading the nvm and
563 ** mac from that.
564 */
565 e1000_reset_hw(&adapter->hw);
566
567 /* Make sure we have a good EEPROM before we read from it */
568 if (((adapter->hw.mac.type != e1000_i210) &&
569 (adapter->hw.mac.type != e1000_i211)) &&
570 (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571 /*
572 ** Some PCI-E parts fail the first check due to
573 ** the link being in sleep state, call it again,
574 ** if it fails a second time its a real issue.
575 */
576 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577 device_printf(dev,
578 "The EEPROM Checksum Is Not Valid\n");
579 error = EIO;
580 goto err_late;
581 }
582 }
583
584 /*
585 ** Copy the permanent MAC address out of the EEPROM
586 */
587 if (e1000_read_mac_addr(&adapter->hw) < 0) {
588 device_printf(dev, "EEPROM read error while reading MAC"
589 " address\n");
590 error = EIO;
591 goto err_late;
592 }
593 /* Check its sanity */
594 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595 device_printf(dev, "Invalid MAC address\n");
596 error = EIO;
597 goto err_late;
598 }
599
600 /* Setup OS specific network interface */
601 if (igb_setup_interface(dev, adapter) != 0)
602 goto err_late;
603
604 /* Now get a good starting state */
605 igb_reset(adapter);
606
607 /* Initialize statistics */
608 igb_update_stats_counters(adapter);
609
610 adapter->hw.mac.get_link_status = 1;
611 igb_update_link_status(adapter);
612
613 /* Indicate SOL/IDER usage */
614 if (e1000_check_reset_block(&adapter->hw))
615 device_printf(dev,
616 "PHY reset is blocked due to SOL/IDER session.\n");
617
618 /* Determine if we have to control management hardware */
619 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620
621 /*
622 * Setup Wake-on-Lan
623 */
624 /* APME bit in EEPROM is mapped to WUC.APME */
625 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626 if (eeprom_data)
627 adapter->wol = E1000_WUFC_MAG;
628
629 /* Register for VLAN events */
630 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634
635 igb_add_hw_stats(adapter);
636
637 /* Tell the stack that the interface is not active */
638 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
640
641 adapter->led_dev = led_create(igb_led_func, adapter,
642 device_get_nameunit(dev));
643
644 /*
645 ** Configure Interrupts
646 */
647 if ((adapter->msix > 1) && (igb_enable_msix))
648 error = igb_allocate_msix(adapter);
649 else /* MSI or Legacy */
650 error = igb_allocate_legacy(adapter);
651 if (error)
652 goto err_late;
653
654#ifdef DEV_NETMAP
655 igb_netmap_attach(adapter);
656#endif /* DEV_NETMAP */
657 INIT_DEBUGOUT("igb_attach: end");
658
659 return (0);
660
661err_late:
662 igb_detach(dev);
663 igb_free_transmit_structures(adapter);
664 igb_free_receive_structures(adapter);
665 igb_release_hw_control(adapter);
666err_pci:
667 igb_free_pci_resources(adapter);
668 if (adapter->ifp != NULL)
669 if_free(adapter->ifp);
670 free(adapter->mta, M_DEVBUF);
671 IGB_CORE_LOCK_DESTROY(adapter);
672
673 return (error);
674}
675
676/*********************************************************************
677 * Device removal routine
678 *
679 * The detach entry point is called when the driver is being removed.
680 * This routine stops the adapter and deallocates all the resources
681 * that were allocated for driver operation.
682 *
683 * return 0 on success, positive on failure
684 *********************************************************************/
685
686static int
687igb_detach(device_t dev)
688{
689 struct adapter *adapter = device_get_softc(dev);
690 struct ifnet *ifp = adapter->ifp;
691
692 INIT_DEBUGOUT("igb_detach: begin");
693
694 /* Make sure VLANS are not using driver */
695 if (adapter->ifp->if_vlantrunk != NULL) {
696 device_printf(dev,"Vlan in use, detach first\n");
697 return (EBUSY);
698 }
699
700 ether_ifdetach(adapter->ifp);
701
702 if (adapter->led_dev != NULL)
703 led_destroy(adapter->led_dev);
704
705#ifdef DEVICE_POLLING
706 if (ifp->if_capenable & IFCAP_POLLING)
707 ether_poll_deregister(ifp);
708#endif
709
710 IGB_CORE_LOCK(adapter);
711 adapter->in_detach = 1;
712 igb_stop(adapter);
713 IGB_CORE_UNLOCK(adapter);
714
715 e1000_phy_hw_reset(&adapter->hw);
716
717 /* Give control back to firmware */
718 igb_release_manageability(adapter);
719 igb_release_hw_control(adapter);
720
721 if (adapter->wol) {
722 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
723 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
724 igb_enable_wakeup(dev);
725 }
726
727 /* Unregister VLAN events */
728 if (adapter->vlan_attach != NULL)
729 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
730 if (adapter->vlan_detach != NULL)
731 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
732
733 callout_drain(&adapter->timer);
734
735#ifdef DEV_NETMAP
736 netmap_detach(adapter->ifp);
737#endif /* DEV_NETMAP */
738 igb_free_pci_resources(adapter);
739 bus_generic_detach(dev);
740 if_free(ifp);
741
742 igb_free_transmit_structures(adapter);
743 igb_free_receive_structures(adapter);
744 if (adapter->mta != NULL)
745 free(adapter->mta, M_DEVBUF);
746
747 IGB_CORE_LOCK_DESTROY(adapter);
748
749 return (0);
750}
751
752/*********************************************************************
753 *
754 * Shutdown entry point
755 *
756 **********************************************************************/
757
758static int
759igb_shutdown(device_t dev)
760{
761 return igb_suspend(dev);
762}
763
764/*
765 * Suspend/resume device methods.
766 */
767static int
768igb_suspend(device_t dev)
769{
770 struct adapter *adapter = device_get_softc(dev);
771
772 IGB_CORE_LOCK(adapter);
773
774 igb_stop(adapter);
775
776 igb_release_manageability(adapter);
777 igb_release_hw_control(adapter);
778
779 if (adapter->wol) {
780 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
781 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
782 igb_enable_wakeup(dev);
783 }
784
785 IGB_CORE_UNLOCK(adapter);
786
787 return bus_generic_suspend(dev);
788}
789
790static int
791igb_resume(device_t dev)
792{
793 struct adapter *adapter = device_get_softc(dev);
794 struct tx_ring *txr = adapter->tx_rings;
795 struct ifnet *ifp = adapter->ifp;
796
797 IGB_CORE_LOCK(adapter);
798 igb_init_locked(adapter);
799 igb_init_manageability(adapter);
800
801 if ((ifp->if_flags & IFF_UP) &&
802 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
803 for (int i = 0; i < adapter->num_queues; i++, txr++) {
804 IGB_TX_LOCK(txr);
805#ifndef IGB_LEGACY_TX
806 /* Process the stack queue only if not depleted */
807 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
808 !drbr_empty(ifp, txr->br))
809 igb_mq_start_locked(ifp, txr);
810#else
811 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
812 igb_start_locked(txr, ifp);
813#endif
814 IGB_TX_UNLOCK(txr);
815 }
816 }
817 IGB_CORE_UNLOCK(adapter);
818
819 return bus_generic_resume(dev);
820}
821
822
823#ifdef IGB_LEGACY_TX
824
825/*********************************************************************
826 * Transmit entry point
827 *
828 * igb_start is called by the stack to initiate a transmit.
829 * The driver will remain in this routine as long as there are
830 * packets to transmit and transmit resources are available.
831 * In case resources are not available stack is notified and
832 * the packet is requeued.
833 **********************************************************************/
834
835static void
836igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
837{
838 struct adapter *adapter = ifp->if_softc;
839 struct mbuf *m_head;
840
841 IGB_TX_LOCK_ASSERT(txr);
842
843 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
844 IFF_DRV_RUNNING)
845 return;
846 if (!adapter->link_active)
847 return;
848
849 /* Call cleanup if number of TX descriptors low */
850 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
851 igb_txeof(txr);
852
853 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
854 if (txr->tx_avail <= IGB_MAX_SCATTER) {
855 txr->queue_status |= IGB_QUEUE_DEPLETED;
856 break;
857 }
858 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
859 if (m_head == NULL)
860 break;
861 /*
862 * Encapsulation can modify our pointer, and or make it
863 * NULL on failure. In that event, we can't requeue.
864 */
865 if (igb_xmit(txr, &m_head)) {
866 if (m_head != NULL)
867 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
868 if (txr->tx_avail <= IGB_MAX_SCATTER)
869 txr->queue_status |= IGB_QUEUE_DEPLETED;
870 break;
871 }
872
873 /* Send a copy of the frame to the BPF listener */
874 ETHER_BPF_MTAP(ifp, m_head);
875
876 /* Set watchdog on */
877 txr->watchdog_time = ticks;
878 txr->queue_status |= IGB_QUEUE_WORKING;
879 }
880}
881
882/*
883 * Legacy TX driver routine, called from the
884 * stack, always uses tx[0], and spins for it.
885 * Should not be used with multiqueue tx
886 */
887static void
888igb_start(struct ifnet *ifp)
889{
890 struct adapter *adapter = ifp->if_softc;
891 struct tx_ring *txr = adapter->tx_rings;
892
893 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
894 IGB_TX_LOCK(txr);
895 igb_start_locked(txr, ifp);
896 IGB_TX_UNLOCK(txr);
897 }
898 return;
899}
900
901#else /* ~IGB_LEGACY_TX */
902
903/*
904** Multiqueue Transmit Entry:
905** quick turnaround to the stack
906**
907*/
908static int
909igb_mq_start(struct ifnet *ifp, struct mbuf *m)
910{
911 struct adapter *adapter = ifp->if_softc;
912 struct igb_queue *que;
913 struct tx_ring *txr;
914 int i, err = 0;
915#ifdef RSS
916 uint32_t bucket_id;
917#endif
918
919 /* Which queue to use */
920 /*
921 * When doing RSS, map it to the same outbound queue
922 * as the incoming flow would be mapped to.
923 *
924 * If everything is setup correctly, it should be the
925 * same bucket that the current CPU we're on is.
926 */
927 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
928#ifdef RSS
929 if (rss_hash2bucket(m->m_pkthdr.flowid,
930 M_HASHTYPE_GET(m), &bucket_id) == 0) {
931 /* XXX TODO: spit out something if bucket_id > num_queues? */
932 i = bucket_id % adapter->num_queues;
933 } else {
934#endif
935 i = m->m_pkthdr.flowid % adapter->num_queues;
936#ifdef RSS
937 }
938#endif
939 } else {
940 i = curcpu % adapter->num_queues;
941 }
942 txr = &adapter->tx_rings[i];
943 que = &adapter->queues[i];
944
945 err = drbr_enqueue(ifp, txr->br, m);
946 if (err)
947 return (err);
948 if (IGB_TX_TRYLOCK(txr)) {
949 igb_mq_start_locked(ifp, txr);
950 IGB_TX_UNLOCK(txr);
951 } else
952 taskqueue_enqueue(que->tq, &txr->txq_task);
953
954 return (0);
955}
956
957static int
958igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
959{
960 struct adapter *adapter = txr->adapter;
961 struct mbuf *next;
962 int err = 0, enq = 0;
963
964 IGB_TX_LOCK_ASSERT(txr);
965
966 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
967 adapter->link_active == 0)
968 return (ENETDOWN);
969
970 /* Process the queue */
971 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
972 if ((err = igb_xmit(txr, &next)) != 0) {
973 if (next == NULL) {
974 /* It was freed, move forward */
975 drbr_advance(ifp, txr->br);
976 } else {
977 /*
978 * Still have one left, it may not be
979 * the same since the transmit function
980 * may have changed it.
981 */
982 drbr_putback(ifp, txr->br, next);
983 }
984 break;
985 }
986 drbr_advance(ifp, txr->br);
987 enq++;
988 if (next->m_flags & M_MCAST && adapter->vf_ifp)
989 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
990 ETHER_BPF_MTAP(ifp, next);
991 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
992 break;
993 }
994 if (enq > 0) {
995 /* Set the watchdog */
996 txr->queue_status |= IGB_QUEUE_WORKING;
997 txr->watchdog_time = ticks;
998 }
999 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1000 igb_txeof(txr);
1001 if (txr->tx_avail <= IGB_MAX_SCATTER)
1002 txr->queue_status |= IGB_QUEUE_DEPLETED;
1003 return (err);
1004}
1005
1006/*
1007 * Called from a taskqueue to drain queued transmit packets.
1008 */
1009static void
1010igb_deferred_mq_start(void *arg, int pending)
1011{
1012 struct tx_ring *txr = arg;
1013 struct adapter *adapter = txr->adapter;
1014 struct ifnet *ifp = adapter->ifp;
1015
1016 IGB_TX_LOCK(txr);
1017 if (!drbr_empty(ifp, txr->br))
1018 igb_mq_start_locked(ifp, txr);
1019 IGB_TX_UNLOCK(txr);
1020}
1021
1022/*
1023** Flush all ring buffers
1024*/
1025static void
1026igb_qflush(struct ifnet *ifp)
1027{
1028 struct adapter *adapter = ifp->if_softc;
1029 struct tx_ring *txr = adapter->tx_rings;
1030 struct mbuf *m;
1031
1032 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1033 IGB_TX_LOCK(txr);
1034 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1035 m_freem(m);
1036 IGB_TX_UNLOCK(txr);
1037 }
1038 if_qflush(ifp);
1039}
1040#endif /* ~IGB_LEGACY_TX */
1041
1042/*********************************************************************
1043 * Ioctl entry point
1044 *
1045 * igb_ioctl is called when the user wants to configure the
1046 * interface.
1047 *
1048 * return 0 on success, positive on failure
1049 **********************************************************************/
1050
1051static int
1052igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1053{
1054 struct adapter *adapter = ifp->if_softc;
1055 struct ifreq *ifr = (struct ifreq *)data;
1056#if defined(INET) || defined(INET6)
1057 struct ifaddr *ifa = (struct ifaddr *)data;
1058#endif
1059 bool avoid_reset = FALSE;
1060 int error = 0;
1061
1062 if (adapter->in_detach)
1063 return (error);
1064
1065 switch (command) {
1066 case SIOCSIFADDR:
1067#ifdef INET
1068 if (ifa->ifa_addr->sa_family == AF_INET)
1069 avoid_reset = TRUE;
1070#endif
1071#ifdef INET6
1072 if (ifa->ifa_addr->sa_family == AF_INET6)
1073 avoid_reset = TRUE;
1074#endif
1075 /*
1076 ** Calling init results in link renegotiation,
1077 ** so we avoid doing it when possible.
1078 */
1079 if (avoid_reset) {
1080 ifp->if_flags |= IFF_UP;
1081 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1082 igb_init(adapter);
1083#ifdef INET
1084 if (!(ifp->if_flags & IFF_NOARP))
1085 arp_ifinit(ifp, ifa);
1086#endif
1087 } else
1088 error = ether_ioctl(ifp, command, data);
1089 break;
1090 case SIOCSIFMTU:
1091 {
1092 int max_frame_size;
1093
1094 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1095
1096 IGB_CORE_LOCK(adapter);
1097 max_frame_size = 9234;
1098 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1099 ETHER_CRC_LEN) {
1100 IGB_CORE_UNLOCK(adapter);
1101 error = EINVAL;
1102 break;
1103 }
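		/*
		 * Worked example (derived from the check above): with
		 * ETHER_HDR_LEN = 14 and ETHER_CRC_LEN = 4, the largest
		 * MTU accepted here is 9234 - 14 - 4 = 9216 bytes.
		 */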
1104
1105 ifp->if_mtu = ifr->ifr_mtu;
1106 adapter->max_frame_size =
1107 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1108 igb_init_locked(adapter);
1109 IGB_CORE_UNLOCK(adapter);
1110 break;
1111 }
1112 case SIOCSIFFLAGS:
1113 IOCTL_DEBUGOUT("ioctl rcv'd:\
1114 SIOCSIFFLAGS (Set Interface Flags)");
1115 IGB_CORE_LOCK(adapter);
1116 if (ifp->if_flags & IFF_UP) {
1117 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1118 if ((ifp->if_flags ^ adapter->if_flags) &
1119 (IFF_PROMISC | IFF_ALLMULTI)) {
1120 igb_disable_promisc(adapter);
1121 igb_set_promisc(adapter);
1122 }
1123 } else
1124 igb_init_locked(adapter);
1125 } else
1126 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1127 igb_stop(adapter);
1128 adapter->if_flags = ifp->if_flags;
1129 IGB_CORE_UNLOCK(adapter);
1130 break;
1131 case SIOCADDMULTI:
1132 case SIOCDELMULTI:
1133 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1134 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1135 IGB_CORE_LOCK(adapter);
1136 igb_disable_intr(adapter);
1137 igb_set_multi(adapter);
1138#ifdef DEVICE_POLLING
1139 if (!(ifp->if_capenable & IFCAP_POLLING))
1140#endif
1141 igb_enable_intr(adapter);
1142 IGB_CORE_UNLOCK(adapter);
1143 }
1144 break;
1145 case SIOCSIFMEDIA:
1146 /* Check SOL/IDER usage */
1147 IGB_CORE_LOCK(adapter);
1148 if (e1000_check_reset_block(&adapter->hw)) {
1149 IGB_CORE_UNLOCK(adapter);
1150 device_printf(adapter->dev, "Media change is"
1151 " blocked due to SOL/IDER session.\n");
1152 break;
1153 }
1154 IGB_CORE_UNLOCK(adapter);
1155 case SIOCGIFMEDIA:
1156 IOCTL_DEBUGOUT("ioctl rcv'd: \
1157 SIOCxIFMEDIA (Get/Set Interface Media)");
1158 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1159 break;
1160 case SIOCSIFCAP:
1161 {
1162 int mask, reinit;
1163
1164 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1165 reinit = 0;
1166 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1167#ifdef DEVICE_POLLING
1168 if (mask & IFCAP_POLLING) {
1169 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1170 error = ether_poll_register(igb_poll, ifp);
1171 if (error)
1172 return (error);
1173 IGB_CORE_LOCK(adapter);
1174 igb_disable_intr(adapter);
1175 ifp->if_capenable |= IFCAP_POLLING;
1176 IGB_CORE_UNLOCK(adapter);
1177 } else {
1178 error = ether_poll_deregister(ifp);
1179 /* Enable interrupt even in error case */
1180 IGB_CORE_LOCK(adapter);
1181 igb_enable_intr(adapter);
1182 ifp->if_capenable &= ~IFCAP_POLLING;
1183 IGB_CORE_UNLOCK(adapter);
1184 }
1185 }
1186#endif
1187 if (mask & IFCAP_HWCSUM) {
1188 ifp->if_capenable ^= IFCAP_HWCSUM;
1189 reinit = 1;
1190 }
1191 if (mask & IFCAP_TSO4) {
1192 ifp->if_capenable ^= IFCAP_TSO4;
1193 reinit = 1;
1194 }
1195 if (mask & IFCAP_TSO6) {
1196 ifp->if_capenable ^= IFCAP_TSO6;
1197 reinit = 1;
1198 }
1199 if (mask & IFCAP_VLAN_HWTAGGING) {
1200 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1201 reinit = 1;
1202 }
1203 if (mask & IFCAP_VLAN_HWFILTER) {
1204 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1205 reinit = 1;
1206 }
1207 if (mask & IFCAP_VLAN_HWTSO) {
1208 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1209 reinit = 1;
1210 }
1211 if (mask & IFCAP_LRO) {
1212 ifp->if_capenable ^= IFCAP_LRO;
1213 reinit = 1;
1214 }
1215 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1216 igb_init(adapter);
1217 VLAN_CAPABILITIES(ifp);
1218 break;
1219 }
1220
1221 default:
1222 error = ether_ioctl(ifp, command, data);
1223 break;
1224 }
1225
1226 return (error);
1227}
1228
1229
1230/*********************************************************************
1231 * Init entry point
1232 *
1233 * This routine is used in two ways. It is used by the stack as
1234 * the init entry point in the network interface structure. It is also used
1235 * by the driver as a hw/sw initialization routine to get to a
1236 * consistent state.
1237 *
1238 * return 0 on success, positive on failure
1239 **********************************************************************/
1240
1241static void
1242igb_init_locked(struct adapter *adapter)
1243{
1244 struct ifnet *ifp = adapter->ifp;
1245 device_t dev = adapter->dev;
1246
1247 INIT_DEBUGOUT("igb_init: begin");
1248
1249 IGB_CORE_LOCK_ASSERT(adapter);
1250
1251 igb_disable_intr(adapter);
1252 callout_stop(&adapter->timer);
1253
1254 /* Get the latest mac address, User can use a LAA */
1255 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1256 ETHER_ADDR_LEN);
1257
1258 /* Put the address into the Receive Address Array */
1259 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1260
1261 igb_reset(adapter);
1262 igb_update_link_status(adapter);
1263
1264 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1265
1266 /* Set hardware offload abilities */
1267 ifp->if_hwassist = 0;
1268 if (ifp->if_capenable & IFCAP_TXCSUM) {
1269 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1270#if __FreeBSD_version >= 800000
1271 if ((adapter->hw.mac.type == e1000_82576) ||
1272 (adapter->hw.mac.type == e1000_82580))
1273 ifp->if_hwassist |= CSUM_SCTP;
1274#endif
1275 }
1276
1277 if (ifp->if_capenable & IFCAP_TSO)
1278 ifp->if_hwassist |= CSUM_TSO;
1279
557 }
558 }
559
560 /*
561 ** Start from a known state; this is
562 ** important for reading the NVM and the
563 ** MAC address from it.
564 */
565 e1000_reset_hw(&adapter->hw);
566
567 /* Make sure we have a good EEPROM before we read from it */
568 if (((adapter->hw.mac.type != e1000_i210) &&
569 (adapter->hw.mac.type != e1000_i211)) &&
570 (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571 /*
572 ** Some PCI-E parts fail the first check due to
573 ** the link being in a sleep state; call it again,
574 ** and if it fails a second time it's a real issue.
575 */
576 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577 device_printf(dev,
578 "The EEPROM Checksum Is Not Valid\n");
579 error = EIO;
580 goto err_late;
581 }
582 }
583
584 /*
585 ** Copy the permanent MAC address out of the EEPROM
586 */
587 if (e1000_read_mac_addr(&adapter->hw) < 0) {
588 device_printf(dev, "EEPROM read error while reading MAC"
589 " address\n");
590 error = EIO;
591 goto err_late;
592 }
593 /* Check its sanity */
594 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595 device_printf(dev, "Invalid MAC address\n");
596 error = EIO;
597 goto err_late;
598 }
599
600 /* Setup OS specific network interface */
601 if (igb_setup_interface(dev, adapter) != 0)
602 goto err_late;
603
604 /* Now get a good starting state */
605 igb_reset(adapter);
606
607 /* Initialize statistics */
608 igb_update_stats_counters(adapter);
609
610 adapter->hw.mac.get_link_status = 1;
611 igb_update_link_status(adapter);
612
613 /* Indicate SOL/IDER usage */
614 if (e1000_check_reset_block(&adapter->hw))
615 device_printf(dev,
616 "PHY reset is blocked due to SOL/IDER session.\n");
617
618 /* Determine if we have to control management hardware */
619 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620
621 /*
622 * Set up Wake-on-LAN
623 */
624 /* APME bit in EEPROM is mapped to WUC.APME */
625 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626 if (eeprom_data)
627 adapter->wol = E1000_WUFC_MAG;
628
629 /* Register for VLAN events */
630 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634
635 igb_add_hw_stats(adapter);
636
637 /* Tell the stack that the interface is not active */
638 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
640
641 adapter->led_dev = led_create(igb_led_func, adapter,
642 device_get_nameunit(dev));
643
644 /*
645 ** Configure Interrupts
646 */
647 if ((adapter->msix > 1) && (igb_enable_msix))
648 error = igb_allocate_msix(adapter);
649 else /* MSI or Legacy */
650 error = igb_allocate_legacy(adapter);
651 if (error)
652 goto err_late;
653
654#ifdef DEV_NETMAP
655 igb_netmap_attach(adapter);
656#endif /* DEV_NETMAP */
657 INIT_DEBUGOUT("igb_attach: end");
658
659 return (0);
660
661err_late:
662 igb_detach(dev);
663 igb_free_transmit_structures(adapter);
664 igb_free_receive_structures(adapter);
665 igb_release_hw_control(adapter);
666err_pci:
667 igb_free_pci_resources(adapter);
668 if (adapter->ifp != NULL)
669 if_free(adapter->ifp);
670 free(adapter->mta, M_DEVBUF);
671 IGB_CORE_LOCK_DESTROY(adapter);
672
673 return (error);
674}
675
676/*********************************************************************
677 * Device removal routine
678 *
679 * The detach entry point is called when the driver is being removed.
680 * This routine stops the adapter and deallocates all the resources
681 * that were allocated for driver operation.
682 *
683 * return 0 on success, positive on failure
684 *********************************************************************/
685
686static int
687igb_detach(device_t dev)
688{
689 struct adapter *adapter = device_get_softc(dev);
690 struct ifnet *ifp = adapter->ifp;
691
692 INIT_DEBUGOUT("igb_detach: begin");
693
694 /* Make sure VLANS are not using driver */
695 if (adapter->ifp->if_vlantrunk != NULL) {
696 device_printf(dev,"Vlan in use, detach first\n");
697 return (EBUSY);
698 }
699
700 ether_ifdetach(adapter->ifp);
701
702 if (adapter->led_dev != NULL)
703 led_destroy(adapter->led_dev);
704
705#ifdef DEVICE_POLLING
706 if (ifp->if_capenable & IFCAP_POLLING)
707 ether_poll_deregister(ifp);
708#endif
709
710 IGB_CORE_LOCK(adapter);
711 adapter->in_detach = 1;
712 igb_stop(adapter);
713 IGB_CORE_UNLOCK(adapter);
714
715 e1000_phy_hw_reset(&adapter->hw);
716
717 /* Give control back to firmware */
718 igb_release_manageability(adapter);
719 igb_release_hw_control(adapter);
720
721 if (adapter->wol) {
722 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
723 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
724 igb_enable_wakeup(dev);
725 }
726
727 /* Unregister VLAN events */
728 if (adapter->vlan_attach != NULL)
729 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
730 if (adapter->vlan_detach != NULL)
731 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
732
733 callout_drain(&adapter->timer);
734
735#ifdef DEV_NETMAP
736 netmap_detach(adapter->ifp);
737#endif /* DEV_NETMAP */
738 igb_free_pci_resources(adapter);
739 bus_generic_detach(dev);
740 if_free(ifp);
741
742 igb_free_transmit_structures(adapter);
743 igb_free_receive_structures(adapter);
744 if (adapter->mta != NULL)
745 free(adapter->mta, M_DEVBUF);
746
747 IGB_CORE_LOCK_DESTROY(adapter);
748
749 return (0);
750}
751
752/*********************************************************************
753 *
754 * Shutdown entry point
755 *
756 **********************************************************************/
757
758static int
759igb_shutdown(device_t dev)
760{
761 return igb_suspend(dev);
762}
763
764/*
765 * Suspend/resume device methods.
766 */
767static int
768igb_suspend(device_t dev)
769{
770 struct adapter *adapter = device_get_softc(dev);
771
772 IGB_CORE_LOCK(adapter);
773
774 igb_stop(adapter);
775
776 igb_release_manageability(adapter);
777 igb_release_hw_control(adapter);
778
779 if (adapter->wol) {
780 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
781 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
782 igb_enable_wakeup(dev);
783 }
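	/*
	 * Note on the writes above: WUC.PME_EN arms PME assertion and WUFC
	 * selects the wake-up events chosen at attach time (magic packet
	 * when the EEPROM APME bit was set), so the port can wake the
	 * system while it is suspended.
	 */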
784
785 IGB_CORE_UNLOCK(adapter);
786
787 return bus_generic_suspend(dev);
788}
789
790static int
791igb_resume(device_t dev)
792{
793 struct adapter *adapter = device_get_softc(dev);
794 struct tx_ring *txr = adapter->tx_rings;
795 struct ifnet *ifp = adapter->ifp;
796
797 IGB_CORE_LOCK(adapter);
798 igb_init_locked(adapter);
799 igb_init_manageability(adapter);
800
801 if ((ifp->if_flags & IFF_UP) &&
802 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
803 for (int i = 0; i < adapter->num_queues; i++, txr++) {
804 IGB_TX_LOCK(txr);
805#ifndef IGB_LEGACY_TX
806 /* Process the stack queue only if not depleted */
807 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
808 !drbr_empty(ifp, txr->br))
809 igb_mq_start_locked(ifp, txr);
810#else
811 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
812 igb_start_locked(txr, ifp);
813#endif
814 IGB_TX_UNLOCK(txr);
815 }
816 }
817 IGB_CORE_UNLOCK(adapter);
818
819 return bus_generic_resume(dev);
820}
821
822
823#ifdef IGB_LEGACY_TX
824
825/*********************************************************************
826 * Transmit entry point
827 *
828 * igb_start is called by the stack to initiate a transmit.
829 * The driver will remain in this routine as long as there are
830 * packets to transmit and transmit resources are available.
831 * In case resources are not available, the stack is notified and
832 * the packet is requeued.
833 **********************************************************************/
834
835static void
836igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
837{
838 struct adapter *adapter = ifp->if_softc;
839 struct mbuf *m_head;
840
841 IGB_TX_LOCK_ASSERT(txr);
842
843 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
844 IFF_DRV_RUNNING)
845 return;
846 if (!adapter->link_active)
847 return;
848
849 /* Call cleanup if number of TX descriptors low */
850 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
851 igb_txeof(txr);
852
853 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
854 if (txr->tx_avail <= IGB_MAX_SCATTER) {
855 txr->queue_status |= IGB_QUEUE_DEPLETED;
856 break;
857 }
858 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
859 if (m_head == NULL)
860 break;
861 /*
862 * Encapsulation can modify our pointer, and/or make it
863 * NULL on failure. In that event, we can't requeue.
864 */
865 if (igb_xmit(txr, &m_head)) {
866 if (m_head != NULL)
867 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
868 if (txr->tx_avail <= IGB_MAX_SCATTER)
869 txr->queue_status |= IGB_QUEUE_DEPLETED;
870 break;
871 }
872
873 /* Send a copy of the frame to the BPF listener */
874 ETHER_BPF_MTAP(ifp, m_head);
875
876 /* Set watchdog on */
877 txr->watchdog_time = ticks;
878 txr->queue_status |= IGB_QUEUE_WORKING;
879 }
880}
881
882/*
883 * Legacy TX driver routine, called from the
884 * stack, always uses tx[0], and spins for it.
885 * Should not be used with multiqueue TX.
886 */
887static void
888igb_start(struct ifnet *ifp)
889{
890 struct adapter *adapter = ifp->if_softc;
891 struct tx_ring *txr = adapter->tx_rings;
892
893 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
894 IGB_TX_LOCK(txr);
895 igb_start_locked(txr, ifp);
896 IGB_TX_UNLOCK(txr);
897 }
898 return;
899}
900
901#else /* ~IGB_LEGACY_TX */
902
903/*
904** Multiqueue Transmit Entry:
905** quick turnaround to the stack
906**
907*/
908static int
909igb_mq_start(struct ifnet *ifp, struct mbuf *m)
910{
911 struct adapter *adapter = ifp->if_softc;
912 struct igb_queue *que;
913 struct tx_ring *txr;
914 int i, err = 0;
915#ifdef RSS
916 uint32_t bucket_id;
917#endif
918
919 /* Which queue to use */
920 /*
921 * When doing RSS, map it to the same outbound queue
922 * as the incoming flow would be mapped to.
923 *
924 * If everything is set up correctly, it should be the
925 * same bucket as the one the current CPU is in.
926 */
927 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
928#ifdef RSS
929 if (rss_hash2bucket(m->m_pkthdr.flowid,
930 M_HASHTYPE_GET(m), &bucket_id) == 0) {
931 /* XXX TODO: spit out something if bucket_id > num_queues? */
932 i = bucket_id % adapter->num_queues;
933 } else {
934#endif
935 i = m->m_pkthdr.flowid % adapter->num_queues;
936#ifdef RSS
937 }
938#endif
939 } else {
940 i = curcpu % adapter->num_queues;
941 }
942 txr = &adapter->tx_rings[i];
943 que = &adapter->queues[i];
944
945 err = drbr_enqueue(ifp, txr->br, m);
946 if (err)
947 return (err);
948 if (IGB_TX_TRYLOCK(txr)) {
949 igb_mq_start_locked(ifp, txr);
950 IGB_TX_UNLOCK(txr);
951 } else
952 taskqueue_enqueue(que->tq, &txr->txq_task);
953
954 return (0);
955}
956
957static int
958igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
959{
960 struct adapter *adapter = txr->adapter;
961 struct mbuf *next;
962 int err = 0, enq = 0;
963
964 IGB_TX_LOCK_ASSERT(txr);
965
966 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
967 adapter->link_active == 0)
968 return (ENETDOWN);
969
970 /* Process the queue */
971 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
972 if ((err = igb_xmit(txr, &next)) != 0) {
973 if (next == NULL) {
974 /* It was freed, move forward */
975 drbr_advance(ifp, txr->br);
976 } else {
977 /*
978 * Still have one left, it may not be
979 * the same since the transmit function
980 * may have changed it.
981 */
982 drbr_putback(ifp, txr->br, next);
983 }
984 break;
985 }
986 drbr_advance(ifp, txr->br);
987 enq++;
988 if (next->m_flags & M_MCAST && adapter->vf_ifp)
989 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
990 ETHER_BPF_MTAP(ifp, next);
991 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
992 break;
993 }
994 if (enq > 0) {
995 /* Set the watchdog */
996 txr->queue_status |= IGB_QUEUE_WORKING;
997 txr->watchdog_time = ticks;
998 }
999 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1000 igb_txeof(txr);
1001 if (txr->tx_avail <= IGB_MAX_SCATTER)
1002 txr->queue_status |= IGB_QUEUE_DEPLETED;
1003 return (err);
1004}
1005
1006/*
1007 * Called from a taskqueue to drain queued transmit packets.
1008 */
1009static void
1010igb_deferred_mq_start(void *arg, int pending)
1011{
1012 struct tx_ring *txr = arg;
1013 struct adapter *adapter = txr->adapter;
1014 struct ifnet *ifp = adapter->ifp;
1015
1016 IGB_TX_LOCK(txr);
1017 if (!drbr_empty(ifp, txr->br))
1018 igb_mq_start_locked(ifp, txr);
1019 IGB_TX_UNLOCK(txr);
1020}
1021
1022/*
1023** Flush all ring buffers
1024*/
1025static void
1026igb_qflush(struct ifnet *ifp)
1027{
1028 struct adapter *adapter = ifp->if_softc;
1029 struct tx_ring *txr = adapter->tx_rings;
1030 struct mbuf *m;
1031
1032 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1033 IGB_TX_LOCK(txr);
1034 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1035 m_freem(m);
1036 IGB_TX_UNLOCK(txr);
1037 }
1038 if_qflush(ifp);
1039}
1040#endif /* ~IGB_LEGACY_TX */
1041
1042/*********************************************************************
1043 * Ioctl entry point
1044 *
1045 * igb_ioctl is called when the user wants to configure the
1046 * interface.
1047 *
1048 * return 0 on success, positive on failure
1049 **********************************************************************/
1050
1051static int
1052igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1053{
1054 struct adapter *adapter = ifp->if_softc;
1055 struct ifreq *ifr = (struct ifreq *)data;
1056#if defined(INET) || defined(INET6)
1057 struct ifaddr *ifa = (struct ifaddr *)data;
1058#endif
1059 bool avoid_reset = FALSE;
1060 int error = 0;
1061
1062 if (adapter->in_detach)
1063 return (error);
1064
1065 switch (command) {
1066 case SIOCSIFADDR:
1067#ifdef INET
1068 if (ifa->ifa_addr->sa_family == AF_INET)
1069 avoid_reset = TRUE;
1070#endif
1071#ifdef INET6
1072 if (ifa->ifa_addr->sa_family == AF_INET6)
1073 avoid_reset = TRUE;
1074#endif
1075 /*
1076 ** Calling init results in link renegotiation,
1077 ** so we avoid doing it when possible.
1078 */
1079 if (avoid_reset) {
1080 ifp->if_flags |= IFF_UP;
1081 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1082 igb_init(adapter);
1083#ifdef INET
1084 if (!(ifp->if_flags & IFF_NOARP))
1085 arp_ifinit(ifp, ifa);
1086#endif
1087 } else
1088 error = ether_ioctl(ifp, command, data);
1089 break;
1090 case SIOCSIFMTU:
1091 {
1092 int max_frame_size;
1093
1094 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1095
1096 IGB_CORE_LOCK(adapter);
1097 max_frame_size = 9234;
1098 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1099 ETHER_CRC_LEN) {
1100 IGB_CORE_UNLOCK(adapter);
1101 error = EINVAL;
1102 break;
1103 }
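		/*
		 * Worked example (derived from the check above): with
		 * ETHER_HDR_LEN = 14 and ETHER_CRC_LEN = 4, the largest
		 * MTU accepted here is 9234 - 14 - 4 = 9216 bytes.
		 */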
1104
1105 ifp->if_mtu = ifr->ifr_mtu;
1106 adapter->max_frame_size =
1107 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1108 igb_init_locked(adapter);
1109 IGB_CORE_UNLOCK(adapter);
1110 break;
1111 }
1112 case SIOCSIFFLAGS:
1113 IOCTL_DEBUGOUT("ioctl rcv'd:\
1114 SIOCSIFFLAGS (Set Interface Flags)");
1115 IGB_CORE_LOCK(adapter);
1116 if (ifp->if_flags & IFF_UP) {
1117 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1118 if ((ifp->if_flags ^ adapter->if_flags) &
1119 (IFF_PROMISC | IFF_ALLMULTI)) {
1120 igb_disable_promisc(adapter);
1121 igb_set_promisc(adapter);
1122 }
1123 } else
1124 igb_init_locked(adapter);
1125 } else
1126 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1127 igb_stop(adapter);
1128 adapter->if_flags = ifp->if_flags;
1129 IGB_CORE_UNLOCK(adapter);
1130 break;
1131 case SIOCADDMULTI:
1132 case SIOCDELMULTI:
1133 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1134 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1135 IGB_CORE_LOCK(adapter);
1136 igb_disable_intr(adapter);
1137 igb_set_multi(adapter);
1138#ifdef DEVICE_POLLING
1139 if (!(ifp->if_capenable & IFCAP_POLLING))
1140#endif
1141 igb_enable_intr(adapter);
1142 IGB_CORE_UNLOCK(adapter);
1143 }
1144 break;
1145 case SIOCSIFMEDIA:
1146 /* Check SOL/IDER usage */
1147 IGB_CORE_LOCK(adapter);
1148 if (e1000_check_reset_block(&adapter->hw)) {
1149 IGB_CORE_UNLOCK(adapter);
1150 device_printf(adapter->dev, "Media change is"
1151 " blocked due to SOL/IDER session.\n");
1152 break;
1153 }
1154 IGB_CORE_UNLOCK(adapter);
1155 case SIOCGIFMEDIA:
1156 IOCTL_DEBUGOUT("ioctl rcv'd: \
1157 SIOCxIFMEDIA (Get/Set Interface Media)");
1158 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1159 break;
1160 case SIOCSIFCAP:
1161 {
1162 int mask, reinit;
1163
1164 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1165 reinit = 0;
1166 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1167#ifdef DEVICE_POLLING
1168 if (mask & IFCAP_POLLING) {
1169 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1170 error = ether_poll_register(igb_poll, ifp);
1171 if (error)
1172 return (error);
1173 IGB_CORE_LOCK(adapter);
1174 igb_disable_intr(adapter);
1175 ifp->if_capenable |= IFCAP_POLLING;
1176 IGB_CORE_UNLOCK(adapter);
1177 } else {
1178 error = ether_poll_deregister(ifp);
1179 /* Enable interrupt even in error case */
1180 IGB_CORE_LOCK(adapter);
1181 igb_enable_intr(adapter);
1182 ifp->if_capenable &= ~IFCAP_POLLING;
1183 IGB_CORE_UNLOCK(adapter);
1184 }
1185 }
1186#endif
1187 if (mask & IFCAP_HWCSUM) {
1188 ifp->if_capenable ^= IFCAP_HWCSUM;
1189 reinit = 1;
1190 }
1191 if (mask & IFCAP_TSO4) {
1192 ifp->if_capenable ^= IFCAP_TSO4;
1193 reinit = 1;
1194 }
1195 if (mask & IFCAP_TSO6) {
1196 ifp->if_capenable ^= IFCAP_TSO6;
1197 reinit = 1;
1198 }
1199 if (mask & IFCAP_VLAN_HWTAGGING) {
1200 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1201 reinit = 1;
1202 }
1203 if (mask & IFCAP_VLAN_HWFILTER) {
1204 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1205 reinit = 1;
1206 }
1207 if (mask & IFCAP_VLAN_HWTSO) {
1208 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1209 reinit = 1;
1210 }
1211 if (mask & IFCAP_LRO) {
1212 ifp->if_capenable ^= IFCAP_LRO;
1213 reinit = 1;
1214 }
1215 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1216 igb_init(adapter);
1217 VLAN_CAPABILITIES(ifp);
1218 break;
1219 }
1220
1221 default:
1222 error = ether_ioctl(ifp, command, data);
1223 break;
1224 }
1225
1226 return (error);
1227}
1228
1229
1230/*********************************************************************
1231 * Init entry point
1232 *
1233 * This routine is used in two ways. It is used by the stack as
1234 * the init entry point in the network interface structure. It is also used
1235 * by the driver as a hw/sw initialization routine to get to a
1236 * consistent state.
1237 *
1238 * return 0 on success, positive on failure
1239 **********************************************************************/
1240
1241static void
1242igb_init_locked(struct adapter *adapter)
1243{
1244 struct ifnet *ifp = adapter->ifp;
1245 device_t dev = adapter->dev;
1246
1247 INIT_DEBUGOUT("igb_init: begin");
1248
1249 IGB_CORE_LOCK_ASSERT(adapter);
1250
1251 igb_disable_intr(adapter);
1252 callout_stop(&adapter->timer);
1253
1254 /* Get the latest mac address, User can use a LAA */
1255 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1256 ETHER_ADDR_LEN);
1257
1258 /* Put the address into the Receive Address Array */
1259 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1260
1261 igb_reset(adapter);
1262 igb_update_link_status(adapter);
1263
1264 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1265
1266 /* Set hardware offload abilities */
1267 ifp->if_hwassist = 0;
1268 if (ifp->if_capenable & IFCAP_TXCSUM) {
1269 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1270#if __FreeBSD_version >= 800000
1271 if ((adapter->hw.mac.type == e1000_82576) ||
1272 (adapter->hw.mac.type == e1000_82580))
1273 ifp->if_hwassist |= CSUM_SCTP;
1274#endif
1275 }
1276
1277 if (ifp->if_capenable & IFCAP_TSO)
1278 ifp->if_hwassist |= CSUM_TSO;
1279
1280 /* Clear bad data from Rx FIFOs */
1281 e1000_rx_fifo_flush_82575(&adapter->hw);
1282
1280 /* Configure for OS presence */
1281 igb_init_manageability(adapter);
1282
1283 /* Prepare transmit descriptors and buffers */
1284 igb_setup_transmit_structures(adapter);
1285 igb_initialize_transmit_units(adapter);
1286
1287 /* Setup Multicast table */
1288 igb_set_multi(adapter);
1289
1290 /*
1291 ** Figure out the desired mbuf pool
1292 ** for doing jumbo/packetsplit
1293 */
1294 if (adapter->max_frame_size <= 2048)
1295 adapter->rx_mbuf_sz = MCLBYTES;
1296 else if (adapter->max_frame_size <= 4096)
1297 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1298 else
1299 adapter->rx_mbuf_sz = MJUM9BYTES;
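	/*
	 * Example (derived from the sizing above): a standard 1500-byte
	 * MTU gives a max_frame_size of 1518, so regular 2K clusters are
	 * used; a 9000-byte MTU gives 9018 and selects 9K jumbo clusters.
	 */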
1300
1301 /* Prepare receive descriptors and buffers */
1302 if (igb_setup_receive_structures(adapter)) {
1303 device_printf(dev, "Could not setup receive structures\n");
1304 return;
1305 }
1306 igb_initialize_receive_units(adapter);
1283 /* Configure for OS presence */
1284 igb_init_manageability(adapter);
1285
1286 /* Prepare transmit descriptors and buffers */
1287 igb_setup_transmit_structures(adapter);
1288 igb_initialize_transmit_units(adapter);
1289
1290 /* Setup Multicast table */
1291 igb_set_multi(adapter);
1292
1293 /*
1294 ** Figure out the desired mbuf pool
1295 ** for doing jumbo/packetsplit
1296 */
1297 if (adapter->max_frame_size <= 2048)
1298 adapter->rx_mbuf_sz = MCLBYTES;
1299 else if (adapter->max_frame_size <= 4096)
1300 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1301 else
1302 adapter->rx_mbuf_sz = MJUM9BYTES;
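	/*
	 * Example (derived from the sizing above): a standard 1500-byte
	 * MTU gives a max_frame_size of 1518, so regular 2K clusters are
	 * used; a 9000-byte MTU gives 9018 and selects 9K jumbo clusters.
	 */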
1303
1304 /* Prepare receive descriptors and buffers */
1305 if (igb_setup_receive_structures(adapter)) {
1306 device_printf(dev, "Could not setup receive structures\n");
1307 return;
1308 }
1309 igb_initialize_receive_units(adapter);
1307 e1000_rx_fifo_flush_82575(&adapter->hw);
1308
1309 /* Enable VLAN support */
1310 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1311 igb_setup_vlan_hw_support(adapter);
1312
1313 /* Don't lose promiscuous settings */
1314 igb_set_promisc(adapter);
1315
1316 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1317 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1318
1319 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1320 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1321
1322 if (adapter->msix > 1) /* Set up queue routing */
1323 igb_configure_queues(adapter);
1324
1325 /* this clears any pending interrupts */
1326 E1000_READ_REG(&adapter->hw, E1000_ICR);
1327#ifdef DEVICE_POLLING
1328 /*
1329 * Only enable interrupts if we are not polling; make sure
1330 * they are off otherwise.
1331 */
1332 if (ifp->if_capenable & IFCAP_POLLING)
1333 igb_disable_intr(adapter);
1334 else
1335#endif /* DEVICE_POLLING */
1336 {
1337 igb_enable_intr(adapter);
1338 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1339 }
1340
1341 /* Set Energy Efficient Ethernet */
1342 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1343 if (adapter->hw.mac.type == e1000_i354)
1310
1311 /* Enable VLAN support */
1312 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1313 igb_setup_vlan_hw_support(adapter);
1314
1315 /* Don't lose promiscuous settings */
1316 igb_set_promisc(adapter);
1317
1318 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1319 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1320
1321 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1322 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1323
1324 if (adapter->msix > 1) /* Set up queue routing */
1325 igb_configure_queues(adapter);
1326
1327 /* this clears any pending interrupts */
1328 E1000_READ_REG(&adapter->hw, E1000_ICR);
1329#ifdef DEVICE_POLLING
1330 /*
1331 * Only enable interrupts if we are not polling; make sure
1332 * they are off otherwise.
1333 */
1334 if (ifp->if_capenable & IFCAP_POLLING)
1335 igb_disable_intr(adapter);
1336 else
1337#endif /* DEVICE_POLLING */
1338 {
1339 igb_enable_intr(adapter);
1340 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1341 }
1342
1343 /* Set Energy Efficient Ethernet */
1344 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1345 if (adapter->hw.mac.type == e1000_i354)
1344 e1000_set_eee_i354(&adapter->hw);
1346 e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
1345 else
1347 else
1346 e1000_set_eee_i350(&adapter->hw);
1348 e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
1347 }
1348}
1349
1350static void
1351igb_init(void *arg)
1352{
1353 struct adapter *adapter = arg;
1354
1355 IGB_CORE_LOCK(adapter);
1356 igb_init_locked(adapter);
1357 IGB_CORE_UNLOCK(adapter);
1358}
1359
1360
1361static void
1362igb_handle_que(void *context, int pending)
1363{
1364 struct igb_queue *que = context;
1365 struct adapter *adapter = que->adapter;
1366 struct tx_ring *txr = que->txr;
1367 struct ifnet *ifp = adapter->ifp;
1368
1369 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1370 bool more;
1371
1372 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1373
1374 IGB_TX_LOCK(txr);
1375 igb_txeof(txr);
1376#ifndef IGB_LEGACY_TX
1377 /* Process the stack queue only if not depleted */
1378 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1379 !drbr_empty(ifp, txr->br))
1380 igb_mq_start_locked(ifp, txr);
1381#else
1382 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1383 igb_start_locked(txr, ifp);
1384#endif
1385 IGB_TX_UNLOCK(txr);
1386 /* Do we need another? */
1387 if (more) {
1388 taskqueue_enqueue(que->tq, &que->que_task);
1389 return;
1390 }
1391 }
1392
1393#ifdef DEVICE_POLLING
1394 if (ifp->if_capenable & IFCAP_POLLING)
1395 return;
1396#endif
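	/*
	 * que->eims is only set up on the MSI-X path, so when it is zero
	 * the fallback below re-arms the single shared MSI/legacy
	 * interrupt instead of one queue vector.
	 */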
1397 /* Reenable this interrupt */
1398 if (que->eims)
1399 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1400 else
1401 igb_enable_intr(adapter);
1402}
1403
1404/* Deal with link in a sleepable context */
1405static void
1406igb_handle_link(void *context, int pending)
1407{
1408 struct adapter *adapter = context;
1409
1410 IGB_CORE_LOCK(adapter);
1411 igb_handle_link_locked(adapter);
1412 IGB_CORE_UNLOCK(adapter);
1413}
1414
1415static void
1416igb_handle_link_locked(struct adapter *adapter)
1417{
1418 struct tx_ring *txr = adapter->tx_rings;
1419 struct ifnet *ifp = adapter->ifp;
1420
1421 IGB_CORE_LOCK_ASSERT(adapter);
1422 adapter->hw.mac.get_link_status = 1;
1423 igb_update_link_status(adapter);
1424 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1425 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1426 IGB_TX_LOCK(txr);
1427#ifndef IGB_LEGACY_TX
1428 /* Process the stack queue only if not depleted */
1429 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1430 !drbr_empty(ifp, txr->br))
1431 igb_mq_start_locked(ifp, txr);
1432#else
1433 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1434 igb_start_locked(txr, ifp);
1435#endif
1436 IGB_TX_UNLOCK(txr);
1437 }
1438 }
1439}
1440
1441/*********************************************************************
1442 *
1443 * MSI/Legacy Deferred
1444 * Interrupt Service routine
1445 *
1446 *********************************************************************/
1447static int
1448igb_irq_fast(void *arg)
1449{
1450 struct adapter *adapter = arg;
1451 struct igb_queue *que = adapter->queues;
1452 u32 reg_icr;
1453
1454
1455 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1456
1457 /* Hot eject? */
1458 if (reg_icr == 0xffffffff)
1459 return FILTER_STRAY;
1460
1461 /* Definitely not our interrupt. */
1462 if (reg_icr == 0x0)
1463 return FILTER_STRAY;
1464
1465 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1466 return FILTER_STRAY;
1467
1468 /*
1469 * Mask interrupts until the taskqueue is finished running. This is
1470 * cheap, just assume that it is needed. This also works around the
1471 * MSI message reordering errata on certain systems.
1472 */
1473 igb_disable_intr(adapter);
1474 taskqueue_enqueue(que->tq, &que->que_task);
1475
1476 /* Link status change */
1477 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1478 taskqueue_enqueue(que->tq, &adapter->link_task);
1479
1480 if (reg_icr & E1000_ICR_RXO)
1481 adapter->rx_overruns++;
1482 return FILTER_HANDLED;
1483}
1484
1485#ifdef DEVICE_POLLING
1486#if __FreeBSD_version >= 800000
1487#define POLL_RETURN_COUNT(a) (a)
1488static int
1489#else
1490#define POLL_RETURN_COUNT(a)
1491static void
1492#endif
1493igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1494{
1495 struct adapter *adapter = ifp->if_softc;
1496 struct igb_queue *que;
1497 struct tx_ring *txr;
1498 u32 reg_icr, rx_done = 0;
1499 u32 loop = IGB_MAX_LOOP;
1500 bool more;
1501
1502 IGB_CORE_LOCK(adapter);
1503 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1504 IGB_CORE_UNLOCK(adapter);
1505 return POLL_RETURN_COUNT(rx_done);
1506 }
1507
1508 if (cmd == POLL_AND_CHECK_STATUS) {
1509 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1510 /* Link status change */
1511 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1512 igb_handle_link_locked(adapter);
1513
1514 if (reg_icr & E1000_ICR_RXO)
1515 adapter->rx_overruns++;
1516 }
1517 IGB_CORE_UNLOCK(adapter);
1518
1519 for (int i = 0; i < adapter->num_queues; i++) {
1520 que = &adapter->queues[i];
1521 txr = que->txr;
1522
1523 igb_rxeof(que, count, &rx_done);
1524
1525 IGB_TX_LOCK(txr);
1526 do {
1527 more = igb_txeof(txr);
1528 } while (loop-- && more);
1529#ifndef IGB_LEGACY_TX
1530 if (!drbr_empty(ifp, txr->br))
1531 igb_mq_start_locked(ifp, txr);
1532#else
1533 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1534 igb_start_locked(txr, ifp);
1535#endif
1536 IGB_TX_UNLOCK(txr);
1537 }
1538
1539 return POLL_RETURN_COUNT(rx_done);
1540}
1541#endif /* DEVICE_POLLING */
1542
1543/*********************************************************************
1544 *
1545 * MSIX Que Interrupt Service routine
1546 *
1547 **********************************************************************/
1548static void
1549igb_msix_que(void *arg)
1550{
1551 struct igb_queue *que = arg;
1552 struct adapter *adapter = que->adapter;
1553 struct ifnet *ifp = adapter->ifp;
1554 struct tx_ring *txr = que->txr;
1555 struct rx_ring *rxr = que->rxr;
1556 u32 newitr = 0;
1557 bool more_rx;
1558
1559 /* Ignore spurious interrupts */
1560 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1561 return;
1562
1563 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1564 ++que->irqs;
1565
1566 IGB_TX_LOCK(txr);
1567 igb_txeof(txr);
1568#ifndef IGB_LEGACY_TX
1569 /* Process the stack queue only if not depleted */
1570 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1571 !drbr_empty(ifp, txr->br))
1572 igb_mq_start_locked(ifp, txr);
1573#else
1574 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1575 igb_start_locked(txr, ifp);
1576#endif
1577 IGB_TX_UNLOCK(txr);
1578
1579 more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1580
1581 if (adapter->enable_aim == FALSE)
1582 goto no_calc;
1583 /*
1584 ** Do Adaptive Interrupt Moderation:
1585 ** - Write out last calculated setting
1586 ** - Calculate based on average size over
1587 ** the last interval.
1588 */
1589 if (que->eitr_setting)
1590 E1000_WRITE_REG(&adapter->hw,
1591 E1000_EITR(que->msix), que->eitr_setting);
1592
1593 que->eitr_setting = 0;
1594
1595 /* Idle, do nothing */
1596 if ((txr->bytes == 0) && (rxr->bytes == 0))
1597 goto no_calc;
1598
1599 /* Use half the default if sub-gig */
1600 if (adapter->link_speed != 1000)
1601 newitr = IGB_DEFAULT_ITR / 2;
1602 else {
1603 if ((txr->bytes) && (txr->packets))
1604 newitr = txr->bytes/txr->packets;
1605 if ((rxr->bytes) && (rxr->packets))
1606 newitr = max(newitr,
1607 (rxr->bytes / rxr->packets));
1608 newitr += 24; /* account for hardware frame, crc */
1609 /* set an upper boundary */
1610 newitr = min(newitr, 3000);
1611 /* Be nice to the mid range */
1612 if ((newitr > 300) && (newitr < 1200))
1613 newitr = (newitr / 3);
1614 else
1615 newitr = (newitr / 2);
1616 }
1617 newitr &= 0x7FFC; /* Mask invalid bits */
1618 if (adapter->hw.mac.type == e1000_82575)
1619 newitr |= newitr << 16;
1620 else
1621 newitr |= E1000_EITR_CNT_IGNR;
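	/*
	 * Worked example of the calculation above: at 1Gbps with an
	 * average frame of ~600 bytes, newitr = 600 + 24 = 624; that falls
	 * in the 300-1200 mid range and becomes 624 / 3 = 208, which the
	 * mask then truncates to a multiple of 4. Larger average frames
	 * thus yield a larger interval, which should mean fewer
	 * interrupts per second.
	 */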
1622
1623 /* save for next interrupt */
1624 que->eitr_setting = newitr;
1625
1626 /* Reset state */
1627 txr->bytes = 0;
1628 txr->packets = 0;
1629 rxr->bytes = 0;
1630 rxr->packets = 0;
1631
1632no_calc:
1633 /* Schedule a clean task if needed */
1634 if (more_rx)
1635 taskqueue_enqueue(que->tq, &que->que_task);
1636 else
1637 /* Reenable this interrupt */
1638 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1639 return;
1640}
1641
1642
1643/*********************************************************************
1644 *
1645 * MSIX Link Interrupt Service routine
1646 *
1647 **********************************************************************/
1648
1649static void
1650igb_msix_link(void *arg)
1651{
1652 struct adapter *adapter = arg;
1653 u32 icr;
1654
1655 ++adapter->link_irq;
1656 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1657 if (!(icr & E1000_ICR_LSC))
1658 goto spurious;
1659 igb_handle_link(adapter, 0);
1660
1661spurious:
1662 /* Rearm */
1663 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1664 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1665 return;
1666}
1667
1668
1669/*********************************************************************
1670 *
1671 * Media Ioctl callback
1672 *
1673 * This routine is called whenever the user queries the status of
1674 * the interface using ifconfig.
1675 *
1676 **********************************************************************/
1677static void
1678igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679{
1680 struct adapter *adapter = ifp->if_softc;
1681
1682 INIT_DEBUGOUT("igb_media_status: begin");
1683
1684 IGB_CORE_LOCK(adapter);
1685 igb_update_link_status(adapter);
1686
1687 ifmr->ifm_status = IFM_AVALID;
1688 ifmr->ifm_active = IFM_ETHER;
1689
1690 if (!adapter->link_active) {
1691 IGB_CORE_UNLOCK(adapter);
1692 return;
1693 }
1694
1695 ifmr->ifm_status |= IFM_ACTIVE;
1696
1697 switch (adapter->link_speed) {
1698 case 10:
1699 ifmr->ifm_active |= IFM_10_T;
1700 break;
1701 case 100:
1702 /*
1703 ** Support for 100Mb SFP - these are Fiber
1704 ** but the media type appears as serdes
1705 */
1706 if (adapter->hw.phy.media_type ==
1707 e1000_media_type_internal_serdes)
1708 ifmr->ifm_active |= IFM_100_FX;
1709 else
1710 ifmr->ifm_active |= IFM_100_TX;
1711 break;
1712 case 1000:
1713 ifmr->ifm_active |= IFM_1000_T;
1714 break;
1715 case 2500:
1716 ifmr->ifm_active |= IFM_2500_SX;
1717 break;
1718 }
1719
1720 if (adapter->link_duplex == FULL_DUPLEX)
1721 ifmr->ifm_active |= IFM_FDX;
1722 else
1723 ifmr->ifm_active |= IFM_HDX;
1724
1725 IGB_CORE_UNLOCK(adapter);
1726}
1727
1728/*********************************************************************
1729 *
1730 * Media Ioctl callback
1731 *
1732 * This routine is called when the user changes speed/duplex using
1733 * media/mediaopt options with ifconfig.
1734 *
1735 **********************************************************************/
1736static int
1737igb_media_change(struct ifnet *ifp)
1738{
1739 struct adapter *adapter = ifp->if_softc;
1740 struct ifmedia *ifm = &adapter->media;
1741
1742 INIT_DEBUGOUT("igb_media_change: begin");
1743
1744 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1745 return (EINVAL);
1746
1747 IGB_CORE_LOCK(adapter);
1748 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1749 case IFM_AUTO:
1750 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1751 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1752 break;
1753 case IFM_1000_LX:
1754 case IFM_1000_SX:
1755 case IFM_1000_T:
1756 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1757 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1758 break;
1759 case IFM_100_TX:
1760 adapter->hw.mac.autoneg = FALSE;
1761 adapter->hw.phy.autoneg_advertised = 0;
1762 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1763 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1764 else
1765 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1766 break;
1767 case IFM_10_T:
1768 adapter->hw.mac.autoneg = FALSE;
1769 adapter->hw.phy.autoneg_advertised = 0;
1770 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1772 else
1773 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1774 break;
1775 default:
1776 device_printf(adapter->dev, "Unsupported media type\n");
1777 }
1778
1779 igb_init_locked(adapter);
1780 IGB_CORE_UNLOCK(adapter);
1781
1782 return (0);
1783}
1784
1785
1786/*********************************************************************
1787 *
1788 * This routine maps the mbufs to Advanced TX descriptors.
1789 *
1790 **********************************************************************/
1791static int
1792igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793{
1794 struct adapter *adapter = txr->adapter;
1795 u32 olinfo_status = 0, cmd_type_len;
1796 int i, j, error, nsegs;
1797 int first;
1798 bool remap = TRUE;
1799 struct mbuf *m_head;
1800 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1801 bus_dmamap_t map;
1802 struct igb_tx_buf *txbuf;
1803 union e1000_adv_tx_desc *txd = NULL;
1804
1805 m_head = *m_headp;
1806
1807 /* Basic descriptor defines */
1808 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1809 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
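	/*
	 * Note: DTYP_DATA marks this as an advanced data descriptor, IFCS
	 * tells the hardware to insert the Ethernet FCS, and DEXT selects
	 * the extended (advanced) descriptor format.
	 */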
1810
1811 if (m_head->m_flags & M_VLANTAG)
1812 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1813
1814 /*
1815 * It is important to capture the first descriptor
1816 * used because it will contain the index of
1817 * the one we tell the hardware to report back
1818 */
1819 first = txr->next_avail_desc;
1820 txbuf = &txr->tx_buffers[first];
1821 map = txbuf->map;
1822
1823 /*
1824 * Map the packet for DMA.
1825 */
1826retry:
1827 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1828 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1829
1830 if (__predict_false(error)) {
1831 struct mbuf *m;
1832
1833 switch (error) {
1834 case EFBIG:
1835 /* Try it again? - one try */
1836 if (remap == TRUE) {
1837 remap = FALSE;
1838 m = m_collapse(*m_headp, M_NOWAIT,
1839 IGB_MAX_SCATTER);
1840 if (m == NULL) {
1841 adapter->mbuf_defrag_failed++;
1842 m_freem(*m_headp);
1843 *m_headp = NULL;
1844 return (ENOBUFS);
1845 }
1846 *m_headp = m;
1847 goto retry;
1848 } else
1849 return (error);
1850 default:
1851 txr->no_tx_dma_setup++;
1852 m_freem(*m_headp);
1853 *m_headp = NULL;
1854 return (error);
1855 }
1856 }
1857
1858 /* Make certain there are enough descriptors */
1859 if (nsegs > txr->tx_avail - 2) {
1860 txr->no_desc_avail++;
1861 bus_dmamap_unload(txr->txtag, map);
1862 return (ENOBUFS);
1863 }
1864 m_head = *m_headp;
1865
1866 /*
1867 ** Set up the appropriate offload context
1868 ** this will consume the first descriptor
1869 */
1870 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1871 if (__predict_false(error)) {
1872 m_freem(*m_headp);
1873 *m_headp = NULL;
1874 return (error);
1875 }
1876
1877 /* 82575 needs the queue index added */
1878 if (adapter->hw.mac.type == e1000_82575)
1879 olinfo_status |= txr->me << 4;
1880
1881 i = txr->next_avail_desc;
1882 for (j = 0; j < nsegs; j++) {
1883 bus_size_t seglen;
1884 bus_addr_t segaddr;
1885
1886 txbuf = &txr->tx_buffers[i];
1887 txd = &txr->tx_base[i];
1888 seglen = segs[j].ds_len;
1889 segaddr = htole64(segs[j].ds_addr);
1890
1891 txd->read.buffer_addr = segaddr;
1892 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1893 cmd_type_len | seglen);
1894 txd->read.olinfo_status = htole32(olinfo_status);
1895
1896 if (++i == txr->num_desc)
1897 i = 0;
1898 }
1899
1900 txd->read.cmd_type_len |=
1901 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
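	/*
	 * EOP marks the final descriptor of the frame and RS asks the
	 * hardware to write back completion status for it; the eop pointer
	 * saved below is what igb_txeof() checks to reclaim the frame.
	 */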
1902 txr->tx_avail -= nsegs;
1903 txr->next_avail_desc = i;
1904
1905 txbuf->m_head = m_head;
1906 /*
1907 ** Here we swap the map so the last descriptor,
1908 ** which gets the completion interrupt, has the
1909 ** real map, and the first descriptor gets the
1910 ** unused map from this descriptor.
1911 */
1912 txr->tx_buffers[first].map = txbuf->map;
1913 txbuf->map = map;
1914 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1915
1916 /* Set the EOP descriptor that will be marked done */
1917 txbuf = &txr->tx_buffers[first];
1918 txbuf->eop = txd;
1919
1920 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1921 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1922 /*
1923 * Advance the Transmit Descriptor Tail (TDT); this tells the
1924 * hardware that this frame is available to transmit.
1925 */
1926 ++txr->total_packets;
1927 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1928
1929 return (0);
1930}
1931static void
1932igb_set_promisc(struct adapter *adapter)
1933{
1934 struct ifnet *ifp = adapter->ifp;
1935 struct e1000_hw *hw = &adapter->hw;
1936 u32 reg;
1937
1938 if (adapter->vf_ifp) {
1939 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1940 return;
1941 }
1942
1943 reg = E1000_READ_REG(hw, E1000_RCTL);
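	/*
	 * RCTL.UPE enables unicast promiscuous receive and RCTL.MPE
	 * multicast promiscuous receive; promiscuous mode sets both
	 * below, while allmulti sets only MPE.
	 */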
1944 if (ifp->if_flags & IFF_PROMISC) {
1945 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1946 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1947 } else if (ifp->if_flags & IFF_ALLMULTI) {
1948 reg |= E1000_RCTL_MPE;
1949 reg &= ~E1000_RCTL_UPE;
1950 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1951 }
1952}
1953
1954static void
1955igb_disable_promisc(struct adapter *adapter)
1956{
1957 struct e1000_hw *hw = &adapter->hw;
1958 struct ifnet *ifp = adapter->ifp;
1959 u32 reg;
1960 int mcnt = 0;
1961
1962 if (adapter->vf_ifp) {
1963 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1964 return;
1965 }
1966 reg = E1000_READ_REG(hw, E1000_RCTL);
1967 reg &= (~E1000_RCTL_UPE);
1968 if (ifp->if_flags & IFF_ALLMULTI)
1969 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1970 else {
1971 struct ifmultiaddr *ifma;
1972#if __FreeBSD_version < 800000
1973 IF_ADDR_LOCK(ifp);
1974#else
1975 if_maddr_rlock(ifp);
1976#endif
1977 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1978 if (ifma->ifma_addr->sa_family != AF_LINK)
1979 continue;
1980 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1981 break;
1982 mcnt++;
1983 }
1984#if __FreeBSD_version < 800000
1985 IF_ADDR_UNLOCK(ifp);
1986#else
1987 if_maddr_runlock(ifp);
1988#endif
1989 }
1990 /* Don't disable if in MAX groups */
1991 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1992 reg &= (~E1000_RCTL_MPE);
1993 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1994}
1995
1996
1997/*********************************************************************
1998 * Multicast Update
1999 *
2000 * This routine is called whenever the multicast address list is updated.
2001 *
2002 **********************************************************************/
2003
2004static void
2005igb_set_multi(struct adapter *adapter)
2006{
2007 struct ifnet *ifp = adapter->ifp;
2008 struct ifmultiaddr *ifma;
2009 u32 reg_rctl = 0;
2010 u8 *mta;
2011
2012 int mcnt = 0;
2013
2014 IOCTL_DEBUGOUT("igb_set_multi: begin");
2015
2016 mta = adapter->mta;
2017 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2018 MAX_NUM_MULTICAST_ADDRESSES);
2019
2020#if __FreeBSD_version < 800000
2021 IF_ADDR_LOCK(ifp);
2022#else
2023 if_maddr_rlock(ifp);
2024#endif
2025 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2026 if (ifma->ifma_addr->sa_family != AF_LINK)
2027 continue;
2028
2029 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2030 break;
2031
2032 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2033 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2034 mcnt++;
2035 }
2036#if __FreeBSD_version < 800000
2037 IF_ADDR_UNLOCK(ifp);
2038#else
2039 if_maddr_runlock(ifp);
2040#endif
2041
2042 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2043 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2044 reg_rctl |= E1000_RCTL_MPE;
2045 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2046 } else
2047 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2048}
2049
2050
2051/*********************************************************************
2052 * Timer routine:
2053 * This routine checks for link status,
2054 * updates statistics, and does the watchdog.
2055 *
2056 **********************************************************************/
2057
2058static void
2059igb_local_timer(void *arg)
2060{
2061 struct adapter *adapter = arg;
2062 device_t dev = adapter->dev;
2063 struct ifnet *ifp = adapter->ifp;
2064 struct tx_ring *txr = adapter->tx_rings;
2065 struct igb_queue *que = adapter->queues;
2066 int hung = 0, busy = 0;
2067
2068
2069 IGB_CORE_LOCK_ASSERT(adapter);
2070
2071 igb_update_link_status(adapter);
2072 igb_update_stats_counters(adapter);
2073
2074 /*
2075 ** Check the TX queues status
2076 ** - central locked handling of OACTIVE
2077 ** - watchdog only if all queues show hung
2078 */
2079 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2080 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2081 (adapter->pause_frames == 0))
2082 ++hung;
2083 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2084 ++busy;
2085 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2086 taskqueue_enqueue(que->tq, &que->que_task);
2087 }
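	/*
	 * Summary of the loop above: a queue counts as hung only when it
	 * is flagged HUNG and no pause frames were seen this interval;
	 * the watchdog below fires only if every queue is hung, and
	 * OACTIVE is set only while every queue is depleted.
	 */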
2088 if (hung == adapter->num_queues)
2089 goto timeout;
2090 if (busy == adapter->num_queues)
2091 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2092 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2093 (busy < adapter->num_queues))
2094 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2095
2096 adapter->pause_frames = 0;
2097 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2098#ifndef DEVICE_POLLING
2099 /* Schedule all queue interrupts - deadlock protection */
2100 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2101#endif
2102 return;
2103
2104timeout:
2105 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2106 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2107 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2108 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2109 device_printf(dev,"TX(%d) desc avail = %d,"
2110 "Next TX to Clean = %d\n",
2111 txr->me, txr->tx_avail, txr->next_to_clean);
2112 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2113 adapter->watchdog_events++;
2114 igb_init_locked(adapter);
2115}
2116
2117static void
2118igb_update_link_status(struct adapter *adapter)
2119{
2120 struct e1000_hw *hw = &adapter->hw;
2121 struct e1000_fc_info *fc = &hw->fc;
2122 struct ifnet *ifp = adapter->ifp;
2123 device_t dev = adapter->dev;
2124 struct tx_ring *txr = adapter->tx_rings;
2125 u32 link_check, thstat, ctrl;
2126 char *flowctl = NULL;
2127
2128 link_check = thstat = ctrl = 0;
2129
2130 /* Get the cached link value or read for real */
2131 switch (hw->phy.media_type) {
2132 case e1000_media_type_copper:
2133 if (hw->mac.get_link_status) {
2134 /* Do the work to read phy */
2135 e1000_check_for_link(hw);
2136 link_check = !hw->mac.get_link_status;
2137 } else
2138 link_check = TRUE;
2139 break;
2140 case e1000_media_type_fiber:
2141 e1000_check_for_link(hw);
2142 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2143 E1000_STATUS_LU);
2144 break;
2145 case e1000_media_type_internal_serdes:
2146 e1000_check_for_link(hw);
2147 link_check = adapter->hw.mac.serdes_has_link;
2148 break;
2149 /* VF device is type_unknown */
2150 case e1000_media_type_unknown:
2151 e1000_check_for_link(hw);
2152 link_check = !hw->mac.get_link_status;
2153 /* Fall thru */
2154 default:
2155 break;
2156 }
2157
2158 /* Check for thermal downshift or shutdown */
2159 if (hw->mac.type == e1000_i350) {
2160 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2161 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2162 }
2163
2164 /* Get the flow control for display */
2165 switch (fc->current_mode) {
2166 case e1000_fc_rx_pause:
2167 flowctl = "RX";
2168 break;
2169 case e1000_fc_tx_pause:
2170 flowctl = "TX";
2171 break;
2172 case e1000_fc_full:
2173 flowctl = "Full";
2174 break;
2175 case e1000_fc_none:
2176 default:
2177 flowctl = "None";
2178 break;
2179 }
2180
2181 /* Now we check if a transition has happened */
2182 if (link_check && (adapter->link_active == 0)) {
2183 e1000_get_speed_and_duplex(&adapter->hw,
2184 &adapter->link_speed, &adapter->link_duplex);
2185 if (bootverbose)
2186 device_printf(dev, "Link is up %d Mbps %s,"
2187 " Flow Control: %s\n",
2188 adapter->link_speed,
2189 ((adapter->link_duplex == FULL_DUPLEX) ?
2190 "Full Duplex" : "Half Duplex"), flowctl);
2191 adapter->link_active = 1;
2192 ifp->if_baudrate = adapter->link_speed * 1000000;
2193 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2194 (thstat & E1000_THSTAT_LINK_THROTTLE))
2195 device_printf(dev, "Link: thermal downshift\n");
2196 /* Delay Link Up for Phy update */
2197 if (((hw->mac.type == e1000_i210) ||
2198 (hw->mac.type == e1000_i211)) &&
2199 (hw->phy.id == I210_I_PHY_ID))
2200 msec_delay(I210_LINK_DELAY);
2201 /* Reset if the media type changed. */
2202 if (hw->dev_spec._82575.media_changed) {
2203 hw->dev_spec._82575.media_changed = false;
2204 adapter->flags |= IGB_MEDIA_RESET;
2205 igb_reset(adapter);
2206 }
2207 /* This can sleep */
2208 if_link_state_change(ifp, LINK_STATE_UP);
2209 } else if (!link_check && (adapter->link_active == 1)) {
2210 ifp->if_baudrate = adapter->link_speed = 0;
2211 adapter->link_duplex = 0;
2212 if (bootverbose)
2213 device_printf(dev, "Link is Down\n");
2214 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2215 (thstat & E1000_THSTAT_PWR_DOWN))
2216 device_printf(dev, "Link: thermal shutdown\n");
2217 adapter->link_active = 0;
2218 /* This can sleep */
2219 if_link_state_change(ifp, LINK_STATE_DOWN);
2220 /* Reset queue state */
2221 for (int i = 0; i < adapter->num_queues; i++, txr++)
2222 txr->queue_status = IGB_QUEUE_IDLE;
2223 }
2224}
2225
2226/*********************************************************************
2227 *
2228 * This routine disables all traffic on the adapter by issuing a
2229 * global reset on the MAC and deallocates TX/RX buffers.
2230 *
2231 **********************************************************************/
2232
2233static void
2234igb_stop(void *arg)
2235{
2236 struct adapter *adapter = arg;
2237 struct ifnet *ifp = adapter->ifp;
2238 struct tx_ring *txr = adapter->tx_rings;
2239
2240 IGB_CORE_LOCK_ASSERT(adapter);
2241
2242 INIT_DEBUGOUT("igb_stop: begin");
2243
2244 igb_disable_intr(adapter);
2245
2246 callout_stop(&adapter->timer);
2247
2248 /* Tell the stack that the interface is no longer active */
2249 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2250 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2251
2252 /* Disarm watchdog timer. */
2253 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2254 IGB_TX_LOCK(txr);
2255 txr->queue_status = IGB_QUEUE_IDLE;
2256 IGB_TX_UNLOCK(txr);
2257 }
2258
2259 e1000_reset_hw(&adapter->hw);
2260 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2261
2262 e1000_led_off(&adapter->hw);
2263 e1000_cleanup_led(&adapter->hw);
2264}
2265
2266
2267/*********************************************************************
2268 *
2269 * Determine hardware revision.
2270 *
2271 **********************************************************************/
2272static void
2273igb_identify_hardware(struct adapter *adapter)
2274{
2275 device_t dev = adapter->dev;
2276
2277 /* Make sure our PCI config space has the necessary stuff set */
2278 pci_enable_busmaster(dev);
2279 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2280
2281 /* Save off the information about this board */
2282 adapter->hw.vendor_id = pci_get_vendor(dev);
2283 adapter->hw.device_id = pci_get_device(dev);
2284 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2285 adapter->hw.subsystem_vendor_id =
2286 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2287 adapter->hw.subsystem_device_id =
2288 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2289
2290 /* Set MAC type early for PCI setup */
2291 e1000_set_mac_type(&adapter->hw);
2292
2293 /* Are we a VF device? */
2294 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2295 (adapter->hw.mac.type == e1000_vfadapt_i350))
2296 adapter->vf_ifp = 1;
2297 else
2298 adapter->vf_ifp = 0;
2299}
2300
2301static int
2302igb_allocate_pci_resources(struct adapter *adapter)
2303{
2304 device_t dev = adapter->dev;
2305 int rid;
2306
2307 rid = PCIR_BAR(0);
2308 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2309 &rid, RF_ACTIVE);
2310 if (adapter->pci_mem == NULL) {
2311 device_printf(dev, "Unable to allocate bus resource: memory\n");
2312 return (ENXIO);
2313 }
2314 adapter->osdep.mem_bus_space_tag =
2315 rman_get_bustag(adapter->pci_mem);
2316 adapter->osdep.mem_bus_space_handle =
2317 rman_get_bushandle(adapter->pci_mem);
2318 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2319
2320 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2321
2322 /* This will setup either MSI/X or MSI */
2323 adapter->msix = igb_setup_msix(adapter);
2324 adapter->hw.back = &adapter->osdep;
2325
2326 return (0);
2327}
2328
2329/*********************************************************************
2330 *
2331 * Setup the Legacy or MSI Interrupt handler
2332 *
2333 **********************************************************************/
2334static int
2335igb_allocate_legacy(struct adapter *adapter)
2336{
2337 device_t dev = adapter->dev;
2338 struct igb_queue *que = adapter->queues;
2339#ifndef IGB_LEGACY_TX
2340 struct tx_ring *txr = adapter->tx_rings;
2341#endif
2342 int error, rid = 0;
2343
2344 /* Turn off all interrupts */
2345 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2346
2347 /* MSI RID is 1 */
2348 if (adapter->msix == 1)
2349 rid = 1;
2350
2351 /* We allocate a single interrupt resource */
2352 adapter->res = bus_alloc_resource_any(dev,
2353 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2354 if (adapter->res == NULL) {
2355 device_printf(dev, "Unable to allocate bus resource: "
2356 "interrupt\n");
2357 return (ENXIO);
2358 }
2359
2360#ifndef IGB_LEGACY_TX
2361 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2362#endif
2363
2364 /*
2365 * Try allocating a fast interrupt and the associated deferred
2366 * processing contexts.
2367 */
2368 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2369 /* Make tasklet for deferred link handling */
2370 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2371 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2372 taskqueue_thread_enqueue, &que->tq);
2373 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2374 device_get_nameunit(adapter->dev));
2375 if ((error = bus_setup_intr(dev, adapter->res,
2376 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2377 adapter, &adapter->tag)) != 0) {
2378 device_printf(dev, "Failed to register fast interrupt "
2379 "handler: %d\n", error);
2380 taskqueue_free(que->tq);
2381 que->tq = NULL;
2382 return (error);
2383 }
2384
2385 return (0);
2386}
2387
2388
2389/*********************************************************************
2390 *
2391 * Setup the MSIX Queue Interrupt handlers:
2392 *
2393 **********************************************************************/
2394static int
2395igb_allocate_msix(struct adapter *adapter)
2396{
2397 device_t dev = adapter->dev;
2398 struct igb_queue *que = adapter->queues;
2399 int error, rid, vector = 0;
2400 int cpu_id = 0;
2401#ifdef RSS
2402 cpuset_t cpu_mask;
2403#endif
2404
2405 /* Be sure to start with all interrupts disabled */
2406 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2407 E1000_WRITE_FLUSH(&adapter->hw);
2408
2409#ifdef RSS
2410 /*
2411 * If we're doing RSS, the number of queues needs to
2412 * match the number of RSS buckets that are configured.
2413 *
2414 * + If there's more queues than RSS buckets, we'll end
2415 * up with queues that get no traffic.
2416 *
2417 * + If there's more RSS buckets than queues, we'll end
2418 * up having multiple RSS buckets map to the same queue,
2419 * so there'll be some contention.
2420 */
2421 if (adapter->num_queues != rss_getnumbuckets()) {
2422 device_printf(dev,
2423 "%s: number of queues (%d) != number of RSS buckets (%d)"
2424 "; performance will be impacted.\n",
2425 __func__,
2426 adapter->num_queues,
2427 rss_getnumbuckets());
2428 }
2429#endif
2430
2431 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2432		rid = vector + 1;
2433 que->res = bus_alloc_resource_any(dev,
2434 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2435 if (que->res == NULL) {
2436 device_printf(dev,
2437 "Unable to allocate bus resource: "
2438 "MSIX Queue Interrupt\n");
2439 return (ENXIO);
2440 }
2441 error = bus_setup_intr(dev, que->res,
2442 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2443 igb_msix_que, que, &que->tag);
2444 if (error) {
2445 que->res = NULL;
2446 device_printf(dev, "Failed to register Queue handler");
2447 return (error);
2448 }
2449#if __FreeBSD_version >= 800504
2450 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2451#endif
2452 que->msix = vector;
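		/*
		** The EIMS value differs by MAC: the 82575 uses fixed
		** per-queue bits defined in EICR, while later MACs use
		** one bit per MSI-X vector.
		*/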
2453 if (adapter->hw.mac.type == e1000_82575)
2454 que->eims = E1000_EICR_TX_QUEUE0 << i;
2455 else
2456 que->eims = 1 << vector;
2457
2458#ifdef RSS
2459 /*
2460 * The queue ID is used as the RSS layer bucket ID.
2461 * We look up the queue ID -> RSS CPU ID and select
2462 * that.
2463 */
2464 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2465#else
2466 /*
2467 * Bind the msix vector, and thus the
2468 * rings to the corresponding cpu.
2469 *
2470 * This just happens to match the default RSS round-robin
2471 * bucket -> queue -> CPU allocation.
2472 */
2473 if (adapter->num_queues > 1) {
2474 if (igb_last_bind_cpu < 0)
2475 igb_last_bind_cpu = CPU_FIRST();
2476 cpu_id = igb_last_bind_cpu;
2477 }
2478#endif
2479
2480 if (adapter->num_queues > 1) {
2481 bus_bind_intr(dev, que->res, cpu_id);
2482#ifdef RSS
2483 device_printf(dev,
2484 "Bound queue %d to RSS bucket %d\n",
2485 i, cpu_id);
2486#else
2487 device_printf(dev,
2488 "Bound queue %d to cpu %d\n",
2489 i, cpu_id);
2490#endif
2491 }
2492
2493#ifndef IGB_LEGACY_TX
2494 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2495 que->txr);
2496#endif
2497 /* Make tasklet for deferred handling */
2498 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2499 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2500 taskqueue_thread_enqueue, &que->tq);
2501 if (adapter->num_queues > 1) {
2502 /*
2503 * Only pin the taskqueue thread to a CPU if
2504 * RSS is in use.
2505 *
2506 * This again just happens to match the default RSS
2507 * round-robin bucket -> queue -> CPU allocation.
2508 */
2509#ifdef RSS
2510 CPU_SETOF(cpu_id, &cpu_mask);
2511 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2512 &cpu_mask,
2513 "%s que (bucket %d)",
2514 device_get_nameunit(adapter->dev),
2515 cpu_id);
2516#else
2517 taskqueue_start_threads(&que->tq, 1, PI_NET,
2518 "%s que (qid %d)",
2519 device_get_nameunit(adapter->dev),
2520 cpu_id);
2521#endif
2522 } else {
2523 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2524 device_get_nameunit(adapter->dev));
2525 }
2526
2527 /* Finally update the last bound CPU id */
2528 if (adapter->num_queues > 1)
2529 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2530 }
2531
2532 /* And Link */
2533 rid = vector + 1;
2534 adapter->res = bus_alloc_resource_any(dev,
2535 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2536 if (adapter->res == NULL) {
2537 device_printf(dev,
2538 "Unable to allocate bus resource: "
2539 "MSIX Link Interrupt\n");
2540 return (ENXIO);
2541 }
2542 if ((error = bus_setup_intr(dev, adapter->res,
2543 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2544 igb_msix_link, adapter, &adapter->tag)) != 0) {
2545 device_printf(dev, "Failed to register Link handler");
2546 return (error);
2547 }
2548#if __FreeBSD_version >= 800504
2549 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2550#endif
2551 adapter->linkvec = vector;
2552
2553 return (0);
2554}
2555
2556
2557static void
2558igb_configure_queues(struct adapter *adapter)
2559{
2560 struct e1000_hw *hw = &adapter->hw;
2561 struct igb_queue *que;
2562 u32 tmp, ivar = 0, newitr = 0;
2563
2564 /* First turn on RSS capability */
2565 if (adapter->hw.mac.type != e1000_82575)
2566 E1000_WRITE_REG(hw, E1000_GPIE,
2567 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2568 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2569
2570 /* Turn on MSIX */
2571 switch (adapter->hw.mac.type) {
2572 case e1000_82580:
2573 case e1000_i350:
2574 case e1000_i354:
2575 case e1000_i210:
2576 case e1000_i211:
2577 case e1000_vfadapt:
2578 case e1000_vfadapt_i350:
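		/*
		** On these MACs each IVAR register covers two queues:
		** bytes 0/2 hold the RX vectors and bytes 1/3 hold the
		** TX vectors for the even/odd queue respectively, which
		** is why the masks below differ for odd and even 'i'.
		*/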
2579 /* RX entries */
2580 for (int i = 0; i < adapter->num_queues; i++) {
2581 u32 index = i >> 1;
2582 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2583 que = &adapter->queues[i];
2584 if (i & 1) {
2585 ivar &= 0xFF00FFFF;
2586 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2587 } else {
2588 ivar &= 0xFFFFFF00;
2589 ivar |= que->msix | E1000_IVAR_VALID;
2590 }
2591 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2592 }
2593 /* TX entries */
2594 for (int i = 0; i < adapter->num_queues; i++) {
2595 u32 index = i >> 1;
2596 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2597 que = &adapter->queues[i];
2598 if (i & 1) {
2599 ivar &= 0x00FFFFFF;
2600 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2601 } else {
2602 ivar &= 0xFFFF00FF;
2603 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2604 }
2605 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2606 adapter->que_mask |= que->eims;
2607 }
2608
2609 /* And for the link interrupt */
2610 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2611 adapter->link_mask = 1 << adapter->linkvec;
2612 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2613 break;
2614 case e1000_82576:
2615 /* RX entries */
2616 for (int i = 0; i < adapter->num_queues; i++) {
2617 u32 index = i & 0x7; /* Each IVAR has two entries */
2618 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2619 que = &adapter->queues[i];
2620 if (i < 8) {
2621 ivar &= 0xFFFFFF00;
2622 ivar |= que->msix | E1000_IVAR_VALID;
2623 } else {
2624 ivar &= 0xFF00FFFF;
2625 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2626 }
2627 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2628 adapter->que_mask |= que->eims;
2629 }
2630 /* TX entries */
2631 for (int i = 0; i < adapter->num_queues; i++) {
2632 u32 index = i & 0x7; /* Each IVAR has two entries */
2633 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2634 que = &adapter->queues[i];
2635 if (i < 8) {
2636 ivar &= 0xFFFF00FF;
2637 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2638 } else {
2639 ivar &= 0x00FFFFFF;
2640 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2641 }
2642 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2643 adapter->que_mask |= que->eims;
2644 }
2645
2646 /* And for the link interrupt */
2647 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2648 adapter->link_mask = 1 << adapter->linkvec;
2649 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2650 break;
2651
2652 case e1000_82575:
2653 /* enable MSI-X support*/
2654 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2655 tmp |= E1000_CTRL_EXT_PBA_CLR;
2656 /* Auto-Mask interrupts upon ICR read. */
2657 tmp |= E1000_CTRL_EXT_EIAME;
2658 tmp |= E1000_CTRL_EXT_IRCA;
2659 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2660
2661 /* Queues */
2662 for (int i = 0; i < adapter->num_queues; i++) {
2663 que = &adapter->queues[i];
2664 tmp = E1000_EICR_RX_QUEUE0 << i;
2665 tmp |= E1000_EICR_TX_QUEUE0 << i;
2666 que->eims = tmp;
2667 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2668 i, que->eims);
2669 adapter->que_mask |= que->eims;
2670 }
2671
2672 /* Link */
2673 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2674 E1000_EIMS_OTHER);
2675 adapter->link_mask |= E1000_EIMS_OTHER;
2676 default:
2677 break;
2678 }
2679
2680 /* Set the starting interrupt rate */
2681 if (igb_max_interrupt_rate > 0)
2682 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2683
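	/*
	** The EITR handling differs by MAC: the 82575 path duplicates
	** the interval into the upper half of the register, while
	** later MACs set a counter-ignore bit instead.
	*/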
2684 if (hw->mac.type == e1000_82575)
2685 newitr |= newitr << 16;
2686 else
2687 newitr |= E1000_EITR_CNT_IGNR;
2688
2689 for (int i = 0; i < adapter->num_queues; i++) {
2690 que = &adapter->queues[i];
2691 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2692 }
2693
2694 return;
2695}
2696
2697
2698static void
2699igb_free_pci_resources(struct adapter *adapter)
2700{
2701 struct igb_queue *que = adapter->queues;
2702 device_t dev = adapter->dev;
2703 int rid;
2704
2705 /*
2706 ** There is a slight possibility of a failure mode
2707 ** in attach that will result in entering this function
2708 ** before interrupt resources have been initialized, and
2709	** in that case we do not want to execute the loops below.
2710	** We can detect this reliably from the state of the adapter's
2711	** res pointer.
2712 */
2713 if (adapter->res == NULL)
2714 goto mem;
2715
2716 /*
2717 * First release all the interrupt resources:
2718 */
2719 for (int i = 0; i < adapter->num_queues; i++, que++) {
2720 rid = que->msix + 1;
2721 if (que->tag != NULL) {
2722 bus_teardown_intr(dev, que->res, que->tag);
2723 que->tag = NULL;
2724 }
2725 if (que->res != NULL)
2726 bus_release_resource(dev,
2727 SYS_RES_IRQ, rid, que->res);
2728 }
2729
2730 /* Clean the Legacy or Link interrupt last */
2731 if (adapter->linkvec) /* we are doing MSIX */
2732 rid = adapter->linkvec + 1;
2733 else
2734		rid = (adapter->msix != 0) ? 1 : 0;
2735
2736 que = adapter->queues;
2737 if (adapter->tag != NULL) {
2738 taskqueue_drain(que->tq, &adapter->link_task);
2739 bus_teardown_intr(dev, adapter->res, adapter->tag);
2740 adapter->tag = NULL;
2741 }
2742 if (adapter->res != NULL)
2743 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2744
2745 for (int i = 0; i < adapter->num_queues; i++, que++) {
2746 if (que->tq != NULL) {
2747#ifndef IGB_LEGACY_TX
2748 taskqueue_drain(que->tq, &que->txr->txq_task);
2749#endif
2750 taskqueue_drain(que->tq, &que->que_task);
2751 taskqueue_free(que->tq);
2752 }
2753 }
2754mem:
2755 if (adapter->msix)
2756 pci_release_msi(dev);
2757
2758 if (adapter->msix_mem != NULL)
2759 bus_release_resource(dev, SYS_RES_MEMORY,
2760 adapter->memrid, adapter->msix_mem);
2761
2762 if (adapter->pci_mem != NULL)
2763 bus_release_resource(dev, SYS_RES_MEMORY,
2764 PCIR_BAR(0), adapter->pci_mem);
2765
2766}
2767
2768/*
2769 * Setup either MSI/X or MSI
2770 */
2771static int
2772igb_setup_msix(struct adapter *adapter)
2773{
2774 device_t dev = adapter->dev;
2775 int bar, want, queues, msgs, maxqueues;
2776
2777 /* tuneable override */
2778 if (igb_enable_msix == 0)
2779 goto msi;
2780
2781 /* First try MSI/X */
2782 msgs = pci_msix_count(dev);
2783 if (msgs == 0)
2784 goto msi;
2785 /*
2786 ** Some new devices, as with ixgbe, now may
2787 ** use a different BAR, so we need to keep
2788 ** track of which is used.
2789 */
2790 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2791 bar = pci_read_config(dev, adapter->memrid, 4);
2792 if (bar == 0) /* use next bar */
2793 adapter->memrid += 4;
2794 adapter->msix_mem = bus_alloc_resource_any(dev,
2795 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2796 if (adapter->msix_mem == NULL) {
2797 /* May not be enabled */
2798 device_printf(adapter->dev,
2799		    "Unable to map MSIX table\n");
2800 goto msi;
2801 }
2802
2803 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2804
2805 /* Override via tuneable */
2806 if (igb_num_queues != 0)
2807 queues = igb_num_queues;
2808
2809#ifdef RSS
2810 /* If we're doing RSS, clamp at the number of RSS buckets */
2811 if (queues > rss_getnumbuckets())
2812 queues = rss_getnumbuckets();
2813#endif
2814
2815
2816 /* Sanity check based on HW */
2817 switch (adapter->hw.mac.type) {
2818 case e1000_82575:
2819 maxqueues = 4;
2820 break;
2821 case e1000_82576:
2822 case e1000_82580:
2823 case e1000_i350:
2824 case e1000_i354:
2825 maxqueues = 8;
2826 break;
2827 case e1000_i210:
2828 maxqueues = 4;
2829 break;
2830 case e1000_i211:
2831 maxqueues = 2;
2832 break;
2833 default: /* VF interfaces */
2834 maxqueues = 1;
2835 break;
2836 }
2837
2838 /* Final clamp on the actual hardware capability */
2839 if (queues > maxqueues)
2840 queues = maxqueues;
2841
2842 /*
2843 ** One vector (RX/TX pair) per queue
2844	** plus an additional one for the link interrupt
2845 */
2846 want = queues + 1;
2847 if (msgs >= want)
2848 msgs = want;
2849 else {
2850 device_printf(adapter->dev,
2851 "MSIX Configuration Problem, "
2852		    "%d vectors configured, but %d vectors wanted!\n",
2853 msgs, want);
2854 goto msi;
2855 }
2856 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2857 device_printf(adapter->dev,
2858 "Using MSIX interrupts with %d vectors\n", msgs);
2859 adapter->num_queues = queues;
2860 return (msgs);
2861 }
2862 /*
2863 ** If MSIX alloc failed or provided us with
2864 ** less than needed, free and fall through to MSI
2865 */
2866 pci_release_msi(dev);
2867
2868msi:
2869 if (adapter->msix_mem != NULL) {
2870 bus_release_resource(dev, SYS_RES_MEMORY,
2871 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2872 adapter->msix_mem = NULL;
2873 }
2874 msgs = 1;
2875 if (pci_alloc_msi(dev, &msgs) == 0) {
2876		device_printf(adapter->dev, "Using an MSI interrupt\n");
2877 return (msgs);
2878 }
2879	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2880 return (0);
2881}
2882
2883/*********************************************************************
2884 *
2885 * Initialize the DMA Coalescing feature
2886 *
2887 **********************************************************************/
2888static void
2889igb_init_dmac(struct adapter *adapter, u32 pba)
2890{
2891 device_t dev = adapter->dev;
2892 struct e1000_hw *hw = &adapter->hw;
2893 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2894 u16 hwm;
2895
2896 if (hw->mac.type == e1000_i211)
2897 return;
2898
2899 if (hw->mac.type > e1000_82580) {
2900
2901 if (adapter->dmac == 0) { /* Disabling it */
2902 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2903 return;
2904 } else
2905 device_printf(dev, "DMA Coalescing enabled\n");
2906
2907 /* Set starting threshold */
2908 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2909
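		/*
		** pba is in KB, so 64 * pba expresses it in 16-byte
		** units; back off one max frame (also in 16B units)
		** and floor the mark at 6KB below the end of the
		** packet buffer.
		*/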
2910 hwm = 64 * pba - adapter->max_frame_size / 16;
2911 if (hwm < 64 * (pba - 6))
2912 hwm = 64 * (pba - 6);
2913 reg = E1000_READ_REG(hw, E1000_FCRTC);
2914 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2915 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2916 & E1000_FCRTC_RTH_COAL_MASK);
2917 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2918
2919
2920 dmac = pba - adapter->max_frame_size / 512;
2921 if (dmac < pba - 10)
2922 dmac = pba - 10;
2923 reg = E1000_READ_REG(hw, E1000_DMACR);
2924 reg &= ~E1000_DMACR_DMACTHR_MASK;
2925		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2926 & E1000_DMACR_DMACTHR_MASK);
2927
2928 /* transition to L0x or L1 if available..*/
2929 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2930
2931		/* Check whether this is a 2.5Gb backplane connection
2932		 * before programming the watchdog timer: the timer
2933		 * field counts in 12.8usec units on a 2.5Gb link and
2934		 * in 32usec units otherwise, hence the different
2935		 * scaling of adapter->dmac below.
2936		 */
2937 if (hw->mac.type == e1000_i354) {
2938 int status = E1000_READ_REG(hw, E1000_STATUS);
2939 if ((status & E1000_STATUS_2P5_SKU) &&
2940 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2941 reg |= ((adapter->dmac * 5) >> 6);
2942 else
2943 reg |= (adapter->dmac >> 5);
2944 } else {
2945 reg |= (adapter->dmac >> 5);
2946 }
2947
2948 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2949
2950 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2951
2952 /* Set the interval before transition */
2953 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2954 if (hw->mac.type == e1000_i350)
2955 reg |= IGB_DMCTLX_DCFLUSH_DIS;
2956		/*
2957		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so
2958		** 0xA units (0x4 * 2.5) still gives the same 4 usec delay.
2959		*/
2960 if (hw->mac.type == e1000_i354) {
2961 int status = E1000_READ_REG(hw, E1000_STATUS);
2962 if ((status & E1000_STATUS_2P5_SKU) &&
2963 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2964 reg |= 0xA;
2965 else
2966 reg |= 0x4;
2967 } else {
2968 reg |= 0x4;
2969 }
2970
2971 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2972
2973 /* free space in tx packet buffer to wake from DMA coal */
2974 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2975 (2 * adapter->max_frame_size)) >> 6);
2976
2977 /* make low power state decision controlled by DMA coal */
2978 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2979 reg &= ~E1000_PCIEMISC_LX_DECISION;
2980 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2981
2982 } else if (hw->mac.type == e1000_82580) {
2983 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2984 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2985 reg & ~E1000_PCIEMISC_LX_DECISION);
2986 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2987 }
2988}
2989
2990
2991/*********************************************************************
2992 *
2993 * Set up a fresh starting state
2994 *
2995 **********************************************************************/
2996static void
2997igb_reset(struct adapter *adapter)
2998{
2999 device_t dev = adapter->dev;
3000 struct e1000_hw *hw = &adapter->hw;
3001 struct e1000_fc_info *fc = &hw->fc;
3002 struct ifnet *ifp = adapter->ifp;
3003 u32 pba = 0;
3004 u16 hwm;
3005
3006 INIT_DEBUGOUT("igb_reset: begin");
3007
3008 /* Let the firmware know the OS is in control */
3009 igb_get_hw_control(adapter);
3010
3011 /*
3012 * Packet Buffer Allocation (PBA)
3013 * Writing PBA sets the receive portion of the buffer
3014 * the remainder is used for the transmit buffer.
3015 */
3016 switch (hw->mac.type) {
3017 case e1000_82575:
3018 pba = E1000_PBA_32K;
3019 break;
3020 case e1000_82576:
3021 case e1000_vfadapt:
3022 pba = E1000_READ_REG(hw, E1000_RXPBS);
3023 pba &= E1000_RXPBS_SIZE_MASK_82576;
3024 break;
3025 case e1000_82580:
3026 case e1000_i350:
3027 case e1000_i354:
3028 case e1000_vfadapt_i350:
3029 pba = E1000_READ_REG(hw, E1000_RXPBS);
3030 pba = e1000_rxpbs_adjust_82580(pba);
3031 break;
3032 case e1000_i210:
3033 case e1000_i211:
3034 pba = E1000_PBA_34K;
3035 default:
3036 break;
3037 }
3038
3039 /* Special needs in case of Jumbo frames */
3040 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3041 u32 tx_space, min_tx, min_rx;
3042 pba = E1000_READ_REG(hw, E1000_PBA);
3043 tx_space = pba >> 16;
3044 pba &= 0xffff;
3045 min_tx = (adapter->max_frame_size +
3046 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3047 min_tx = roundup2(min_tx, 1024);
3048 min_tx >>= 10;
3049 min_rx = adapter->max_frame_size;
3050 min_rx = roundup2(min_rx, 1024);
3051 min_rx >>= 10;
3052 if (tx_space < min_tx &&
3053 ((min_tx - tx_space) < pba)) {
3054 pba = pba - (min_tx - tx_space);
3055 /*
3056 * if short on rx space, rx wins
3057 * and must trump tx adjustment
3058 */
3059 if (pba < min_rx)
3060 pba = min_rx;
3061 }
3062 E1000_WRITE_REG(hw, E1000_PBA, pba);
3063 }
3064
3065	INIT_DEBUGOUT1("igb_reset: pba=%dK", pba);
3066
3067 /*
3068 * These parameters control the automatic generation (Tx) and
3069 * response (Rx) to Ethernet PAUSE frames.
3070 * - High water mark should allow for at least two frames to be
3071 * received after sending an XOFF.
3072 * - Low water mark works best when it is very near the high water mark.
3073 * This allows the receiver to restart by sending XON when it has
3074 * drained a bit.
3075 */
3076 hwm = min(((pba << 10) * 9 / 10),
3077 ((pba << 10) - 2 * adapter->max_frame_size));
3078
3079 if (hw->mac.type < e1000_82576) {
3080 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3081 fc->low_water = fc->high_water - 8;
3082 } else {
3083 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3084 fc->low_water = fc->high_water - 16;
3085 }
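	/*
	 * For example (assuming an i210/i211 with a 34KB PBA and a
	 * 1522-byte max frame): hwm = min(34816 * 9 / 10, 34816 - 3044)
	 * = 31334, giving a high water mark of 0x7A60 (31328) and a
	 * low water mark of 31312 with 16-byte granularity.
	 */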
3086
3087 fc->pause_time = IGB_FC_PAUSE_TIME;
3088 fc->send_xon = TRUE;
3089 if (adapter->fc)
3090 fc->requested_mode = adapter->fc;
3091 else
3092 fc->requested_mode = e1000_fc_default;
3093
3094 /* Issue a global reset */
3095 e1000_reset_hw(hw);
3096 E1000_WRITE_REG(hw, E1000_WUC, 0);
3097
3098 /* Reset for AutoMediaDetect */
3099 if (adapter->flags & IGB_MEDIA_RESET) {
3100 e1000_setup_init_funcs(hw, TRUE);
3101 e1000_get_bus_info(hw);
3102 adapter->flags &= ~IGB_MEDIA_RESET;
3103 }
3104
3105 if (e1000_init_hw(hw) < 0)
3106 device_printf(dev, "Hardware Initialization Failed\n");
3107
3108 /* Setup DMA Coalescing */
3109 igb_init_dmac(adapter, pba);
3110
3111 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3112 e1000_get_phy_info(hw);
3113 e1000_check_for_link(hw);
3114 return;
3115}
3116
3117/*********************************************************************
3118 *
3119 * Setup networking device structure and register an interface.
3120 *
3121 **********************************************************************/
3122static int
3123igb_setup_interface(device_t dev, struct adapter *adapter)
3124{
3125 struct ifnet *ifp;
3126
3127 INIT_DEBUGOUT("igb_setup_interface: begin");
3128
3129 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3130 if (ifp == NULL) {
3131 device_printf(dev, "can not allocate ifnet structure\n");
3132 return (-1);
3133 }
3134 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3135 ifp->if_init = igb_init;
3136 ifp->if_softc = adapter;
3137 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3138 ifp->if_ioctl = igb_ioctl;
3139 ifp->if_get_counter = igb_get_counter;
3140#ifndef IGB_LEGACY_TX
3141 ifp->if_transmit = igb_mq_start;
3142 ifp->if_qflush = igb_qflush;
3143#else
3144 ifp->if_start = igb_start;
3145 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3146 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3147 IFQ_SET_READY(&ifp->if_snd);
3148#endif
3149
3150 ether_ifattach(ifp, adapter->hw.mac.addr);
3151
3152 ifp->if_capabilities = ifp->if_capenable = 0;
3153
3154 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3155 ifp->if_capabilities |= IFCAP_TSO;
3156 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3157 ifp->if_capenable = ifp->if_capabilities;
3158
3159	/* Advertise LRO capability, but don't enable it by default */
3160 ifp->if_capabilities |= IFCAP_LRO;
3161
3162#ifdef DEVICE_POLLING
3163 ifp->if_capabilities |= IFCAP_POLLING;
3164#endif
3165
3166 /*
3167 * Tell the upper layer(s) we
3168 * support full VLAN capability.
3169 */
3170 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3171 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3172 | IFCAP_VLAN_HWTSO
3173 | IFCAP_VLAN_MTU;
3174 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3175 | IFCAP_VLAN_HWTSO
3176 | IFCAP_VLAN_MTU;
3177
3178	/*
3179	** Don't turn this on by default: if vlans are
3180	** created on another pseudo device (e.g. lagg),
3181	** vlan events are not passed through and operation
3182	** breaks, whereas with HW FILTER off it works. If
3183	** you use vlans directly on the igb driver you can
3184	** enable this to get full hardware tag filtering.
3185	*/
3186 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3187
3188 /*
3189 * Specify the media types supported by this adapter and register
3190 * callbacks to update media and link information
3191 */
3192 ifmedia_init(&adapter->media, IFM_IMASK,
3193 igb_media_change, igb_media_status);
3194 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3195 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3196 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3197 0, NULL);
3198 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3199 } else {
3200 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3201 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3202 0, NULL);
3203 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3204 0, NULL);
3205 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3206 0, NULL);
3207 if (adapter->hw.phy.type != e1000_phy_ife) {
3208 ifmedia_add(&adapter->media,
3209 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3210 ifmedia_add(&adapter->media,
3211 IFM_ETHER | IFM_1000_T, 0, NULL);
3212 }
3213 }
3214 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3215 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3216 return (0);
3217}
3218
3219
3220/*
3221 * Manage DMA'able memory.
3222 */
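/*
 * bus_dmamap_load() hands back the physical segments via a callback;
 * since the tag below uses a single segment, igb_dmamap_cb() just
 * records segs[0].ds_addr for the caller.
 */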
3223static void
3224igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3225{
3226 if (error)
3227 return;
3228 *(bus_addr_t *) arg = segs[0].ds_addr;
3229}
3230
3231static int
3232igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3233 struct igb_dma_alloc *dma, int mapflags)
3234{
3235 int error;
3236
3237 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3238 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3239 BUS_SPACE_MAXADDR, /* lowaddr */
3240 BUS_SPACE_MAXADDR, /* highaddr */
3241 NULL, NULL, /* filter, filterarg */
3242 size, /* maxsize */
3243 1, /* nsegments */
3244 size, /* maxsegsize */
3245 0, /* flags */
3246 NULL, /* lockfunc */
3247 NULL, /* lockarg */
3248 &dma->dma_tag);
3249 if (error) {
3250 device_printf(adapter->dev,
3251 "%s: bus_dma_tag_create failed: %d\n",
3252 __func__, error);
3253 goto fail_0;
3254 }
3255
3256 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3257 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3258 if (error) {
3259 device_printf(adapter->dev,
3260 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3261 __func__, (uintmax_t)size, error);
3262 goto fail_2;
3263 }
3264
3265 dma->dma_paddr = 0;
3266 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3267 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3268 if (error || dma->dma_paddr == 0) {
3269 device_printf(adapter->dev,
3270 "%s: bus_dmamap_load failed: %d\n",
3271 __func__, error);
3272 goto fail_3;
3273 }
3274
3275 return (0);
3276
3277fail_3:
3278 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3279fail_2:
3280 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3281 bus_dma_tag_destroy(dma->dma_tag);
3282fail_0:
3283 dma->dma_tag = NULL;
3284
3285 return (error);
3286}
3287
3288static void
3289igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3290{
3291 if (dma->dma_tag == NULL)
3292 return;
3293 if (dma->dma_paddr != 0) {
3294 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3295 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3296 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3297 dma->dma_paddr = 0;
3298 }
3299 if (dma->dma_vaddr != NULL) {
3300 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3301 dma->dma_vaddr = NULL;
3302 }
3303 bus_dma_tag_destroy(dma->dma_tag);
3304 dma->dma_tag = NULL;
3305}
3306
3307
3308/*********************************************************************
3309 *
3310 * Allocate memory for the transmit and receive rings, and then
3311 * the descriptors associated with each, called only once at attach.
3312 *
3313 **********************************************************************/
3314static int
3315igb_allocate_queues(struct adapter *adapter)
3316{
3317 device_t dev = adapter->dev;
3318 struct igb_queue *que = NULL;
3319 struct tx_ring *txr = NULL;
3320 struct rx_ring *rxr = NULL;
3321 int rsize, tsize, error = E1000_SUCCESS;
3322 int txconf = 0, rxconf = 0;
3323
3324 /* First allocate the top level queue structs */
3325 if (!(adapter->queues =
3326 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3327 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3328 device_printf(dev, "Unable to allocate queue memory\n");
3329 error = ENOMEM;
3330 goto fail;
3331 }
3332
3333 /* Next allocate the TX ring struct memory */
3334 if (!(adapter->tx_rings =
3335 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3336 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3337 device_printf(dev, "Unable to allocate TX ring memory\n");
3338 error = ENOMEM;
3339 goto tx_fail;
3340 }
3341
3342 /* Now allocate the RX */
3343 if (!(adapter->rx_rings =
3344 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3345 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3346 device_printf(dev, "Unable to allocate RX ring memory\n");
3347 error = ENOMEM;
3348 goto rx_fail;
3349 }
3350
3351 tsize = roundup2(adapter->num_tx_desc *
3352 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3353 /*
3354	 * Now set up the TX queues; txconf is needed to handle the
3355	 * possibility that things fail midcourse so we can
3356	 * undo the allocations gracefully.
3357 */
3358 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3359 /* Set up some basics */
3360 txr = &adapter->tx_rings[i];
3361 txr->adapter = adapter;
3362 txr->me = i;
3363 txr->num_desc = adapter->num_tx_desc;
3364
3365 /* Initialize the TX lock */
3366 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3367 device_get_nameunit(dev), txr->me);
3368 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3369
3370 if (igb_dma_malloc(adapter, tsize,
3371 &txr->txdma, BUS_DMA_NOWAIT)) {
3372 device_printf(dev,
3373 "Unable to allocate TX Descriptor memory\n");
3374 error = ENOMEM;
3375 goto err_tx_desc;
3376 }
3377 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3378 bzero((void *)txr->tx_base, tsize);
3379
3380 /* Now allocate transmit buffers for the ring */
3381 if (igb_allocate_transmit_buffers(txr)) {
3382 device_printf(dev,
3383 "Critical Failure setting up transmit buffers\n");
3384 error = ENOMEM;
3385 goto err_tx_desc;
3386 }
3387#ifndef IGB_LEGACY_TX
3388 /* Allocate a buf ring */
3389 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3390 M_WAITOK, &txr->tx_mtx);
3391#endif
3392 }
3393
3394 /*
3395 * Next the RX queues...
3396 */
3397 rsize = roundup2(adapter->num_rx_desc *
3398 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3399 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3400 rxr = &adapter->rx_rings[i];
3401 rxr->adapter = adapter;
3402 rxr->me = i;
3403
3404 /* Initialize the RX lock */
3405 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3406		    device_get_nameunit(dev), rxr->me);
3407 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3408
3409 if (igb_dma_malloc(adapter, rsize,
3410 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3411 device_printf(dev,
3412			    "Unable to allocate RX Descriptor memory\n");
3413 error = ENOMEM;
3414 goto err_rx_desc;
3415 }
3416 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3417 bzero((void *)rxr->rx_base, rsize);
3418
3419 /* Allocate receive buffers for the ring*/
3420 if (igb_allocate_receive_buffers(rxr)) {
3421 device_printf(dev,
3422 "Critical Failure setting up receive buffers\n");
3423 error = ENOMEM;
3424 goto err_rx_desc;
3425 }
3426 }
3427
3428 /*
3429 ** Finally set up the queue holding structs
3430 */
3431 for (int i = 0; i < adapter->num_queues; i++) {
3432 que = &adapter->queues[i];
3433 que->adapter = adapter;
3434 que->txr = &adapter->tx_rings[i];
3435 que->rxr = &adapter->rx_rings[i];
3436 }
3437
3438 return (0);
3439
3440err_rx_desc:
3441 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3442 igb_dma_free(adapter, &rxr->rxdma);
3443err_tx_desc:
3444 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3445 igb_dma_free(adapter, &txr->txdma);
3446 free(adapter->rx_rings, M_DEVBUF);
3447rx_fail:
3448#ifndef IGB_LEGACY_TX
3449 buf_ring_free(txr->br, M_DEVBUF);
3450#endif
3451 free(adapter->tx_rings, M_DEVBUF);
3452tx_fail:
3453 free(adapter->queues, M_DEVBUF);
3454fail:
3455 return (error);
3456}
3457
3458/*********************************************************************
3459 *
3460 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3461 * the information needed to transmit a packet on the wire. This is
3462 * called only once at attach, setup is done every reset.
3463 *
3464 **********************************************************************/
3465static int
3466igb_allocate_transmit_buffers(struct tx_ring *txr)
3467{
3468 struct adapter *adapter = txr->adapter;
3469 device_t dev = adapter->dev;
3470 struct igb_tx_buf *txbuf;
3471 int error, i;
3472
3473 /*
3474 * Setup DMA descriptor areas.
3475 */
3476 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3477 1, 0, /* alignment, bounds */
3478 BUS_SPACE_MAXADDR, /* lowaddr */
3479 BUS_SPACE_MAXADDR, /* highaddr */
3480 NULL, NULL, /* filter, filterarg */
3481 IGB_TSO_SIZE, /* maxsize */
3482 IGB_MAX_SCATTER, /* nsegments */
3483 PAGE_SIZE, /* maxsegsize */
3484 0, /* flags */
3485 NULL, /* lockfunc */
3486 NULL, /* lockfuncarg */
3487 &txr->txtag))) {
3488 device_printf(dev,"Unable to allocate TX DMA tag\n");
3489 goto fail;
3490 }
3491
3492 if (!(txr->tx_buffers =
3493 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3494 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3495 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3496 error = ENOMEM;
3497 goto fail;
3498 }
3499
3500 /* Create the descriptor buffer dma maps */
3501 txbuf = txr->tx_buffers;
3502 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3503 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3504 if (error != 0) {
3505 device_printf(dev, "Unable to create TX DMA map\n");
3506 goto fail;
3507 }
3508 }
3509
3510 return 0;
3511fail:
3512 /* We free all, it handles case where we are in the middle */
3513 igb_free_transmit_structures(adapter);
3514 return (error);
3515}
3516
3517/*********************************************************************
3518 *
3519 * Initialize a transmit ring.
3520 *
3521 **********************************************************************/
3522static void
3523igb_setup_transmit_ring(struct tx_ring *txr)
3524{
3525 struct adapter *adapter = txr->adapter;
3526 struct igb_tx_buf *txbuf;
3527 int i;
3528#ifdef DEV_NETMAP
3529 struct netmap_adapter *na = NA(adapter->ifp);
3530 struct netmap_slot *slot;
3531#endif /* DEV_NETMAP */
3532
3533 /* Clear the old descriptor contents */
3534 IGB_TX_LOCK(txr);
3535#ifdef DEV_NETMAP
3536 slot = netmap_reset(na, NR_TX, txr->me, 0);
3537#endif /* DEV_NETMAP */
3538 bzero((void *)txr->tx_base,
3539 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3540 /* Reset indices */
3541 txr->next_avail_desc = 0;
3542 txr->next_to_clean = 0;
3543
3544 /* Free any existing tx buffers. */
3545 txbuf = txr->tx_buffers;
3546 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3547 if (txbuf->m_head != NULL) {
3548 bus_dmamap_sync(txr->txtag, txbuf->map,
3549 BUS_DMASYNC_POSTWRITE);
3550 bus_dmamap_unload(txr->txtag, txbuf->map);
3551 m_freem(txbuf->m_head);
3552 txbuf->m_head = NULL;
3553 }
3554#ifdef DEV_NETMAP
3555 if (slot) {
3556 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3557 /* no need to set the address */
3558 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3559 }
3560#endif /* DEV_NETMAP */
3561 /* clear the watch index */
3562 txbuf->eop = NULL;
3563 }
3564
3565 /* Set number of descriptors available */
3566 txr->tx_avail = adapter->num_tx_desc;
3567
3568 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3569 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3570 IGB_TX_UNLOCK(txr);
3571}
3572
3573/*********************************************************************
3574 *
3575 * Initialize all transmit rings.
3576 *
3577 **********************************************************************/
3578static void
3579igb_setup_transmit_structures(struct adapter *adapter)
3580{
3581 struct tx_ring *txr = adapter->tx_rings;
3582
3583 for (int i = 0; i < adapter->num_queues; i++, txr++)
3584 igb_setup_transmit_ring(txr);
3585
3586 return;
3587}
3588
3589/*********************************************************************
3590 *
3591 * Enable transmit unit.
3592 *
3593 **********************************************************************/
3594static void
3595igb_initialize_transmit_units(struct adapter *adapter)
3596{
3597 struct tx_ring *txr = adapter->tx_rings;
3598 struct e1000_hw *hw = &adapter->hw;
3599 u32 tctl, txdctl;
3600
3601 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3602 tctl = txdctl = 0;
3603
3604 /* Setup the Tx Descriptor Rings */
3605 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3606 u64 bus_addr = txr->txdma.dma_paddr;
3607
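		/*
		** Legacy and advanced TX descriptors are both 16 bytes,
		** so sizing TDLEN with e1000_tx_desc matches the
		** advanced-descriptor allocation done at attach.
		*/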
3608 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3609 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3610 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3611 (uint32_t)(bus_addr >> 32));
3612 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3613 (uint32_t)bus_addr);
3614
3615 /* Setup the HW Tx Head and Tail descriptor pointers */
3616 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3617 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3618
3619 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3620 E1000_READ_REG(hw, E1000_TDBAL(i)),
3621 E1000_READ_REG(hw, E1000_TDLEN(i)));
3622
3623 txr->queue_status = IGB_QUEUE_IDLE;
3624
3625 txdctl |= IGB_TX_PTHRESH;
3626 txdctl |= IGB_TX_HTHRESH << 8;
3627 txdctl |= IGB_TX_WTHRESH << 16;
3628 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3629 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3630 }
3631
3632 if (adapter->vf_ifp)
3633 return;
3634
3635 e1000_config_collision_dist(hw);
3636
3637 /* Program the Transmit Control Register */
3638 tctl = E1000_READ_REG(hw, E1000_TCTL);
3639 tctl &= ~E1000_TCTL_CT;
3640 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3641 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3642
3643 /* This write will effectively turn on the transmit unit. */
3644 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3645}
3646
3647/*********************************************************************
3648 *
3649 * Free all transmit rings.
3650 *
3651 **********************************************************************/
3652static void
3653igb_free_transmit_structures(struct adapter *adapter)
3654{
3655 struct tx_ring *txr = adapter->tx_rings;
3656
3657 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3658 IGB_TX_LOCK(txr);
3659 igb_free_transmit_buffers(txr);
3660 igb_dma_free(adapter, &txr->txdma);
3661 IGB_TX_UNLOCK(txr);
3662 IGB_TX_LOCK_DESTROY(txr);
3663 }
3664 free(adapter->tx_rings, M_DEVBUF);
3665}
3666
3667/*********************************************************************
3668 *
3669 * Free transmit ring related data structures.
3670 *
3671 **********************************************************************/
3672static void
3673igb_free_transmit_buffers(struct tx_ring *txr)
3674{
3675 struct adapter *adapter = txr->adapter;
3676 struct igb_tx_buf *tx_buffer;
3677 int i;
3678
3679 INIT_DEBUGOUT("free_transmit_ring: begin");
3680
3681 if (txr->tx_buffers == NULL)
3682 return;
3683
3684 tx_buffer = txr->tx_buffers;
3685 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3686 if (tx_buffer->m_head != NULL) {
3687 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3688 BUS_DMASYNC_POSTWRITE);
3689 bus_dmamap_unload(txr->txtag,
3690 tx_buffer->map);
3691 m_freem(tx_buffer->m_head);
3692 tx_buffer->m_head = NULL;
3693 if (tx_buffer->map != NULL) {
3694 bus_dmamap_destroy(txr->txtag,
3695 tx_buffer->map);
3696 tx_buffer->map = NULL;
3697 }
3698 } else if (tx_buffer->map != NULL) {
3699 bus_dmamap_unload(txr->txtag,
3700 tx_buffer->map);
3701 bus_dmamap_destroy(txr->txtag,
3702 tx_buffer->map);
3703 tx_buffer->map = NULL;
3704 }
3705 }
3706#ifndef IGB_LEGACY_TX
3707 if (txr->br != NULL)
3708 buf_ring_free(txr->br, M_DEVBUF);
3709#endif
3710 if (txr->tx_buffers != NULL) {
3711 free(txr->tx_buffers, M_DEVBUF);
3712 txr->tx_buffers = NULL;
3713 }
3714 if (txr->txtag != NULL) {
3715 bus_dma_tag_destroy(txr->txtag);
3716 txr->txtag = NULL;
3717 }
3718 return;
3719}
3720
3721/**********************************************************************
3722 *
3723 * Setup work for hardware segmentation offload (TSO) on
3724 * adapters using advanced tx descriptors
3725 *
3726 **********************************************************************/
3727static int
3728igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3729 u32 *cmd_type_len, u32 *olinfo_status)
3730{
3731 struct adapter *adapter = txr->adapter;
3732 struct e1000_adv_tx_context_desc *TXD;
3733 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3734 u32 mss_l4len_idx = 0, paylen;
3735 u16 vtag = 0, eh_type;
3736 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3737 struct ether_vlan_header *eh;
3738#ifdef INET6
3739 struct ip6_hdr *ip6;
3740#endif
3741#ifdef INET
3742 struct ip *ip;
3743#endif
3744 struct tcphdr *th;
3745
3746
3747 /*
3748 * Determine where frame payload starts.
3749 * Jump over vlan headers if already present
3750 */
3751 eh = mtod(mp, struct ether_vlan_header *);
3752 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3753 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3754 eh_type = eh->evl_proto;
3755 } else {
3756 ehdrlen = ETHER_HDR_LEN;
3757 eh_type = eh->evl_encap_proto;
3758 }
3759
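	/*
	** For TSO the hardware computes the final TCP checksum for
	** each segment, so the code below only seeds th_sum with the
	** pseudo-header checksum (computed with zero length).
	*/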
3760 switch (ntohs(eh_type)) {
3761#ifdef INET6
3762 case ETHERTYPE_IPV6:
3763 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3764 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3765 if (ip6->ip6_nxt != IPPROTO_TCP)
3766 return (ENXIO);
3767 ip_hlen = sizeof(struct ip6_hdr);
3768 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3769 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3770 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3771 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3772 break;
3773#endif
3774#ifdef INET
3775 case ETHERTYPE_IP:
3776 ip = (struct ip *)(mp->m_data + ehdrlen);
3777 if (ip->ip_p != IPPROTO_TCP)
3778 return (ENXIO);
3779 ip->ip_sum = 0;
3780 ip_hlen = ip->ip_hl << 2;
3781 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3782 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3783 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3784 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3785 /* Tell transmit desc to also do IPv4 checksum. */
3786 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3787 break;
3788#endif
3789 default:
3790 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3791 __func__, ntohs(eh_type));
3792 break;
3793 }
3794
3795 ctxd = txr->next_avail_desc;
3796 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3797
3798 tcp_hlen = th->th_off << 2;
3799
3800 /* This is used in the transmit desc in encap */
3801 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3802
3803 /* VLAN MACLEN IPLEN */
3804 if (mp->m_flags & M_VLANTAG) {
3805 vtag = htole16(mp->m_pkthdr.ether_vtag);
3806 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3807 }
3808
3809 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3810 vlan_macip_lens |= ip_hlen;
3811 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3812
3813 /* ADV DTYPE TUCMD */
3814 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3815 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3816 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3817
3818 /* MSS L4LEN IDX */
3819 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3820 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3821 /* 82575 needs the queue index added */
3822 if (adapter->hw.mac.type == e1000_82575)
3823 mss_l4len_idx |= txr->me << 4;
3824 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3825
3826 TXD->seqnum_seed = htole32(0);
3827
3828 if (++ctxd == txr->num_desc)
3829 ctxd = 0;
3830
3831 txr->tx_avail--;
3832 txr->next_avail_desc = ctxd;
3833 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3834 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3835 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3836 ++txr->tso_tx;
3837 return (0);
3838}
3839
3840/*********************************************************************
3841 *
3842 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3843 *
3844 **********************************************************************/
3845
3846static int
3847igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3848 u32 *cmd_type_len, u32 *olinfo_status)
3849{
3850 struct e1000_adv_tx_context_desc *TXD;
3851 struct adapter *adapter = txr->adapter;
3852 struct ether_vlan_header *eh;
3853 struct ip *ip;
3854 struct ip6_hdr *ip6;
3855 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3856 int ehdrlen, ip_hlen = 0;
3857 u16 etype;
3858 u8 ipproto = 0;
3859 int offload = TRUE;
3860 int ctxd = txr->next_avail_desc;
3861 u16 vtag = 0;
3862
3863 /* First check if TSO is to be used */
3864 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3865 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3866
3867 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3868 offload = FALSE;
3869
3870 /* Indicate the whole packet as payload when not doing TSO */
3871 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3872
3873 /* Now ready a context descriptor */
3874 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3875
3876 /*
3877 ** In advanced descriptors the vlan tag must
3878 ** be placed into the context descriptor. Hence
3879 ** we need to make one even if not doing offloads.
3880 */
3881 if (mp->m_flags & M_VLANTAG) {
3882 vtag = htole16(mp->m_pkthdr.ether_vtag);
3883 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3884 } else if (offload == FALSE) /* ... no offload to do */
3885 return (0);
3886
3887 /*
3888 * Determine where frame payload starts.
3889 * Jump over vlan headers if already present,
3890 * helpful for QinQ too.
3891 */
3892 eh = mtod(mp, struct ether_vlan_header *);
3893 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3894 etype = ntohs(eh->evl_proto);
3895 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3896 } else {
3897 etype = ntohs(eh->evl_encap_proto);
3898 ehdrlen = ETHER_HDR_LEN;
3899 }
3900
3901 /* Set the ether header length */
3902 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3903
3904 switch (etype) {
3905 case ETHERTYPE_IP:
3906 ip = (struct ip *)(mp->m_data + ehdrlen);
3907 ip_hlen = ip->ip_hl << 2;
3908 ipproto = ip->ip_p;
3909 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3910 break;
3911 case ETHERTYPE_IPV6:
3912 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3913 ip_hlen = sizeof(struct ip6_hdr);
3914 /* XXX-BZ this will go badly in case of ext hdrs. */
3915 ipproto = ip6->ip6_nxt;
3916 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3917 break;
3918 default:
3919 offload = FALSE;
3920 break;
3921 }
3922
3923 vlan_macip_lens |= ip_hlen;
3924 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3925
3926 switch (ipproto) {
3927 case IPPROTO_TCP:
3928 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3929 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3930 break;
3931 case IPPROTO_UDP:
3932 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3933 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3934 break;
3935
3936#if __FreeBSD_version >= 800000
3937 case IPPROTO_SCTP:
3938 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3939 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3940 break;
3941#endif
3942 default:
3943 offload = FALSE;
3944 break;
3945 }
3946
3947 if (offload) /* For the TX descriptor setup */
3948 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3949
3950 /* 82575 needs the queue index added */
3951 if (adapter->hw.mac.type == e1000_82575)
3952 mss_l4len_idx = txr->me << 4;
3953
3954 /* Now copy bits into descriptor */
3955 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3956 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3957 TXD->seqnum_seed = htole32(0);
3958 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3959
3960 /* We've consumed the first desc, adjust counters */
3961 if (++ctxd == txr->num_desc)
3962 ctxd = 0;
3963 txr->next_avail_desc = ctxd;
3964 --txr->tx_avail;
3965
3966 return (0);
3967}
3968
3969/**********************************************************************
3970 *
3971 * Examine each tx_buffer in the used queue. If the hardware is done
3972 * processing the packet then free associated resources. The
3973 * tx_buffer is put back on the free queue.
3974 *
3975 * TRUE return means there's work in the ring to clean, FALSE means it's empty.
3976 **********************************************************************/
3977static bool
3978igb_txeof(struct tx_ring *txr)
3979{
3980 struct adapter *adapter = txr->adapter;
3981#ifdef DEV_NETMAP
3982 struct ifnet *ifp = adapter->ifp;
3983#endif /* DEV_NETMAP */
3984 u32 work, processed = 0;
3985 int limit = adapter->tx_process_limit;
3986 struct igb_tx_buf *buf;
3987 union e1000_adv_tx_desc *txd;
3988
3989 mtx_assert(&txr->tx_mtx, MA_OWNED);
3990
3991#ifdef DEV_NETMAP
3992 if (netmap_tx_irq(ifp, txr->me))
3993 return (FALSE);
3994#endif /* DEV_NETMAP */
3995
3996 if (txr->tx_avail == txr->num_desc) {
3997 txr->queue_status = IGB_QUEUE_IDLE;
3998 return FALSE;
3999 }
4000
4001 /* Get work starting point */
4002 work = txr->next_to_clean;
4003 buf = &txr->tx_buffers[work];
4004 txd = &txr->tx_base[work];
4005 work -= txr->num_desc; /* The distance to ring end */
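	/*
	** 'work' is biased negative by the ring size so that it hits
	** zero exactly when the buf/txd pointers need to wrap back to
	** the start of the ring; num_desc is added back at the end to
	** recover the real next_to_clean index.
	*/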
4006 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4007 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4008 do {
4009 union e1000_adv_tx_desc *eop = buf->eop;
4010 if (eop == NULL) /* No work */
4011 break;
4012
4013 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4014 break; /* I/O not complete */
4015
4016 if (buf->m_head) {
4017 txr->bytes +=
4018 buf->m_head->m_pkthdr.len;
4019 bus_dmamap_sync(txr->txtag,
4020 buf->map,
4021 BUS_DMASYNC_POSTWRITE);
4022 bus_dmamap_unload(txr->txtag,
4023 buf->map);
4024 m_freem(buf->m_head);
4025 buf->m_head = NULL;
4026 }
4027 buf->eop = NULL;
4028 ++txr->tx_avail;
4029
4030 /* We clean the range if multi segment */
4031 while (txd != eop) {
4032 ++txd;
4033 ++buf;
4034 ++work;
4035 /* wrap the ring? */
4036 if (__predict_false(!work)) {
4037 work -= txr->num_desc;
4038 buf = txr->tx_buffers;
4039 txd = txr->tx_base;
4040 }
4041 if (buf->m_head) {
4042 txr->bytes +=
4043 buf->m_head->m_pkthdr.len;
4044 bus_dmamap_sync(txr->txtag,
4045 buf->map,
4046 BUS_DMASYNC_POSTWRITE);
4047 bus_dmamap_unload(txr->txtag,
4048 buf->map);
4049 m_freem(buf->m_head);
4050 buf->m_head = NULL;
4051 }
4052 ++txr->tx_avail;
4053 buf->eop = NULL;
4054
4055 }
4056 ++txr->packets;
4057 ++processed;
4058 txr->watchdog_time = ticks;
4059
4060 /* Try the next packet */
4061 ++txd;
4062 ++buf;
4063 ++work;
4064 /* reset with a wrap */
4065 if (__predict_false(!work)) {
4066 work -= txr->num_desc;
4067 buf = txr->tx_buffers;
4068 txd = txr->tx_base;
4069 }
4070 prefetch(txd);
4071 } while (__predict_true(--limit));
4072
4073 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4074 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4075
4076 work += txr->num_desc;
4077 txr->next_to_clean = work;
4078
4079 /*
4080	** Watchdog calculation: we know there's
4081	** work outstanding or the first return
4082	** would have been taken, so nothing processed
4083	** for too long indicates a hang.
4084 */
4085 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4086 txr->queue_status |= IGB_QUEUE_HUNG;
4087
4088 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4089 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4090
4091 if (txr->tx_avail == txr->num_desc) {
4092 txr->queue_status = IGB_QUEUE_IDLE;
4093 return (FALSE);
4094 }
4095
4096 return (TRUE);
4097}
4098
4099/*********************************************************************
4100 *
4101 * Refresh mbuf buffers for RX descriptor rings
4102 * - now keeps its own state so discards due to resource
4103 * exhaustion are unnecessary; if an mbuf cannot be obtained
4104 * it just returns, keeping its placeholder, so it can simply
4105 * be called again later to retry.
4106 *
4107 **********************************************************************/
4108static void
4109igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4110{
4111 struct adapter *adapter = rxr->adapter;
4112 bus_dma_segment_t hseg[1];
4113 bus_dma_segment_t pseg[1];
4114 struct igb_rx_buf *rxbuf;
4115 struct mbuf *mh, *mp;
4116 int i, j, nsegs, error;
4117 bool refreshed = FALSE;
4118
4119 i = j = rxr->next_to_refresh;
4120 /*
4121 ** Get one descriptor beyond
4122 ** our work mark to control
4123 ** the loop.
4124 */
4125 if (++j == adapter->num_rx_desc)
4126 j = 0;
4127
4128 while (j != limit) {
4129 rxbuf = &rxr->rx_buffers[i];
4130 /* No hdr mbuf used with header split off */
4131 if (rxr->hdr_split == FALSE)
4132 goto no_split;
4133 if (rxbuf->m_head == NULL) {
4134 mh = m_gethdr(M_NOWAIT, MT_DATA);
4135 if (mh == NULL)
4136 goto update;
4137 } else
4138 mh = rxbuf->m_head;
4139
4140		mh->m_pkthdr.len = mh->m_len = MHLEN;
4142 mh->m_flags |= M_PKTHDR;
4143 /* Get the memory mapping */
4144 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4145 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4146 if (error != 0) {
4147 printf("Refresh mbufs: hdr dmamap load"
4148 " failure - %d\n", error);
4149 m_free(mh);
4150 rxbuf->m_head = NULL;
4151 goto update;
4152 }
4153 rxbuf->m_head = mh;
4154 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4155 BUS_DMASYNC_PREREAD);
4156 rxr->rx_base[i].read.hdr_addr =
4157 htole64(hseg[0].ds_addr);
4158no_split:
4159 if (rxbuf->m_pack == NULL) {
4160 mp = m_getjcl(M_NOWAIT, MT_DATA,
4161 M_PKTHDR, adapter->rx_mbuf_sz);
4162 if (mp == NULL)
4163 goto update;
4164 } else
4165 mp = rxbuf->m_pack;
4166
4167 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4168 /* Get the memory mapping */
4169 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4170 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4171 if (error != 0) {
4172 printf("Refresh mbufs: payload dmamap load"
4173 " failure - %d\n", error);
4174 m_free(mp);
4175 rxbuf->m_pack = NULL;
4176 goto update;
4177 }
4178 rxbuf->m_pack = mp;
4179 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4180 BUS_DMASYNC_PREREAD);
4181 rxr->rx_base[i].read.pkt_addr =
4182 htole64(pseg[0].ds_addr);
4183 refreshed = TRUE; /* I feel wefreshed :) */
4184
4185 i = j; /* our next is precalculated */
4186 rxr->next_to_refresh = i;
4187 if (++j == adapter->num_rx_desc)
4188 j = 0;
4189 }
4190update:
4191 if (refreshed) /* update tail */
4192 E1000_WRITE_REG(&adapter->hw,
4193 E1000_RDT(rxr->me), rxr->next_to_refresh);
4194 return;
4195}
4196
4197
4198/*********************************************************************
4199 *
4200 * Allocate memory for rx_buffer structures. Since we use one
4201 * rx_buffer per received packet, the maximum number of rx_buffer's
4202 * that we'll need is equal to the number of receive descriptors
4203 * that we've allocated.
4204 *
4205 **********************************************************************/
4206static int
4207igb_allocate_receive_buffers(struct rx_ring *rxr)
4208{
4209 struct adapter *adapter = rxr->adapter;
4210 device_t dev = adapter->dev;
4211 struct igb_rx_buf *rxbuf;
4212 int i, bsize, error;
4213
4214 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4215 if (!(rxr->rx_buffers =
4216 (struct igb_rx_buf *) malloc(bsize,
4217 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4218 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4219 error = ENOMEM;
4220 goto fail;
4221 }
4222
4223 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4224 1, 0, /* alignment, bounds */
4225 BUS_SPACE_MAXADDR, /* lowaddr */
4226 BUS_SPACE_MAXADDR, /* highaddr */
4227 NULL, NULL, /* filter, filterarg */
4228 MSIZE, /* maxsize */
4229 1, /* nsegments */
4230 MSIZE, /* maxsegsize */
4231 0, /* flags */
4232 NULL, /* lockfunc */
4233 NULL, /* lockfuncarg */
4234 &rxr->htag))) {
4235 device_printf(dev, "Unable to create RX DMA tag\n");
4236 goto fail;
4237 }
4238
4239 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4240 1, 0, /* alignment, bounds */
4241 BUS_SPACE_MAXADDR, /* lowaddr */
4242 BUS_SPACE_MAXADDR, /* highaddr */
4243 NULL, NULL, /* filter, filterarg */
4244 MJUM9BYTES, /* maxsize */
4245 1, /* nsegments */
4246 MJUM9BYTES, /* maxsegsize */
4247 0, /* flags */
4248 NULL, /* lockfunc */
4249 NULL, /* lockfuncarg */
4250 &rxr->ptag))) {
4251 device_printf(dev, "Unable to create RX payload DMA tag\n");
4252 goto fail;
4253 }
4254
4255 for (i = 0; i < adapter->num_rx_desc; i++) {
4256 rxbuf = &rxr->rx_buffers[i];
4257 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4258 if (error) {
4259 device_printf(dev,
4260 "Unable to create RX head DMA maps\n");
4261 goto fail;
4262 }
4263 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4264 if (error) {
4265 device_printf(dev,
4266 "Unable to create RX packet DMA maps\n");
4267 goto fail;
4268 }
4269 }
4270
4271 return (0);
4272
4273fail:
4274 /* Frees all, but can handle partial completion */
4275 igb_free_receive_structures(adapter);
4276 return (error);
4277}
4278
4279
4280static void
4281igb_free_receive_ring(struct rx_ring *rxr)
4282{
4283 struct adapter *adapter = rxr->adapter;
4284 struct igb_rx_buf *rxbuf;
4285
4286
4287 for (int i = 0; i < adapter->num_rx_desc; i++) {
4288 rxbuf = &rxr->rx_buffers[i];
4289 if (rxbuf->m_head != NULL) {
4290 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4291 BUS_DMASYNC_POSTREAD);
4292 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4293 rxbuf->m_head->m_flags |= M_PKTHDR;
4294 m_freem(rxbuf->m_head);
4295 }
4296 if (rxbuf->m_pack != NULL) {
4297 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4298 BUS_DMASYNC_POSTREAD);
4299 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4300 rxbuf->m_pack->m_flags |= M_PKTHDR;
4301 m_freem(rxbuf->m_pack);
4302 }
4303 rxbuf->m_head = NULL;
4304 rxbuf->m_pack = NULL;
4305 }
4306}
4307
4308
4309/*********************************************************************
4310 *
4311 * Initialize a receive ring and its buffers.
4312 *
4313 **********************************************************************/
4314static int
4315igb_setup_receive_ring(struct rx_ring *rxr)
4316{
4317 struct adapter *adapter;
4318 struct ifnet *ifp;
4319 device_t dev;
4320 struct igb_rx_buf *rxbuf;
4321 bus_dma_segment_t pseg[1], hseg[1];
4322 struct lro_ctrl *lro = &rxr->lro;
4323 int rsize, nsegs, error = 0;
4324#ifdef DEV_NETMAP
4325 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4326 struct netmap_slot *slot;
4327#endif /* DEV_NETMAP */
4328
4329 adapter = rxr->adapter;
4330 dev = adapter->dev;
4331 ifp = adapter->ifp;
4332
4333 /* Clear the ring contents */
4334 IGB_RX_LOCK(rxr);
4335#ifdef DEV_NETMAP
4336 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4337#endif /* DEV_NETMAP */
4338 rsize = roundup2(adapter->num_rx_desc *
4339 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4340 bzero((void *)rxr->rx_base, rsize);
4341
4342 /*
4343 ** Free current RX buffer structures and their mbufs
4344 */
4345 igb_free_receive_ring(rxr);
4346
4347 /* Configure for header split? */
4348 if (igb_header_split)
4349 rxr->hdr_split = TRUE;
4350
4351 /* Now replenish the ring mbufs */
4352 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4353 struct mbuf *mh, *mp;
4354
4355 rxbuf = &rxr->rx_buffers[j];
4356#ifdef DEV_NETMAP
4357 if (slot) {
4358 /* slot sj is mapped to the j-th NIC-ring entry */
4359 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4360 uint64_t paddr;
4361 void *addr;
4362
4363 addr = PNMB(na, slot + sj, &paddr);
4364 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4365 /* Update descriptor */
4366 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4367 continue;
4368 }
4369#endif /* DEV_NETMAP */
4370 if (rxr->hdr_split == FALSE)
4371 goto skip_head;
4372
4373 /* First the header */
4374 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4375 if (rxbuf->m_head == NULL) {
4376 error = ENOBUFS;
4377 goto fail;
4378 }
4379 m_adj(rxbuf->m_head, ETHER_ALIGN);
4380 mh = rxbuf->m_head;
4381 mh->m_len = mh->m_pkthdr.len = MHLEN;
4382 mh->m_flags |= M_PKTHDR;
4383 /* Get the memory mapping */
4384 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4385 rxbuf->hmap, rxbuf->m_head, hseg,
4386 &nsegs, BUS_DMA_NOWAIT);
4387 if (error != 0) /* Nothing elegant to do here */
4388 goto fail;
4389 bus_dmamap_sync(rxr->htag,
4390 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4391 /* Update descriptor */
4392 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4393
4394skip_head:
4395 /* Now the payload cluster */
4396 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4397 M_PKTHDR, adapter->rx_mbuf_sz);
4398 if (rxbuf->m_pack == NULL) {
4399 error = ENOBUFS;
4400 goto fail;
4401 }
4402 mp = rxbuf->m_pack;
4403 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4404 /* Get the memory mapping */
4405 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4406 rxbuf->pmap, mp, pseg,
4407 &nsegs, BUS_DMA_NOWAIT);
4408 if (error != 0)
4409 goto fail;
4410 bus_dmamap_sync(rxr->ptag,
4411 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4412 /* Update descriptor */
4413 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4414 }
4415
4416 /* Setup our descriptor indices */
4417 rxr->next_to_check = 0;
4418 rxr->next_to_refresh = adapter->num_rx_desc - 1;
4419 rxr->lro_enabled = FALSE;
4420 rxr->rx_split_packets = 0;
4421 rxr->rx_bytes = 0;
4422
4423 rxr->fmp = NULL;
4424 rxr->lmp = NULL;
4425
4426 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4427 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4428
4429	/*
4430	** Now set up the LRO interface; we
4431	** also only do header split when LRO
4432	** is enabled, since it is so often
4433	** undesirable in similar setups.
4434	*/
4435 if (ifp->if_capenable & IFCAP_LRO) {
4436 error = tcp_lro_init(lro);
4437 if (error) {
4438 device_printf(dev, "LRO Initialization failed!\n");
4439 goto fail;
4440 }
4441 INIT_DEBUGOUT("RX LRO Initialized\n");
4442 rxr->lro_enabled = TRUE;
4443 lro->ifp = adapter->ifp;
4444 }
4445
4446 IGB_RX_UNLOCK(rxr);
4447 return (0);
4448
4449fail:
4450 igb_free_receive_ring(rxr);
4451 IGB_RX_UNLOCK(rxr);
4452 return (error);
4453}
4454
4455
4456/*********************************************************************
4457 *
4458 * Initialize all receive rings.
4459 *
4460 **********************************************************************/
4461static int
4462igb_setup_receive_structures(struct adapter *adapter)
4463{
4464 struct rx_ring *rxr = adapter->rx_rings;
4465 int i;
4466
4467 for (i = 0; i < adapter->num_queues; i++, rxr++)
4468 if (igb_setup_receive_ring(rxr))
4469 goto fail;
4470
4471 return (0);
4472fail:
4473	/*
4474	 * Free RX buffers allocated so far; we only handle
4475	 * the rings that completed, since the failing case will
4476	 * have cleaned up after itself. 'i' is the endpoint.
4477	 */
4478 for (int j = 0; j < i; ++j) {
4479 rxr = &adapter->rx_rings[j];
4480 IGB_RX_LOCK(rxr);
4481 igb_free_receive_ring(rxr);
4482 IGB_RX_UNLOCK(rxr);
4483 }
4484
4485 return (ENOBUFS);
4486}
4487
4488/*
4489 * Initialise the RSS mapping for NICs that support multiple transmit/
4490 * receive rings.
4491 */
4492static void
4493igb_initialise_rss_mapping(struct adapter *adapter)
4494{
4495 struct e1000_hw *hw = &adapter->hw;
4496 int i;
4497 int queue_id;
4498 u32 reta;
4499 u32 rss_key[10], mrqc, shift = 0;
4500
4501 /* XXX? */
4502 if (adapter->hw.mac.type == e1000_82575)
4503 shift = 6;
4504
4505 /*
4506 * The redirection table controls which destination
4507 * queue each bucket redirects traffic to.
4508 * Each DWORD represents four queues, with the LSB
4509 * being the first queue in the DWORD.
4510 *
4511 * This just allocates buckets to queues using round-robin
4512 * allocation.
4513 *
4514 * NOTE: It Just Happens to line up with the default
4515 * RSS allocation method.
4516 */
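	/*
	 * For example, with four queues and shift == 0 the first RETA
	 * dword built below works out to 0x03020100: queue 0 serves
	 * bucket 0 (low byte), queue 1 bucket 1, and so on.
	 */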
4517
4518 /* Warning FM follows */
4519 reta = 0;
4520 for (i = 0; i < 128; i++) {
4521#ifdef RSS
4522 queue_id = rss_get_indirection_to_bucket(i);
4523 /*
4524 * If we have more queues than buckets, we'll
4525 * end up mapping buckets to a subset of the
4526 * queues.
4527 *
4528 * If we have more buckets than queues, we'll
4529 * end up instead assigning multiple buckets
4530 * to queues.
4531 *
4532 * Both are suboptimal, but we need to handle
4533 * the case so we don't go out of bounds
4534 * indexing arrays and such.
4535 */
4536 queue_id = queue_id % adapter->num_queues;
4537#else
4538 queue_id = (i % adapter->num_queues);
4539#endif
4540 /* Adjust if required */
4541 queue_id = queue_id << shift;
4542
4543 /*
4544 * The low 8 bits are for hash value (n+0);
4545 * The next 8 bits are for hash value (n+1), etc.
4546 */
4547 reta = reta >> 8;
4548 reta = reta | ( ((uint32_t) queue_id) << 24);
4549 if ((i & 3) == 3) {
4550 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4551 reta = 0;
4552 }
4553 }
4554
4555 /* Now fill in hash table */
4556
4557 /*
4558 * MRQC: Multiple Receive Queues Command
4559 * Set queuing to RSS control, number depends on the device.
4560 */
4561 mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4562
4563#ifdef RSS
4564 /* XXX ew typecasting */
4565 rss_getkey((uint8_t *) &rss_key);
4566#else
4567 arc4rand(&rss_key, sizeof(rss_key), 0);
4568#endif
4569 for (i = 0; i < 10; i++)
4570 E1000_WRITE_REG_ARRAY(hw,
4571 E1000_RSSRK(0), i, rss_key[i]);
4572
4573 /*
4574 * Configure the RSS fields to hash upon.
4575 */
4576 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4577 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4578 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4579 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4580	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4581	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4582	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4583	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4584
4585 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4586}
4587
4588/*********************************************************************
4589 *
4590 * Enable receive unit.
4591 *
4592 **********************************************************************/
4593static void
4594igb_initialize_receive_units(struct adapter *adapter)
4595{
4596 struct rx_ring *rxr = adapter->rx_rings;
4597 struct ifnet *ifp = adapter->ifp;
4598 struct e1000_hw *hw = &adapter->hw;
4599 u32 rctl, rxcsum, psize, srrctl = 0;
4600
4601 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4602
4603 /*
4604 * Make sure receives are disabled while setting
4605 * up the descriptor ring
4606 */
4607 rctl = E1000_READ_REG(hw, E1000_RCTL);
4608 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4609
4610 /*
4611 ** Set up for header split
4612 */
4613 if (igb_header_split) {
4614 /* Use a standard mbuf for the header */
4615 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4616 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4617 } else
4618 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4619
4620 /*
4621 ** Set up for jumbo frames
4622 */
4623 if (ifp->if_mtu > ETHERMTU) {
4624 rctl |= E1000_RCTL_LPE;
4625 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4626 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4627 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4628 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4629 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4630 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4631 }
4632 /* Set maximum packet len */
4633 psize = adapter->max_frame_size;
4634 /* are we on a vlan? */
4635 if (adapter->ifp->if_vlantrunk != NULL)
4636 psize += VLAN_TAG_SIZE;
4637 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4638 } else {
4639 rctl &= ~E1000_RCTL_LPE;
4640 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4641 rctl |= E1000_RCTL_SZ_2048;
4642 }
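	/*
	 * Note: the SRRCTL packet buffer size field set above is in 1 KB
	 * units (E1000_SRRCTL_BSIZEPKT_SHIFT is 10), so e.g. a 4096-byte
	 * cluster is encoded as 4.
	 */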
4643
4644 /*
4645 * If TX flow control is disabled and there's >1 queue defined,
4646 * enable DROP.
4647 *
4648 * This drops frames rather than hanging the RX MAC for all queues.
4649 */
4650 if ((adapter->num_queues > 1) &&
4651 (adapter->fc == e1000_fc_none ||
4652 adapter->fc == e1000_fc_rx_pause)) {
4653 srrctl |= E1000_SRRCTL_DROP_EN;
4654 }
4655
4656 /* Setup the Base and Length of the Rx Descriptor Rings */
4657 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4658 u64 bus_addr = rxr->rxdma.dma_paddr;
4659 u32 rxdctl;
4660
4661 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4662 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
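		/*
		 * The ring actually holds union e1000_adv_rx_desc entries,
		 * but both descriptor formats are 16 bytes, so sizing RDLEN
		 * with the legacy struct gives the same value.
		 */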
4663 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4664 (uint32_t)(bus_addr >> 32));
4665 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4666 (uint32_t)bus_addr);
4667 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4668 /* Enable this Queue */
4669 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4670 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4671 rxdctl &= 0xFFF00000;
4672 rxdctl |= IGB_RX_PTHRESH;
4673 rxdctl |= IGB_RX_HTHRESH << 8;
4674 rxdctl |= IGB_RX_WTHRESH << 16;
4675 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4676 }
4677
4678 /*
4679 ** Setup for RX MultiQueue
4680 */
4681 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4682 if (adapter->num_queues >1) {
4683
4684 /* rss setup */
4685 igb_initialise_rss_mapping(adapter);
4686
4687		/*
4688		** NOTE: Receive Full-Packet Checksum Offload
4689		** is mutually exclusive with Multiqueue. However,
4690		** this is not the same as the TCP/IP checksum
4691		** offloads, which still work.
4692		*/
4693 rxcsum |= E1000_RXCSUM_PCSD;
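		/*
		 * With PCSD set the hardware also writes the RSS hash into
		 * the descriptor writeback; igb_rxeof() relies on this when
		 * setting the mbuf flowid for multiqueue.
		 */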
4694#if __FreeBSD_version >= 800000
4695 /* For SCTP Offload */
4696 if (((hw->mac.type == e1000_82576) ||
4697 (hw->mac.type == e1000_82580)) &&
4698 (ifp->if_capenable & IFCAP_RXCSUM))
4699 rxcsum |= E1000_RXCSUM_CRCOFL;
4700#endif
4701 } else {
4702 /* Non RSS setup */
4703 if (ifp->if_capenable & IFCAP_RXCSUM) {
4704 rxcsum |= E1000_RXCSUM_IPPCSE;
4705#if __FreeBSD_version >= 800000
4706 if ((adapter->hw.mac.type == e1000_82576) ||
4707 (adapter->hw.mac.type == e1000_82580))
4708 rxcsum |= E1000_RXCSUM_CRCOFL;
4709#endif
4710 } else
4711 rxcsum &= ~E1000_RXCSUM_TUOFL;
4712 }
4713 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4714
4715 /* Setup the Receive Control Register */
4716 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4717 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4718 E1000_RCTL_RDMTS_HALF |
4719 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4720 /* Strip CRC bytes. */
4721 rctl |= E1000_RCTL_SECRC;
4722 /* Make sure VLAN Filters are off */
4723 rctl &= ~E1000_RCTL_VFE;
4724 /* Don't store bad packets */
4725 rctl &= ~E1000_RCTL_SBP;
4726
4727 /* Enable Receives */
4728 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4729
4730 /*
4731 * Setup the HW Rx Head and Tail Descriptor Pointers
4732 * - needs to be after enable
4733 */
4734 for (int i = 0; i < adapter->num_queues; i++) {
4735 rxr = &adapter->rx_rings[i];
4736 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4737#ifdef DEV_NETMAP
4738 /*
4739 * an init() while a netmap client is active must
4740 * preserve the rx buffers passed to userspace.
4741 * In this driver it means we adjust RDT to
4742 * something different from next_to_refresh
4743 * (which is not used in netmap mode).
4744 */
4745 if (ifp->if_capenable & IFCAP_NETMAP) {
4746 struct netmap_adapter *na = NA(adapter->ifp);
4747 struct netmap_kring *kring = &na->rx_rings[i];
4748 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4749
4750 if (t >= adapter->num_rx_desc)
4751 t -= adapter->num_rx_desc;
4752 else if (t < 0)
4753 t += adapter->num_rx_desc;
4754 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4755 } else
4756#endif /* DEV_NETMAP */
4757 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4758 }
4759 return;
4760}
4761
4762/*********************************************************************
4763 *
4764 * Free receive rings.
4765 *
4766 **********************************************************************/
4767static void
4768igb_free_receive_structures(struct adapter *adapter)
4769{
4770 struct rx_ring *rxr = adapter->rx_rings;
4771
4772 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4773 struct lro_ctrl *lro = &rxr->lro;
4774 igb_free_receive_buffers(rxr);
4775 tcp_lro_free(lro);
4776 igb_dma_free(adapter, &rxr->rxdma);
4777 }
4778
4779 free(adapter->rx_rings, M_DEVBUF);
4780}
4781
4782/*********************************************************************
4783 *
4784 * Free receive ring data structures.
4785 *
4786 **********************************************************************/
4787static void
4788igb_free_receive_buffers(struct rx_ring *rxr)
4789{
4790 struct adapter *adapter = rxr->adapter;
4791 struct igb_rx_buf *rxbuf;
4792 int i;
4793
4794 INIT_DEBUGOUT("free_receive_structures: begin");
4795
4796 /* Cleanup any existing buffers */
4797 if (rxr->rx_buffers != NULL) {
4798 for (i = 0; i < adapter->num_rx_desc; i++) {
4799 rxbuf = &rxr->rx_buffers[i];
4800 if (rxbuf->m_head != NULL) {
4801 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4802 BUS_DMASYNC_POSTREAD);
4803 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4804 rxbuf->m_head->m_flags |= M_PKTHDR;
4805 m_freem(rxbuf->m_head);
4806 }
4807 if (rxbuf->m_pack != NULL) {
4808 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4809 BUS_DMASYNC_POSTREAD);
4810 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4811 rxbuf->m_pack->m_flags |= M_PKTHDR;
4812 m_freem(rxbuf->m_pack);
4813 }
4814 rxbuf->m_head = NULL;
4815 rxbuf->m_pack = NULL;
4816 if (rxbuf->hmap != NULL) {
4817 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4818 rxbuf->hmap = NULL;
4819 }
4820 if (rxbuf->pmap != NULL) {
4821 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4822 rxbuf->pmap = NULL;
4823 }
4824 }
4825 if (rxr->rx_buffers != NULL) {
4826 free(rxr->rx_buffers, M_DEVBUF);
4827 rxr->rx_buffers = NULL;
4828 }
4829 }
4830
4831 if (rxr->htag != NULL) {
4832 bus_dma_tag_destroy(rxr->htag);
4833 rxr->htag = NULL;
4834 }
4835 if (rxr->ptag != NULL) {
4836 bus_dma_tag_destroy(rxr->ptag);
4837 rxr->ptag = NULL;
4838 }
4839}
4840
4841static __inline void
4842igb_rx_discard(struct rx_ring *rxr, int i)
4843{
4844 struct igb_rx_buf *rbuf;
4845
4846 rbuf = &rxr->rx_buffers[i];
4847
4848 /* Partially received? Free the chain */
4849 if (rxr->fmp != NULL) {
4850 rxr->fmp->m_flags |= M_PKTHDR;
4851 m_freem(rxr->fmp);
4852 rxr->fmp = NULL;
4853 rxr->lmp = NULL;
4854 }
4855
4856	/*
4857	** With advanced descriptors the writeback
4858	** clobbers the buffer addresses, so it's easier
4859	** to just free the existing mbufs and take
4860	** the normal refresh path to get new buffers
4861	** and mappings.
4862	*/
4863 if (rbuf->m_head) {
4864 m_free(rbuf->m_head);
4865 rbuf->m_head = NULL;
4866 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4867 }
4868
4869 if (rbuf->m_pack) {
4870 m_free(rbuf->m_pack);
4871 rbuf->m_pack = NULL;
4872 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4873 }
4874
4875 return;
4876}
4877
4878static __inline void
4879igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4880{
4881
4882 /*
4883	 * At the moment, LRO is only done for IPv4/TCP packets whose TCP
4884	 * checksum has been validated by hardware, and which carry no VLAN
4885	 * tag in the ethernet header.
4886 */
4887 if (rxr->lro_enabled &&
4888 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4889 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4890 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4891 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4892 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4893 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4894		/*
4895		 * Send to the stack if:
4896		 *  - LRO not enabled, or
4897		 *  - no LRO resources, or
4898		 *  - lro enqueue fails
4899		 */
4900 if (rxr->lro.lro_cnt != 0)
4901 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4902 return;
4903 }
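	/*
	 * The RX lock is dropped around if_input() so the stack can
	 * process the packet (and possibly re-enter the driver) without
	 * this lock held; it is retaken before returning to the clean loop.
	 */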
4904 IGB_RX_UNLOCK(rxr);
4905 (*ifp->if_input)(ifp, m);
4906 IGB_RX_LOCK(rxr);
4907}
4908
4909/*********************************************************************
4910 *
4911 * This routine executes in interrupt context. It replenishes
4912 *  the mbufs in the descriptor ring and sends data which has been
4913 *  DMA'd into host memory to the upper layer.
4914 *
4915 * We loop at most count times if count is > 0, or until done if
4916 * count < 0.
4917 *
4918 * Return TRUE if more to clean, FALSE otherwise
4919 *********************************************************************/
4920static bool
4921igb_rxeof(struct igb_queue *que, int count, int *done)
4922{
4923 struct adapter *adapter = que->adapter;
4924 struct rx_ring *rxr = que->rxr;
4925 struct ifnet *ifp = adapter->ifp;
4926 struct lro_ctrl *lro = &rxr->lro;
4927 struct lro_entry *queued;
4928 int i, processed = 0, rxdone = 0;
4929 u32 ptype, staterr = 0;
4930 union e1000_adv_rx_desc *cur;
4931
4932 IGB_RX_LOCK(rxr);
4933 /* Sync the ring. */
4934 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4935 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4936
4937#ifdef DEV_NETMAP
4938 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4939 IGB_RX_UNLOCK(rxr);
4940 return (FALSE);
4941 }
4942#endif /* DEV_NETMAP */
4943
4944 /* Main clean loop */
4945 for (i = rxr->next_to_check; count != 0;) {
4946 struct mbuf *sendmp, *mh, *mp;
4947 struct igb_rx_buf *rxbuf;
4948 u16 hlen, plen, hdr, vtag, pkt_info;
4949 bool eop = FALSE;
4950
4951 cur = &rxr->rx_base[i];
4952 staterr = le32toh(cur->wb.upper.status_error);
4953 if ((staterr & E1000_RXD_STAT_DD) == 0)
4954 break;
4955 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4956 break;
4957 count--;
4958 sendmp = mh = mp = NULL;
4959 cur->wb.upper.status_error = 0;
4960 rxbuf = &rxr->rx_buffers[i];
4961 plen = le16toh(cur->wb.upper.length);
4962 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4963 if (((adapter->hw.mac.type == e1000_i350) ||
4964 (adapter->hw.mac.type == e1000_i354)) &&
4965 (staterr & E1000_RXDEXT_STATERR_LB))
4966 vtag = be16toh(cur->wb.upper.vlan);
4967 else
4968 vtag = le16toh(cur->wb.upper.vlan);
4969 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4970 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4971 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4972
4973 /*
4974 * Free the frame (all segments) if we're at EOP and
4975 * it's an error.
4976 *
4977 * The datasheet states that EOP + status is only valid for
4978 * the final segment in a multi-segment frame.
4979 */
4980 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4981 adapter->dropped_pkts++;
4982 ++rxr->rx_discarded;
4983 igb_rx_discard(rxr, i);
4984 goto next_desc;
4985 }
4986
4987		/*
4988		** The way the hardware is configured to
4989		** split, it will ONLY use the header buffer
4990		** when header split is enabled; otherwise we
4991		** get normal behavior, i.e., both header and
4992		** payload are DMA'd into the payload buffer.
4993		**
4994		** The fmp test catches the case where a
4995		** packet spans multiple descriptors; in that
4996		** case only the first header is valid.
4997		*/
4998 if (rxr->hdr_split && rxr->fmp == NULL) {
4999 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5000 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5001 E1000_RXDADV_HDRBUFLEN_SHIFT;
5002 if (hlen > IGB_HDR_BUF)
5003 hlen = IGB_HDR_BUF;
5004 mh = rxr->rx_buffers[i].m_head;
5005 mh->m_len = hlen;
5006 /* clear buf pointer for refresh */
5007 rxbuf->m_head = NULL;
5008			/*
5009			** Get the payload length; this
5010			** could be zero if it's a small
5011			** packet.
5012			*/
5013 if (plen > 0) {
5014 mp = rxr->rx_buffers[i].m_pack;
5015 mp->m_len = plen;
5016 mh->m_next = mp;
5017 /* clear buf pointer */
5018 rxbuf->m_pack = NULL;
5019 rxr->rx_split_packets++;
5020 }
5021 } else {
5022 /*
5023 ** Either no header split, or a
5024 ** secondary piece of a fragmented
5025 ** split packet.
5026 */
5027 mh = rxr->rx_buffers[i].m_pack;
5028 mh->m_len = plen;
5029 /* clear buf info for refresh */
5030 rxbuf->m_pack = NULL;
5031 }
5032 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5033
5034 ++processed; /* So we know when to refresh */
5035
5036 /* Initial frame - setup */
5037 if (rxr->fmp == NULL) {
5038 mh->m_pkthdr.len = mh->m_len;
5039 /* Save the head of the chain */
5040 rxr->fmp = mh;
5041 rxr->lmp = mh;
5042 if (mp != NULL) {
5043 /* Add payload if split */
5044 mh->m_pkthdr.len += mp->m_len;
5045 rxr->lmp = mh->m_next;
5046 }
5047 } else {
5048 /* Chain mbuf's together */
5049 rxr->lmp->m_next = mh;
5050 rxr->lmp = rxr->lmp->m_next;
5051 rxr->fmp->m_pkthdr.len += mh->m_len;
5052 }
5053
5054 if (eop) {
5055 rxr->fmp->m_pkthdr.rcvif = ifp;
5056 rxr->rx_packets++;
5057 /* capture data for AIM */
5058 rxr->packets++;
5059 rxr->bytes += rxr->fmp->m_pkthdr.len;
5060 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5061
5062 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5063 igb_rx_checksum(staterr, rxr->fmp, ptype);
5064
5065 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5066 (staterr & E1000_RXD_STAT_VP) != 0) {
5067 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5068 rxr->fmp->m_flags |= M_VLANTAG;
5069 }
5070
5071 /*
5072 * In case of multiqueue, we have RXCSUM.PCSD bit set
5073 * and never cleared. This means we have RSS hash
5074 * available to be used.
5075 */
5076 if (adapter->num_queues > 1) {
5077 rxr->fmp->m_pkthdr.flowid =
5078 le32toh(cur->wb.lower.hi_dword.rss);
5079 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5080 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5081 M_HASHTYPE_SET(rxr->fmp,
5082 M_HASHTYPE_RSS_TCP_IPV4);
5083 break;
5084 case E1000_RXDADV_RSSTYPE_IPV4:
5085 M_HASHTYPE_SET(rxr->fmp,
5086 M_HASHTYPE_RSS_IPV4);
5087 break;
5088 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5089 M_HASHTYPE_SET(rxr->fmp,
5090 M_HASHTYPE_RSS_TCP_IPV6);
5091 break;
5092 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5093 M_HASHTYPE_SET(rxr->fmp,
5094 M_HASHTYPE_RSS_IPV6_EX);
5095 break;
5096 case E1000_RXDADV_RSSTYPE_IPV6:
5097 M_HASHTYPE_SET(rxr->fmp,
5098 M_HASHTYPE_RSS_IPV6);
5099 break;
5100 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5101 M_HASHTYPE_SET(rxr->fmp,
5102 M_HASHTYPE_RSS_TCP_IPV6_EX);
5103 break;
5104 default:
5105 /* XXX fallthrough */
5106 M_HASHTYPE_SET(rxr->fmp,
5107 M_HASHTYPE_OPAQUE);
5108 }
5109 } else {
5110#ifndef IGB_LEGACY_TX
5111 rxr->fmp->m_pkthdr.flowid = que->msix;
5112 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5113#endif
5114 }
5115 sendmp = rxr->fmp;
5116 /* Make sure to set M_PKTHDR. */
5117 sendmp->m_flags |= M_PKTHDR;
5118 rxr->fmp = NULL;
5119 rxr->lmp = NULL;
5120 }
5121
5122next_desc:
5123 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5124 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5125
5126 /* Advance our pointers to the next descriptor. */
5127 if (++i == adapter->num_rx_desc)
5128 i = 0;
5129 /*
5130 ** Send to the stack or LRO
5131 */
5132 if (sendmp != NULL) {
5133 rxr->next_to_check = i;
5134 igb_rx_input(rxr, ifp, sendmp, ptype);
5135 i = rxr->next_to_check;
5136 rxdone++;
5137 }
5138
5139 /* Every 8 descriptors we go to refresh mbufs */
5140 if (processed == 8) {
5141 igb_refresh_mbufs(rxr, i);
5142 processed = 0;
5143 }
5144 }
5145
5146 /* Catch any remainders */
5147 if (igb_rx_unrefreshed(rxr))
5148 igb_refresh_mbufs(rxr, i);
5149
5150 rxr->next_to_check = i;
5151
5152 /*
5153 * Flush any outstanding LRO work
5154 */
5155 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5156 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5157 tcp_lro_flush(lro, queued);
5158 }
5159
5160 if (done != NULL)
5161 *done += rxdone;
5162
5163 IGB_RX_UNLOCK(rxr);
5164 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5165}
5166
5167/*********************************************************************
5168 *
5169 * Verify that the hardware indicated that the checksum is valid.
5170 *  Inform the stack about the status of the checksum so that the
5171 *  stack doesn't spend time verifying it.
5172 *
5173 *********************************************************************/
5174static void
5175igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5176{
5177 u16 status = (u16)staterr;
5178 u8 errors = (u8) (staterr >> 24);
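	/*
	 * The descriptor status bits live in the low 16 bits of the
	 * writeback status_error word and the error bits start at bit 24,
	 * hence the casts and the shift above.
	 */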
5179 int sctp;
5180
5181 /* Ignore Checksum bit is set */
5182 if (status & E1000_RXD_STAT_IXSM) {
5183 mp->m_pkthdr.csum_flags = 0;
5184 return;
5185 }
5186
5187 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5188 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5189 sctp = 1;
5190 else
5191 sctp = 0;
5192 if (status & E1000_RXD_STAT_IPCS) {
5193 /* Did it pass? */
5194 if (!(errors & E1000_RXD_ERR_IPE)) {
5195 /* IP Checksum Good */
5196 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5197 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5198 } else
5199 mp->m_pkthdr.csum_flags = 0;
5200 }
5201
5202 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5203 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5204#if __FreeBSD_version >= 800000
5205 if (sctp) /* reassign */
5206 type = CSUM_SCTP_VALID;
5207#endif
5208 /* Did it pass? */
5209 if (!(errors & E1000_RXD_ERR_TCPE)) {
5210 mp->m_pkthdr.csum_flags |= type;
5211 if (sctp == 0)
5212 mp->m_pkthdr.csum_data = htons(0xffff);
5213 }
5214 }
5215 return;
5216}
5217
5218/*
5219 * This routine is run via a vlan
5220 * config EVENT
5221 */
5222static void
5223igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5224{
5225 struct adapter *adapter = ifp->if_softc;
5226 u32 index, bit;
5227
5228 if (ifp->if_softc != arg) /* Not our event */
5229 return;
5230
5231 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5232 return;
5233
5234 IGB_CORE_LOCK(adapter);
5235 index = (vtag >> 5) & 0x7F;
5236 bit = vtag & 0x1F;
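	/*
	 * Example: vtag 100 selects shadow_vfta[3] bit 4, since
	 * 100 >> 5 == 3 and 100 & 0x1F == 4.
	 */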
5237 adapter->shadow_vfta[index] |= (1 << bit);
5238 ++adapter->num_vlans;
5239 /* Change hw filter setting */
5240 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5241 igb_setup_vlan_hw_support(adapter);
5242 IGB_CORE_UNLOCK(adapter);
5243}
5244
5245/*
5246 * This routine is run via a vlan
5247 * unconfig EVENT
5248 */
5249static void
5250igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5251{
5252 struct adapter *adapter = ifp->if_softc;
5253 u32 index, bit;
5254
5255 if (ifp->if_softc != arg)
5256 return;
5257
5258 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5259 return;
5260
5261 IGB_CORE_LOCK(adapter);
5262 index = (vtag >> 5) & 0x7F;
5263 bit = vtag & 0x1F;
5264 adapter->shadow_vfta[index] &= ~(1 << bit);
5265 --adapter->num_vlans;
5266 /* Change hw filter setting */
5267 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5268 igb_setup_vlan_hw_support(adapter);
5269 IGB_CORE_UNLOCK(adapter);
5270}
5271
5272static void
5273igb_setup_vlan_hw_support(struct adapter *adapter)
5274{
5275 struct e1000_hw *hw = &adapter->hw;
5276 struct ifnet *ifp = adapter->ifp;
5277 u32 reg;
5278
5279 if (adapter->vf_ifp) {
5280 e1000_rlpml_set_vf(hw,
5281 adapter->max_frame_size + VLAN_TAG_SIZE);
5282 return;
5283 }
5284
5285 reg = E1000_READ_REG(hw, E1000_CTRL);
5286 reg |= E1000_CTRL_VME;
5287 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5288
5289 /* Enable the Filter Table */
5290 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5291 reg = E1000_READ_REG(hw, E1000_RCTL);
5292 reg &= ~E1000_RCTL_CFIEN;
5293 reg |= E1000_RCTL_VFE;
5294 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5295 }
5296
5297 /* Update the frame size */
5298 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5299 adapter->max_frame_size + VLAN_TAG_SIZE);
5300
5301 /* Don't bother with table if no vlans */
5302 if ((adapter->num_vlans == 0) ||
5303 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5304 return;
5305 /*
5306	** A soft reset zeroes out the VFTA, so
5307 ** we need to repopulate it now.
5308 */
5309 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5310 if (adapter->shadow_vfta[i] != 0) {
5311 if (adapter->vf_ifp)
5312 e1000_vfta_set_vf(hw,
5313 adapter->shadow_vfta[i], TRUE);
5314 else
5315 e1000_write_vfta(hw,
5316 i, adapter->shadow_vfta[i]);
5317 }
5318}
5319
5320static void
5321igb_enable_intr(struct adapter *adapter)
5322{
5323 /* With RSS set up what to auto clear */
5324 if (adapter->msix_mem) {
5325 u32 mask = (adapter->que_mask | adapter->link_mask);
5326 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5327 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5328 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5329 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5330 E1000_IMS_LSC);
5331 } else {
5332 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5333 IMS_ENABLE_MASK);
5334 }
5335 E1000_WRITE_FLUSH(&adapter->hw);
5336
5337 return;
5338}
5339
5340static void
5341igb_disable_intr(struct adapter *adapter)
5342{
5343 if (adapter->msix_mem) {
5344 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5345 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5346 }
5347 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5348 E1000_WRITE_FLUSH(&adapter->hw);
5349 return;
5350}
5351
5352/*
5353 * Bit of a misnomer: what this really means is
5354 * to enable OS management of the system, i.e.
5355 * to disable special hardware management features.
5356 */
5357static void
5358igb_init_manageability(struct adapter *adapter)
5359{
5360 if (adapter->has_manage) {
5361 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5362 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5363
5364 /* disable hardware interception of ARP */
5365 manc &= ~(E1000_MANC_ARP_EN);
5366
5367 /* enable receiving management packets to the host */
5368 manc |= E1000_MANC_EN_MNG2HOST;
5369 manc2h |= 1 << 5; /* Mng Port 623 */
5370 manc2h |= 1 << 6; /* Mng Port 664 */
5371 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5372 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5373 }
5374}
5375
5376/*
5377 * Give control back to hardware management
5378 * controller if there is one.
5379 */
5380static void
5381igb_release_manageability(struct adapter *adapter)
5382{
5383 if (adapter->has_manage) {
5384 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5385
5386 /* re-enable hardware interception of ARP */
5387 manc |= E1000_MANC_ARP_EN;
5388 manc &= ~E1000_MANC_EN_MNG2HOST;
5389
5390 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5391 }
5392}
5393
5394/*
5395 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5396 * For ASF and Pass Through versions of f/w this means that
5397 * the driver is loaded.
5398 *
5399 */
5400static void
5401igb_get_hw_control(struct adapter *adapter)
5402{
5403 u32 ctrl_ext;
5404
5405 if (adapter->vf_ifp)
5406 return;
5407
5408 /* Let firmware know the driver has taken over */
5409 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5410 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5411 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5412}
5413
5414/*
5415 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5416 * For ASF and Pass Through versions of f/w this means that the
5417 * driver is no longer loaded.
5418 *
5419 */
5420static void
5421igb_release_hw_control(struct adapter *adapter)
5422{
5423 u32 ctrl_ext;
5424
5425 if (adapter->vf_ifp)
5426 return;
5427
5428	/* Let firmware take over control of h/w */
5429 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5430 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5431 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5432}
5433
5434static int
5435igb_is_valid_ether_addr(uint8_t *addr)
5436{
5437 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5438
5439 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5440 return (FALSE);
5441 }
5442
5443 return (TRUE);
5444}
5445
5446
5447/*
5448 * Enable PCI Wake On Lan capability
5449 */
5450static void
5451igb_enable_wakeup(device_t dev)
5452{
5453 u16 cap, status;
5454 u8 id;
5455
5456 /* First find the capabilities pointer*/
5457 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5458 /* Read the PM Capabilities */
5459 id = pci_read_config(dev, cap, 1);
5460 if (id != PCIY_PMG) /* Something wrong */
5461 return;
5462 /* OK, we have the power capabilities, so
5463 now get the status register */
5464 cap += PCIR_POWER_STATUS;
5465 status = pci_read_config(dev, cap, 2);
5466 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5467 pci_write_config(dev, cap, status, 2);
5468 return;
5469}
5470
5471static void
5472igb_led_func(void *arg, int onoff)
5473{
5474 struct adapter *adapter = arg;
5475
5476 IGB_CORE_LOCK(adapter);
5477 if (onoff) {
5478 e1000_setup_led(&adapter->hw);
5479 e1000_led_on(&adapter->hw);
5480 } else {
5481 e1000_led_off(&adapter->hw);
5482 e1000_cleanup_led(&adapter->hw);
5483 }
5484 IGB_CORE_UNLOCK(adapter);
5485}
5486
5487static uint64_t
5488igb_get_vf_counter(if_t ifp, ift_counter cnt)
5489{
5490 struct adapter *adapter;
5491 struct e1000_vf_stats *stats;
5492#ifndef IGB_LEGACY_TX
5493 struct tx_ring *txr;
5494 uint64_t rv;
5495#endif
5496
5497 adapter = if_getsoftc(ifp);
5498 stats = (struct e1000_vf_stats *)adapter->stats;
5499
5500 switch (cnt) {
5501 case IFCOUNTER_IPACKETS:
5502 return (stats->gprc);
5503 case IFCOUNTER_OPACKETS:
5504 return (stats->gptc);
5505 case IFCOUNTER_IBYTES:
5506 return (stats->gorc);
5507 case IFCOUNTER_OBYTES:
5508 return (stats->gotc);
5509 case IFCOUNTER_IMCASTS:
5510 return (stats->mprc);
5511 case IFCOUNTER_IERRORS:
5512 return (adapter->dropped_pkts);
5513 case IFCOUNTER_OERRORS:
5514 return (adapter->watchdog_events);
5515#ifndef IGB_LEGACY_TX
5516 case IFCOUNTER_OQDROPS:
5517 rv = 0;
5518 txr = adapter->tx_rings;
5519 for (int i = 0; i < adapter->num_queues; i++, txr++)
5520 rv += txr->br->br_drops;
5521 return (rv);
5522#endif
5523 default:
5524 return (if_get_counter_default(ifp, cnt));
5525 }
5526}
5527
5528static uint64_t
5529igb_get_counter(if_t ifp, ift_counter cnt)
5530{
5531 struct adapter *adapter;
5532 struct e1000_hw_stats *stats;
5533#ifndef IGB_LEGACY_TX
5534 struct tx_ring *txr;
5535 uint64_t rv;
5536#endif
5537
5538 adapter = if_getsoftc(ifp);
5539 if (adapter->vf_ifp)
5540 return (igb_get_vf_counter(ifp, cnt));
5541
5542 stats = (struct e1000_hw_stats *)adapter->stats;
5543
5544 switch (cnt) {
5545 case IFCOUNTER_IPACKETS:
5546 return (stats->gprc);
5547 case IFCOUNTER_OPACKETS:
5548 return (stats->gptc);
5549 case IFCOUNTER_IBYTES:
5550 return (stats->gorc);
5551 case IFCOUNTER_OBYTES:
5552 return (stats->gotc);
5553 case IFCOUNTER_IMCASTS:
5554 return (stats->mprc);
5555 case IFCOUNTER_OMCASTS:
5556 return (stats->mptc);
5557 case IFCOUNTER_IERRORS:
5558 return (adapter->dropped_pkts + stats->rxerrc +
5559 stats->crcerrs + stats->algnerrc +
5560 stats->ruc + stats->roc + stats->cexterr);
5561 case IFCOUNTER_OERRORS:
5562 return (stats->ecol + stats->latecol +
5563 adapter->watchdog_events);
5564 case IFCOUNTER_COLLISIONS:
5565 return (stats->colc);
5566 case IFCOUNTER_IQDROPS:
5567 return (stats->mpc);
5568#ifndef IGB_LEGACY_TX
5569 case IFCOUNTER_OQDROPS:
5570 rv = 0;
5571 txr = adapter->tx_rings;
5572 for (int i = 0; i < adapter->num_queues; i++, txr++)
5573 rv += txr->br->br_drops;
5574 return (rv);
5575#endif
5576 default:
5577 return (if_get_counter_default(ifp, cnt));
5578 }
5579}
5580
5581/**********************************************************************
5582 *
5583 * Update the board statistics counters.
5584 *
5585 **********************************************************************/
5586static void
5587igb_update_stats_counters(struct adapter *adapter)
5588{
5589 struct e1000_hw *hw = &adapter->hw;
5590 struct e1000_hw_stats *stats;
5591
5592 /*
5593	** The virtual function adapter has only a
5594	** small controlled set of stats, so update only
5595	** those and return.
5596 */
5597 if (adapter->vf_ifp) {
5598 igb_update_vf_stats_counters(adapter);
5599 return;
5600 }
5601
5602 stats = (struct e1000_hw_stats *)adapter->stats;
5603
5604 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5605 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5606 stats->symerrs +=
5607 E1000_READ_REG(hw,E1000_SYMERRS);
5608 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5609 }
5610
5611 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5612 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5613 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5614 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5615
5616 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5617 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5618 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5619 stats->dc += E1000_READ_REG(hw, E1000_DC);
5620 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5621 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5622 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5623 /*
5624 ** For watchdog management we need to know if we have been
5625 ** paused during the last interval, so capture that here.
5626 */
5627 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5628 stats->xoffrxc += adapter->pause_frames;
5629 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5630 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5631 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5632 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5633 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5634 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5635 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5636 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5637 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5638 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5639 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5640 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5641
5642 /* For the 64-bit byte counters the low dword must be read first. */
5643 /* Both registers clear on the read of the high dword */
5644
5645 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5646 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5647 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5648 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5649
5650 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5651 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5652 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5653 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5654 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5655
5656 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5657 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5658 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5659
5660 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5661 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5662 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5663 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5664
5665 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5666 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5667 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5668 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5669 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5670 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5671 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5672 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5673 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5674 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5675
5676 /* Interrupt Counts */
5677
5678 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5679 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5680 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5681 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5682 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5683 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5684 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5685 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5686 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5687
5688 /* Host to Card Statistics */
5689
5690 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5691 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5692 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5693 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5694 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5695 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5696 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5697 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5698 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5699 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5700 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5701 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5702 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5703 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5704
5705 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5706 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5707 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5708 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5709 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5710 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5711
5712 /* Driver specific counters */
5713 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5714 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5715 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5716 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5717 adapter->packet_buf_alloc_tx =
5718 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5719 adapter->packet_buf_alloc_rx =
5720 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5721}
5722
5723
5724/**********************************************************************
5725 *
5726 * Initialize the VF board statistics counters.
5727 *
5728 **********************************************************************/
5729static void
5730igb_vf_init_stats(struct adapter *adapter)
5731{
5732 struct e1000_hw *hw = &adapter->hw;
5733 struct e1000_vf_stats *stats;
5734
5735 stats = (struct e1000_vf_stats *)adapter->stats;
5736 if (stats == NULL)
5737 return;
5738 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5739 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5740 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5741 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5742 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5743}
5744
5745/**********************************************************************
5746 *
5747 * Update the VF board statistics counters.
5748 *
5749 **********************************************************************/
5750static void
5751igb_update_vf_stats_counters(struct adapter *adapter)
5752{
5753 struct e1000_hw *hw = &adapter->hw;
5754 struct e1000_vf_stats *stats;
5755
5756 if (adapter->link_speed == 0)
5757 return;
5758
5759 stats = (struct e1000_vf_stats *)adapter->stats;
5760
5761 UPDATE_VF_REG(E1000_VFGPRC,
5762 stats->last_gprc, stats->gprc);
5763 UPDATE_VF_REG(E1000_VFGORC,
5764 stats->last_gorc, stats->gorc);
5765 UPDATE_VF_REG(E1000_VFGPTC,
5766 stats->last_gptc, stats->gptc);
5767 UPDATE_VF_REG(E1000_VFGOTC,
5768 stats->last_gotc, stats->gotc);
5769 UPDATE_VF_REG(E1000_VFMPRC,
5770 stats->last_mprc, stats->mprc);
5771}
5772
5773/* Export a single 32-bit register via a read-only sysctl. */
5774static int
5775igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5776{
5777 struct adapter *adapter;
5778 u_int val;
5779
5780 adapter = oidp->oid_arg1;
5781 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5782 return (sysctl_handle_int(oidp, &val, 0, req));
5783}
5784
5785/*
5786** Tuneable interrupt rate handler
5787*/
5788static int
5789igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5790{
5791 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5792 int error;
5793 u32 reg, usec, rate;
5794
5795 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5796 usec = ((reg & 0x7FFC) >> 2);
5797 if (usec > 0)
5798 rate = 1000000 / usec;
5799 else
5800 rate = 0;
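	/*
	 * For example, an EITR interval field of 125 is reported as
	 * 1000000 / 125 = 8000 interrupts per second.
	 */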
5801 error = sysctl_handle_int(oidp, &rate, 0, req);
5802 if (error || !req->newptr)
5803 return error;
5804 return 0;
5805}
5806
5807/*
5808 * Add sysctl variables, one per statistic, to the system.
5809 */
5810static void
5811igb_add_hw_stats(struct adapter *adapter)
5812{
5813 device_t dev = adapter->dev;
5814
5815 struct tx_ring *txr = adapter->tx_rings;
5816 struct rx_ring *rxr = adapter->rx_rings;
5817
5818 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5819 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5820 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5821 struct e1000_hw_stats *stats = adapter->stats;
5822
5823 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5824 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5825
5826#define QUEUE_NAME_LEN 32
5827 char namebuf[QUEUE_NAME_LEN];
5828
5829 /* Driver Statistics */
5830 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5831 CTLFLAG_RD, &adapter->dropped_pkts,
5832 "Driver dropped packets");
5833 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5834 CTLFLAG_RD, &adapter->link_irq,
5835 "Link MSIX IRQ Handled");
5836 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5837 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5838 "Defragmenting mbuf chain failed");
5839 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5840 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5841 "Driver tx dma failure in xmit");
5842 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5843 CTLFLAG_RD, &adapter->rx_overruns,
5844 "RX overruns");
5845 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5846 CTLFLAG_RD, &adapter->watchdog_events,
5847 "Watchdog timeouts");
5848
5849 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5850 CTLFLAG_RD, &adapter->device_control,
5851 "Device Control Register");
5852 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5853 CTLFLAG_RD, &adapter->rx_control,
5854 "Receiver Control Register");
5855 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5856 CTLFLAG_RD, &adapter->int_mask,
5857 "Interrupt Mask");
5858 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5859 CTLFLAG_RD, &adapter->eint_mask,
5860 "Extended Interrupt Mask");
5861 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5862 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5863 "Transmit Buffer Packet Allocation");
5864 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5865 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5866 "Receive Buffer Packet Allocation");
5867 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5868 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5869 "Flow Control High Watermark");
5870 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5871 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5872 "Flow Control Low Watermark");
5873
5874 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5875 struct lro_ctrl *lro = &rxr->lro;
5876
5877 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5878 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5879 CTLFLAG_RD, NULL, "Queue Name");
5880 queue_list = SYSCTL_CHILDREN(queue_node);
5881
5882 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5883 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5884 sizeof(&adapter->queues[i]),
5885 igb_sysctl_interrupt_rate_handler,
5886 "IU", "Interrupt Rate");
5887
5888 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5889 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5890 igb_sysctl_reg_handler, "IU",
5891 "Transmit Descriptor Head");
5892 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5893 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5894 igb_sysctl_reg_handler, "IU",
5895 "Transmit Descriptor Tail");
5896 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5897 CTLFLAG_RD, &txr->no_desc_avail,
5898 "Queue Descriptors Unavailable");
5899 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5900 CTLFLAG_RD, &txr->total_packets,
5901 "Queue Packets Transmitted");
5902
5903 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5904 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5905 igb_sysctl_reg_handler, "IU",
5906 "Receive Descriptor Head");
5907 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5908 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5909 igb_sysctl_reg_handler, "IU",
5910 "Receive Descriptor Tail");
5911 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5912 CTLFLAG_RD, &rxr->rx_packets,
5913 "Queue Packets Received");
5914 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5915 CTLFLAG_RD, &rxr->rx_bytes,
5916 "Queue Bytes Received");
5917 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5918 CTLFLAG_RD, &lro->lro_queued, 0,
5919 "LRO Queued");
5920 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5921 CTLFLAG_RD, &lro->lro_flushed, 0,
5922 "LRO Flushed");
5923 }
5924
5925 /* MAC stats get their own sub node */
5926
5927 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5928 CTLFLAG_RD, NULL, "MAC Statistics");
5929 stat_list = SYSCTL_CHILDREN(stat_node);
5930
5931 /*
5932 ** VF adapter has a very limited set of stats
5933	** since it's not managing the metal, so to speak.
5934 */
5935 if (adapter->vf_ifp) {
5936 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5937 CTLFLAG_RD, &stats->gprc,
5938 "Good Packets Received");
5939 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5940 CTLFLAG_RD, &stats->gptc,
5941 "Good Packets Transmitted");
5942 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5943 CTLFLAG_RD, &stats->gorc,
5944 "Good Octets Received");
5945 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5946 CTLFLAG_RD, &stats->gotc,
5947 "Good Octets Transmitted");
5948 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5949 CTLFLAG_RD, &stats->mprc,
5950 "Multicast Packets Received");
5951 return;
5952 }
5953
5954 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5955 CTLFLAG_RD, &stats->ecol,
5956 "Excessive collisions");
5957 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5958 CTLFLAG_RD, &stats->scc,
5959 "Single collisions");
5960 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5961 CTLFLAG_RD, &stats->mcc,
5962 "Multiple collisions");
5963 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5964 CTLFLAG_RD, &stats->latecol,
5965 "Late collisions");
5966 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5967 CTLFLAG_RD, &stats->colc,
5968 "Collision Count");
5969 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5970 CTLFLAG_RD, &stats->symerrs,
5971 "Symbol Errors");
5972 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5973 CTLFLAG_RD, &stats->sec,
5974 "Sequence Errors");
5975 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5976 CTLFLAG_RD, &stats->dc,
5977 "Defer Count");
5978 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5979 CTLFLAG_RD, &stats->mpc,
5980 "Missed Packets");
5981 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5982 CTLFLAG_RD, &stats->rlec,
5983 "Receive Length Errors");
5984 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5985 CTLFLAG_RD, &stats->rnbc,
5986 "Receive No Buffers");
5987 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5988 CTLFLAG_RD, &stats->ruc,
5989 "Receive Undersize");
5990 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5991 CTLFLAG_RD, &stats->rfc,
5992 "Fragmented Packets Received");
5993 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5994 CTLFLAG_RD, &stats->roc,
5995 "Oversized Packets Received");
5996 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5997 CTLFLAG_RD, &stats->rjc,
5998 	    "Received Jabber");
5999 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6000 CTLFLAG_RD, &stats->rxerrc,
6001 "Receive Errors");
6002 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6003 CTLFLAG_RD, &stats->crcerrs,
6004 "CRC errors");
6005 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6006 CTLFLAG_RD, &stats->algnerrc,
6007 "Alignment Errors");
6008 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6009 CTLFLAG_RD, &stats->tncrs,
6010 "Transmit with No CRS");
6011 /* On 82575 these are collision counts */
6012 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6013 CTLFLAG_RD, &stats->cexterr,
6014 "Collision/Carrier extension errors");
6015 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6016 CTLFLAG_RD, &stats->xonrxc,
6017 "XON Received");
6018 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6019 CTLFLAG_RD, &stats->xontxc,
6020 "XON Transmitted");
6021 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6022 CTLFLAG_RD, &stats->xoffrxc,
6023 "XOFF Received");
6024 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6025 CTLFLAG_RD, &stats->xofftxc,
6026 "XOFF Transmitted");
6027 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6028 CTLFLAG_RD, &stats->fcruc,
6029 "Unsupported Flow Control Received");
6030 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6031 CTLFLAG_RD, &stats->mgprc,
6032 "Management Packets Received");
6033 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6034 CTLFLAG_RD, &stats->mgpdc,
6035 "Management Packets Dropped");
6036 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6037 CTLFLAG_RD, &stats->mgptc,
6038 "Management Packets Transmitted");
6039 /* Packet Reception Stats */
6040 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6041 CTLFLAG_RD, &stats->tpr,
6042 "Total Packets Received");
6043 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6044 CTLFLAG_RD, &stats->gprc,
6045 "Good Packets Received");
6046 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6047 CTLFLAG_RD, &stats->bprc,
6048 "Broadcast Packets Received");
6049 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6050 CTLFLAG_RD, &stats->mprc,
6051 "Multicast Packets Received");
6052 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6053 CTLFLAG_RD, &stats->prc64,
6054 "64 byte frames received");
6055 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6056 CTLFLAG_RD, &stats->prc127,
6057 "65-127 byte frames received");
6058 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6059 CTLFLAG_RD, &stats->prc255,
6060 "128-255 byte frames received");
6061 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6062 CTLFLAG_RD, &stats->prc511,
6063 "256-511 byte frames received");
6064 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6065 CTLFLAG_RD, &stats->prc1023,
6066 "512-1023 byte frames received");
6067 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6068 CTLFLAG_RD, &stats->prc1522,
6069 	    "1024-1522 byte frames received");
6070 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6071 CTLFLAG_RD, &stats->gorc,
6072 "Good Octets Received");
6073 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6074 CTLFLAG_RD, &stats->tor,
6075 "Total Octets Received");
6076
6077 /* Packet Transmission Stats */
6078 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6079 CTLFLAG_RD, &stats->gotc,
6080 "Good Octets Transmitted");
6081 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6082 CTLFLAG_RD, &stats->tot,
6083 "Total Octets Transmitted");
6084 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6085 CTLFLAG_RD, &stats->tpt,
6086 "Total Packets Transmitted");
6087 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6088 CTLFLAG_RD, &stats->gptc,
6089 "Good Packets Transmitted");
6090 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6091 CTLFLAG_RD, &stats->bptc,
6092 "Broadcast Packets Transmitted");
6093 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6094 CTLFLAG_RD, &stats->mptc,
6095 "Multicast Packets Transmitted");
6096 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6097 CTLFLAG_RD, &stats->ptc64,
6098 "64 byte frames transmitted");
6099 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6100 CTLFLAG_RD, &stats->ptc127,
6101 "65-127 byte frames transmitted");
6102 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6103 CTLFLAG_RD, &stats->ptc255,
6104 "128-255 byte frames transmitted");
6105 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6106 CTLFLAG_RD, &stats->ptc511,
6107 "256-511 byte frames transmitted");
6108 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6109 CTLFLAG_RD, &stats->ptc1023,
6110 "512-1023 byte frames transmitted");
6111 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6112 CTLFLAG_RD, &stats->ptc1522,
6113 "1024-1522 byte frames transmitted");
6114 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6115 CTLFLAG_RD, &stats->tsctc,
6116 "TSO Contexts Transmitted");
6117 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6118 CTLFLAG_RD, &stats->tsctfc,
6119 "TSO Contexts Failed");
6120
6121
6122 /* Interrupt Stats */
6123
6124 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6125 CTLFLAG_RD, NULL, "Interrupt Statistics");
6126 int_list = SYSCTL_CHILDREN(int_node);
6127
6128 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6129 CTLFLAG_RD, &stats->iac,
6130 "Interrupt Assertion Count");
6131
6132 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6133 CTLFLAG_RD, &stats->icrxptc,
6134 "Interrupt Cause Rx Pkt Timer Expire Count");
6135
6136 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6137 CTLFLAG_RD, &stats->icrxatc,
6138 "Interrupt Cause Rx Abs Timer Expire Count");
6139
6140 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6141 CTLFLAG_RD, &stats->ictxptc,
6142 "Interrupt Cause Tx Pkt Timer Expire Count");
6143
6144 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6145 CTLFLAG_RD, &stats->ictxatc,
6146 "Interrupt Cause Tx Abs Timer Expire Count");
6147
6148 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6149 CTLFLAG_RD, &stats->ictxqec,
6150 "Interrupt Cause Tx Queue Empty Count");
6151
6152 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6153 CTLFLAG_RD, &stats->ictxqmtc,
6154 "Interrupt Cause Tx Queue Min Thresh Count");
6155
6156 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6157 CTLFLAG_RD, &stats->icrxdmtc,
6158 "Interrupt Cause Rx Desc Min Thresh Count");
6159
6160 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6161 CTLFLAG_RD, &stats->icrxoc,
6162 "Interrupt Cause Receiver Overrun Count");
6163
6164 /* Host to Card Stats */
6165
6166 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6167 CTLFLAG_RD, NULL,
6168 "Host to Card Statistics");
6169
6170 host_list = SYSCTL_CHILDREN(host_node);
6171
6172 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6173 CTLFLAG_RD, &stats->cbtmpc,
6174 "Circuit Breaker Tx Packet Count");
6175
6176 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6177 CTLFLAG_RD, &stats->htdpmc,
6178 "Host Transmit Discarded Packets");
6179
6180 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6181 CTLFLAG_RD, &stats->rpthc,
6182 "Rx Packets To Host");
6183
6184 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6185 CTLFLAG_RD, &stats->cbrmpc,
6186 "Circuit Breaker Rx Packet Count");
6187
6188 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6189 CTLFLAG_RD, &stats->cbrdpc,
6190 "Circuit Breaker Rx Dropped Count");
6191
6192 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6193 CTLFLAG_RD, &stats->hgptc,
6194 "Host Good Packets Tx Count");
6195
6196 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6197 CTLFLAG_RD, &stats->htcbdpc,
6198 "Host Tx Circuit Breaker Dropped Count");
6199
6200 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6201 CTLFLAG_RD, &stats->hgorc,
6202 "Host Good Octets Received Count");
6203
6204 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6205 CTLFLAG_RD, &stats->hgotc,
6206 "Host Good Octets Transmit Count");
6207
6208 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6209 CTLFLAG_RD, &stats->lenerrs,
6210 "Length Errors");
6211
6212 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6213 CTLFLAG_RD, &stats->scvpc,
6214 "SerDes/SGMII Code Violation Pkt Count");
6215
6216 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6217 CTLFLAG_RD, &stats->hrmpc,
6218 "Header Redirection Missed Packet Count");
6219}
6220
6221
6222/**********************************************************************
6223 *
6224 * This routine provides a way to dump out the adapter EEPROM,
6225 * often a useful debug/service tool. Only the first 32 words are
6226 * dumped; the data that matters lies within that range.
6227 *
6228 **********************************************************************/
6229static int
6230igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6231{
6232 struct adapter *adapter;
6233 int error;
6234 int result;
6235
6236 result = -1;
6237 error = sysctl_handle_int(oidp, &result, 0, req);
6238
6239 if (error || !req->newptr)
6240 return (error);
6241
6242 /*
6243 * This value will cause a hex dump of the
6244 * first 32 16-bit words of the EEPROM to
6245 * the screen.
6246 */
6247 if (result == 1) {
6248 adapter = (struct adapter *)arg1;
6249 igb_print_nvm_info(adapter);
6250 }
6251
6252 return (error);
6253}
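
/*
 * Usage sketch: the OID this handler is attached to is not visible in this
 * excerpt; assuming it is registered as dev.igb.<unit>.nvm (the usual
 * arrangement), writing 1 to it from userland, e.g. "sysctl dev.igb.0.nvm=1",
 * triggers the console dump performed by igb_print_nvm_info() below.
 */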
6254
6255static void
6256igb_print_nvm_info(struct adapter *adapter)
6257{
6258 u16 eeprom_data;
6259 int i, j, row = 0;
6260
6261 	/* It's a bit crude, but it gets the job done */
6262 printf("\nInterface EEPROM Dump:\n");
6263 printf("Offset\n0x0000 ");
6264 for (i = 0, j = 0; i < 32; i++, j++) {
6265 if (j == 8) { /* Make the offset block */
6266 j = 0; ++row;
6267 printf("\n0x00%x0 ",row);
6268 }
6269 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6270 printf("%04x ", eeprom_data);
6271 }
6272 printf("\n");
6273}
6274
6275static void
6276igb_set_sysctl_value(struct adapter *adapter, const char *name,
6277 const char *description, int *limit, int value)
6278{
6279 *limit = value;
6280 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6281 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6282 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6283}
6284
6285/*
6286** Set flow control using sysctl:
6287** Flow control values:
6288** 0 - off
6289** 1 - rx pause
6290** 2 - tx pause
6291** 3 - full
6292*/
6293static int
6294igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6295{
6296 int error;
6297 static int input = 3; /* default is full */
6298 struct adapter *adapter = (struct adapter *) arg1;
6299
6300 error = sysctl_handle_int(oidp, &input, 0, req);
6301
6302 if ((error) || (req->newptr == NULL))
6303 return (error);
6304
6305 switch (input) {
6306 case e1000_fc_rx_pause:
6307 case e1000_fc_tx_pause:
6308 case e1000_fc_full:
6309 case e1000_fc_none:
6310 adapter->hw.fc.requested_mode = input;
6311 adapter->fc = input;
6312 break;
6313 default:
6314 /* Do nothing */
6315 return (error);
6316 }
6317
6318 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6319 e1000_force_mac_fc(&adapter->hw);
6320 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6321 return (error);
6322}
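
/*
 * A minimal user-space sketch of driving the handler above.  The OID name
 * dev.igb.<unit>.fc is an assumption (its registration is not shown in this
 * excerpt); the new value passes through sysctl_handle_int() and is checked
 * against the e1000_fc_* values before being forced onto the MAC.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int fc = 3;			/* 3 == e1000_fc_full */
	int old;
	size_t oldlen = sizeof(old);

	/* Read the current setting while requesting full flow control. */
	if (sysctlbyname("dev.igb.0.fc", &old, &oldlen, &fc, sizeof(fc)) != 0)
		err(1, "sysctlbyname");
	printf("flow control: %d -> %d\n", old, fc);
	return (0);
}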
6323
6324/*
6325** Manage DMA Coalesce:
6326** Control values:
6327** 0/1 - off/on
6328** Legal timer values are:
6329** 250, 500, and 1000-10000 in steps of 1000
6330*/
6331static int
6332igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6333{
6334 struct adapter *adapter = (struct adapter *) arg1;
6335 int error;
6336
6337 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6338
6339 if ((error) || (req->newptr == NULL))
6340 return (error);
6341
6342 switch (adapter->dmac) {
6343 case 0:
6344 /* Disabling */
6345 break;
6346 case 1: /* Just enable and use default */
6347 adapter->dmac = 1000;
6348 break;
6349 case 250:
6350 case 500:
6351 case 1000:
6352 case 2000:
6353 case 3000:
6354 case 4000:
6355 case 5000:
6356 case 6000:
6357 case 7000:
6358 case 8000:
6359 case 9000:
6360 case 10000:
6361 /* Legal values - allow */
6362 break;
6363 default:
6364 /* Do nothing, illegal value */
6365 adapter->dmac = 0;
6366 return (EINVAL);
6367 }
6368 /* Reinit the interface */
6369 igb_init(adapter);
6370 return (error);
6371}
6372
6373/*
6374** Manage Energy Efficient Ethernet:
6375** Control values:
6376** 0/1 - enabled/disabled
6377*/
6378static int
6379igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6380{
6381 struct adapter *adapter = (struct adapter *) arg1;
6382 int error, value;
6383
6384 value = adapter->hw.dev_spec._82575.eee_disable;
6385 error = sysctl_handle_int(oidp, &value, 0, req);
6386 if (error || req->newptr == NULL)
6387 return (error);
6388 IGB_CORE_LOCK(adapter);
6389 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6390 igb_init_locked(adapter);
6391 IGB_CORE_UNLOCK(adapter);
6392 return (0);
6393}
1349 }
1350}
1351
1352static void
1353igb_init(void *arg)
1354{
1355 struct adapter *adapter = arg;
1356
1357 IGB_CORE_LOCK(adapter);
1358 igb_init_locked(adapter);
1359 IGB_CORE_UNLOCK(adapter);
1360}
1361
1362
1363static void
1364igb_handle_que(void *context, int pending)
1365{
1366 struct igb_queue *que = context;
1367 struct adapter *adapter = que->adapter;
1368 struct tx_ring *txr = que->txr;
1369 struct ifnet *ifp = adapter->ifp;
1370
1371 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1372 bool more;
1373
1374 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1375
1376 IGB_TX_LOCK(txr);
1377 igb_txeof(txr);
1378#ifndef IGB_LEGACY_TX
1379 /* Process the stack queue only if not depleted */
1380 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1381 !drbr_empty(ifp, txr->br))
1382 igb_mq_start_locked(ifp, txr);
1383#else
1384 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1385 igb_start_locked(txr, ifp);
1386#endif
1387 IGB_TX_UNLOCK(txr);
1388 /* Do we need another? */
1389 if (more) {
1390 taskqueue_enqueue(que->tq, &que->que_task);
1391 return;
1392 }
1393 }
1394
1395#ifdef DEVICE_POLLING
1396 if (ifp->if_capenable & IFCAP_POLLING)
1397 return;
1398#endif
1399 /* Reenable this interrupt */
1400 if (que->eims)
1401 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1402 else
1403 igb_enable_intr(adapter);
1404}
1405
1406/* Deal with link in a sleepable context */
1407static void
1408igb_handle_link(void *context, int pending)
1409{
1410 struct adapter *adapter = context;
1411
1412 IGB_CORE_LOCK(adapter);
1413 igb_handle_link_locked(adapter);
1414 IGB_CORE_UNLOCK(adapter);
1415}
1416
1417static void
1418igb_handle_link_locked(struct adapter *adapter)
1419{
1420 struct tx_ring *txr = adapter->tx_rings;
1421 struct ifnet *ifp = adapter->ifp;
1422
1423 IGB_CORE_LOCK_ASSERT(adapter);
1424 adapter->hw.mac.get_link_status = 1;
1425 igb_update_link_status(adapter);
1426 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1427 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1428 IGB_TX_LOCK(txr);
1429#ifndef IGB_LEGACY_TX
1430 /* Process the stack queue only if not depleted */
1431 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1432 !drbr_empty(ifp, txr->br))
1433 igb_mq_start_locked(ifp, txr);
1434#else
1435 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1436 igb_start_locked(txr, ifp);
1437#endif
1438 IGB_TX_UNLOCK(txr);
1439 }
1440 }
1441}
1442
1443/*********************************************************************
1444 *
1445 * MSI/Legacy Deferred
1446 * Interrupt Service routine
1447 *
1448 *********************************************************************/
1449static int
1450igb_irq_fast(void *arg)
1451{
1452 struct adapter *adapter = arg;
1453 struct igb_queue *que = adapter->queues;
1454 u32 reg_icr;
1455
1456
1457 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1458
1459 /* Hot eject? */
1460 if (reg_icr == 0xffffffff)
1461 return FILTER_STRAY;
1462
1463 /* Definitely not our interrupt. */
1464 if (reg_icr == 0x0)
1465 return FILTER_STRAY;
1466
1467 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1468 return FILTER_STRAY;
1469
1470 /*
1471 * Mask interrupts until the taskqueue is finished running. This is
1472 * cheap, just assume that it is needed. This also works around the
1473 * MSI message reordering errata on certain systems.
1474 */
1475 igb_disable_intr(adapter);
1476 taskqueue_enqueue(que->tq, &que->que_task);
1477
1478 /* Link status change */
1479 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1480 taskqueue_enqueue(que->tq, &adapter->link_task);
1481
1482 if (reg_icr & E1000_ICR_RXO)
1483 adapter->rx_overruns++;
1484 return FILTER_HANDLED;
1485}
1486
1487#ifdef DEVICE_POLLING
1488#if __FreeBSD_version >= 800000
1489#define POLL_RETURN_COUNT(a) (a)
1490static int
1491#else
1492#define POLL_RETURN_COUNT(a)
1493static void
1494#endif
1495igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1496{
1497 struct adapter *adapter = ifp->if_softc;
1498 struct igb_queue *que;
1499 struct tx_ring *txr;
1500 u32 reg_icr, rx_done = 0;
1501 u32 loop = IGB_MAX_LOOP;
1502 bool more;
1503
1504 IGB_CORE_LOCK(adapter);
1505 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1506 IGB_CORE_UNLOCK(adapter);
1507 return POLL_RETURN_COUNT(rx_done);
1508 }
1509
1510 if (cmd == POLL_AND_CHECK_STATUS) {
1511 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1512 /* Link status change */
1513 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1514 igb_handle_link_locked(adapter);
1515
1516 if (reg_icr & E1000_ICR_RXO)
1517 adapter->rx_overruns++;
1518 }
1519 IGB_CORE_UNLOCK(adapter);
1520
1521 for (int i = 0; i < adapter->num_queues; i++) {
1522 que = &adapter->queues[i];
1523 txr = que->txr;
1524
1525 igb_rxeof(que, count, &rx_done);
1526
1527 IGB_TX_LOCK(txr);
1528 do {
1529 more = igb_txeof(txr);
1530 } while (loop-- && more);
1531#ifndef IGB_LEGACY_TX
1532 if (!drbr_empty(ifp, txr->br))
1533 igb_mq_start_locked(ifp, txr);
1534#else
1535 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1536 igb_start_locked(txr, ifp);
1537#endif
1538 IGB_TX_UNLOCK(txr);
1539 }
1540
1541 return POLL_RETURN_COUNT(rx_done);
1542}
1543#endif /* DEVICE_POLLING */
1544
1545/*********************************************************************
1546 *
1547 * MSIX Que Interrupt Service routine
1548 *
1549 **********************************************************************/
1550static void
1551igb_msix_que(void *arg)
1552{
1553 struct igb_queue *que = arg;
1554 struct adapter *adapter = que->adapter;
1555 struct ifnet *ifp = adapter->ifp;
1556 struct tx_ring *txr = que->txr;
1557 struct rx_ring *rxr = que->rxr;
1558 u32 newitr = 0;
1559 bool more_rx;
1560
1561 /* Ignore spurious interrupts */
1562 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1563 return;
1564
1565 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1566 ++que->irqs;
1567
1568 IGB_TX_LOCK(txr);
1569 igb_txeof(txr);
1570#ifndef IGB_LEGACY_TX
1571 /* Process the stack queue only if not depleted */
1572 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1573 !drbr_empty(ifp, txr->br))
1574 igb_mq_start_locked(ifp, txr);
1575#else
1576 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1577 igb_start_locked(txr, ifp);
1578#endif
1579 IGB_TX_UNLOCK(txr);
1580
1581 more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1582
1583 if (adapter->enable_aim == FALSE)
1584 goto no_calc;
1585 /*
1586 ** Do Adaptive Interrupt Moderation:
1587 ** - Write out last calculated setting
1588 ** - Calculate based on average size over
1589 ** the last interval.
1590 */
1591 if (que->eitr_setting)
1592 E1000_WRITE_REG(&adapter->hw,
1593 E1000_EITR(que->msix), que->eitr_setting);
1594
1595 que->eitr_setting = 0;
1596
1597 /* Idle, do nothing */
1598 if ((txr->bytes == 0) && (rxr->bytes == 0))
1599 goto no_calc;
1600
1601 	/* Use half the default ITR if the link is sub-gigabit */
1602 if (adapter->link_speed != 1000)
1603 newitr = IGB_DEFAULT_ITR / 2;
1604 else {
1605 if ((txr->bytes) && (txr->packets))
1606 newitr = txr->bytes/txr->packets;
1607 if ((rxr->bytes) && (rxr->packets))
1608 newitr = max(newitr,
1609 (rxr->bytes / rxr->packets));
1610 newitr += 24; /* account for hardware frame, crc */
1611 /* set an upper boundary */
1612 newitr = min(newitr, 3000);
1613 /* Be nice to the mid range */
1614 if ((newitr > 300) && (newitr < 1200))
1615 newitr = (newitr / 3);
1616 else
1617 newitr = (newitr / 2);
1618 }
1619 newitr &= 0x7FFC; /* Mask invalid bits */
1620 if (adapter->hw.mac.type == e1000_82575)
1621 newitr |= newitr << 16;
1622 else
1623 newitr |= E1000_EITR_CNT_IGNR;
1624
1625 /* save for next interrupt */
1626 que->eitr_setting = newitr;
1627
1628 /* Reset state */
1629 txr->bytes = 0;
1630 txr->packets = 0;
1631 rxr->bytes = 0;
1632 rxr->packets = 0;
1633
1634no_calc:
1635 /* Schedule a clean task if needed*/
1636 if (more_rx)
1637 taskqueue_enqueue(que->tq, &que->que_task);
1638 else
1639 /* Reenable this interrupt */
1640 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1641 return;
1642}
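
/*
 * A self-contained restatement of the adaptive interrupt moderation
 * calculation above, useful for reasoning about the numbers.  default_itr
 * stands in for IGB_DEFAULT_ITR (defined in the header, not shown here);
 * the 82575 replication / EITR_CNT_IGNR step is omitted since it only
 * affects register encoding.  For example, 1500-byte frames at gigabit
 * give 1524, halved to 762 and masked to 760.
 */
static unsigned int
igb_aim_next_itr(unsigned int link_speed, unsigned int default_itr,
    unsigned long tx_bytes, unsigned long tx_packets,
    unsigned long rx_bytes, unsigned long rx_packets)
{
	unsigned int newitr = 0;

	if (link_speed != 1000)			/* sub-gig: halve the default */
		return ((default_itr / 2) & 0x7FFC);

	if (tx_bytes && tx_packets)
		newitr = tx_bytes / tx_packets;	/* average TX frame size */
	if (rx_bytes && rx_packets && rx_bytes / rx_packets > newitr)
		newitr = rx_bytes / rx_packets;	/* take the larger average */
	newitr += 24;				/* hardware framing + CRC */
	if (newitr > 3000)			/* upper boundary */
		newitr = 3000;
	if (newitr > 300 && newitr < 1200)	/* be nice to the mid range */
		newitr /= 3;
	else
		newitr /= 2;
	return (newitr & 0x7FFC);		/* mask invalid low bits */
}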
1643
1644
1645/*********************************************************************
1646 *
1647 * MSIX Link Interrupt Service routine
1648 *
1649 **********************************************************************/
1650
1651static void
1652igb_msix_link(void *arg)
1653{
1654 struct adapter *adapter = arg;
1655 u32 icr;
1656
1657 ++adapter->link_irq;
1658 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1659 if (!(icr & E1000_ICR_LSC))
1660 goto spurious;
1661 igb_handle_link(adapter, 0);
1662
1663spurious:
1664 /* Rearm */
1665 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1666 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1667 return;
1668}
1669
1670
1671/*********************************************************************
1672 *
1673 * Media Ioctl callback
1674 *
1675 * This routine is called whenever the user queries the status of
1676 * the interface using ifconfig.
1677 *
1678 **********************************************************************/
1679static void
1680igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1681{
1682 struct adapter *adapter = ifp->if_softc;
1683
1684 INIT_DEBUGOUT("igb_media_status: begin");
1685
1686 IGB_CORE_LOCK(adapter);
1687 igb_update_link_status(adapter);
1688
1689 ifmr->ifm_status = IFM_AVALID;
1690 ifmr->ifm_active = IFM_ETHER;
1691
1692 if (!adapter->link_active) {
1693 IGB_CORE_UNLOCK(adapter);
1694 return;
1695 }
1696
1697 ifmr->ifm_status |= IFM_ACTIVE;
1698
1699 switch (adapter->link_speed) {
1700 case 10:
1701 ifmr->ifm_active |= IFM_10_T;
1702 break;
1703 case 100:
1704 /*
1705 ** Support for 100Mb SFP - these are Fiber
1706 ** but the media type appears as serdes
1707 */
1708 if (adapter->hw.phy.media_type ==
1709 e1000_media_type_internal_serdes)
1710 ifmr->ifm_active |= IFM_100_FX;
1711 else
1712 ifmr->ifm_active |= IFM_100_TX;
1713 break;
1714 case 1000:
1715 ifmr->ifm_active |= IFM_1000_T;
1716 break;
1717 case 2500:
1718 ifmr->ifm_active |= IFM_2500_SX;
1719 break;
1720 }
1721
1722 if (adapter->link_duplex == FULL_DUPLEX)
1723 ifmr->ifm_active |= IFM_FDX;
1724 else
1725 ifmr->ifm_active |= IFM_HDX;
1726
1727 IGB_CORE_UNLOCK(adapter);
1728}
1729
1730/*********************************************************************
1731 *
1732 * Media Ioctl callback
1733 *
1734 * This routine is called when the user changes speed/duplex using
1735 * media/mediaopt option with ifconfig.
1736 *
1737 **********************************************************************/
1738static int
1739igb_media_change(struct ifnet *ifp)
1740{
1741 struct adapter *adapter = ifp->if_softc;
1742 struct ifmedia *ifm = &adapter->media;
1743
1744 INIT_DEBUGOUT("igb_media_change: begin");
1745
1746 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1747 return (EINVAL);
1748
1749 IGB_CORE_LOCK(adapter);
1750 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1751 case IFM_AUTO:
1752 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1753 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1754 break;
1755 case IFM_1000_LX:
1756 case IFM_1000_SX:
1757 case IFM_1000_T:
1758 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1760 break;
1761 case IFM_100_TX:
1762 adapter->hw.mac.autoneg = FALSE;
1763 adapter->hw.phy.autoneg_advertised = 0;
1764 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1765 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1766 else
1767 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1768 break;
1769 case IFM_10_T:
1770 adapter->hw.mac.autoneg = FALSE;
1771 adapter->hw.phy.autoneg_advertised = 0;
1772 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1773 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1774 else
1775 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1776 break;
1777 default:
1778 device_printf(adapter->dev, "Unsupported media type\n");
1779 }
1780
1781 igb_init_locked(adapter);
1782 IGB_CORE_UNLOCK(adapter);
1783
1784 return (0);
1785}
1786
1787
1788/*********************************************************************
1789 *
1790 * This routine maps the mbufs to Advanced TX descriptors.
1791 *
1792 **********************************************************************/
1793static int
1794igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1795{
1796 struct adapter *adapter = txr->adapter;
1797 u32 olinfo_status = 0, cmd_type_len;
1798 int i, j, error, nsegs;
1799 int first;
1800 bool remap = TRUE;
1801 struct mbuf *m_head;
1802 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1803 bus_dmamap_t map;
1804 struct igb_tx_buf *txbuf;
1805 union e1000_adv_tx_desc *txd = NULL;
1806
1807 m_head = *m_headp;
1808
1809 /* Basic descriptor defines */
1810 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1811 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1812
1813 if (m_head->m_flags & M_VLANTAG)
1814 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1815
1816 /*
1817 * Important to capture the first descriptor
1818 * used because it will contain the index of
1819 * the one we tell the hardware to report back
1820 */
1821 first = txr->next_avail_desc;
1822 txbuf = &txr->tx_buffers[first];
1823 map = txbuf->map;
1824
1825 /*
1826 * Map the packet for DMA.
1827 */
1828retry:
1829 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1830 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1831
1832 if (__predict_false(error)) {
1833 struct mbuf *m;
1834
1835 switch (error) {
1836 case EFBIG:
1837 /* Try it again? - one try */
1838 if (remap == TRUE) {
1839 remap = FALSE;
1840 m = m_collapse(*m_headp, M_NOWAIT,
1841 IGB_MAX_SCATTER);
1842 if (m == NULL) {
1843 adapter->mbuf_defrag_failed++;
1844 m_freem(*m_headp);
1845 *m_headp = NULL;
1846 return (ENOBUFS);
1847 }
1848 *m_headp = m;
1849 goto retry;
1850 } else
1851 return (error);
1852 default:
1853 txr->no_tx_dma_setup++;
1854 m_freem(*m_headp);
1855 *m_headp = NULL;
1856 return (error);
1857 }
1858 }
1859
1860 /* Make certain there are enough descriptors */
1861 if (nsegs > txr->tx_avail - 2) {
1862 txr->no_desc_avail++;
1863 bus_dmamap_unload(txr->txtag, map);
1864 return (ENOBUFS);
1865 }
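	/*
	 * Note the two-descriptor headroom: one slot covers the offload
	 * context descriptor that igb_tx_ctx_setup() may consume below,
	 * and one is kept free as a safety margin (the exact rationale is
	 * not spelled out here, but this is the usual convention in the
	 * Intel drivers).
	 */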
1866 m_head = *m_headp;
1867
1868 /*
1869 ** Set up the appropriate offload context
1870 ** this will consume the first descriptor
1871 */
1872 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1873 if (__predict_false(error)) {
1874 m_freem(*m_headp);
1875 *m_headp = NULL;
1876 return (error);
1877 }
1878
1879 /* 82575 needs the queue index added */
1880 if (adapter->hw.mac.type == e1000_82575)
1881 olinfo_status |= txr->me << 4;
1882
1883 i = txr->next_avail_desc;
1884 for (j = 0; j < nsegs; j++) {
1885 bus_size_t seglen;
1886 bus_addr_t segaddr;
1887
1888 txbuf = &txr->tx_buffers[i];
1889 txd = &txr->tx_base[i];
1890 seglen = segs[j].ds_len;
1891 segaddr = htole64(segs[j].ds_addr);
1892
1893 txd->read.buffer_addr = segaddr;
1894 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1895 cmd_type_len | seglen);
1896 txd->read.olinfo_status = htole32(olinfo_status);
1897
1898 if (++i == txr->num_desc)
1899 i = 0;
1900 }
1901
1902 txd->read.cmd_type_len |=
1903 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1904 txr->tx_avail -= nsegs;
1905 txr->next_avail_desc = i;
1906
1907 txbuf->m_head = m_head;
1908 /*
1909 ** Here we swap the map so the last descriptor,
1910 	** which gets the completion interrupt, has the
1911 ** real map, and the first descriptor gets the
1912 ** unused map from this descriptor.
1913 */
1914 txr->tx_buffers[first].map = txbuf->map;
1915 txbuf->map = map;
1916 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1917
1918 /* Set the EOP descriptor that will be marked done */
1919 txbuf = &txr->tx_buffers[first];
1920 txbuf->eop = txd;
1921
1922 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1923 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1924 /*
1925 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1926 * hardware that this frame is available to transmit.
1927 */
1928 ++txr->total_packets;
1929 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1930
1931 return (0);
1932}
1933static void
1934igb_set_promisc(struct adapter *adapter)
1935{
1936 struct ifnet *ifp = adapter->ifp;
1937 struct e1000_hw *hw = &adapter->hw;
1938 u32 reg;
1939
1940 if (adapter->vf_ifp) {
1941 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1942 return;
1943 }
1944
1945 reg = E1000_READ_REG(hw, E1000_RCTL);
1946 if (ifp->if_flags & IFF_PROMISC) {
1947 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1948 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1949 } else if (ifp->if_flags & IFF_ALLMULTI) {
1950 reg |= E1000_RCTL_MPE;
1951 reg &= ~E1000_RCTL_UPE;
1952 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1953 }
1954}
1955
1956static void
1957igb_disable_promisc(struct adapter *adapter)
1958{
1959 struct e1000_hw *hw = &adapter->hw;
1960 struct ifnet *ifp = adapter->ifp;
1961 u32 reg;
1962 int mcnt = 0;
1963
1964 if (adapter->vf_ifp) {
1965 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1966 return;
1967 }
1968 reg = E1000_READ_REG(hw, E1000_RCTL);
1969 reg &= (~E1000_RCTL_UPE);
1970 if (ifp->if_flags & IFF_ALLMULTI)
1971 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1972 else {
1973 struct ifmultiaddr *ifma;
1974#if __FreeBSD_version < 800000
1975 IF_ADDR_LOCK(ifp);
1976#else
1977 if_maddr_rlock(ifp);
1978#endif
1979 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1980 if (ifma->ifma_addr->sa_family != AF_LINK)
1981 continue;
1982 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1983 break;
1984 mcnt++;
1985 }
1986#if __FreeBSD_version < 800000
1987 IF_ADDR_UNLOCK(ifp);
1988#else
1989 if_maddr_runlock(ifp);
1990#endif
1991 }
1992 /* Don't disable if in MAX groups */
1993 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1994 reg &= (~E1000_RCTL_MPE);
1995 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1996}
1997
1998
1999/*********************************************************************
2000 * Multicast Update
2001 *
2002 * This routine is called whenever multicast address list is updated.
2003 *
2004 **********************************************************************/
2005
2006static void
2007igb_set_multi(struct adapter *adapter)
2008{
2009 struct ifnet *ifp = adapter->ifp;
2010 struct ifmultiaddr *ifma;
2011 u32 reg_rctl = 0;
2012 u8 *mta;
2013
2014 int mcnt = 0;
2015
2016 IOCTL_DEBUGOUT("igb_set_multi: begin");
2017
2018 mta = adapter->mta;
2019 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2020 MAX_NUM_MULTICAST_ADDRESSES);
2021
2022#if __FreeBSD_version < 800000
2023 IF_ADDR_LOCK(ifp);
2024#else
2025 if_maddr_rlock(ifp);
2026#endif
2027 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2028 if (ifma->ifma_addr->sa_family != AF_LINK)
2029 continue;
2030
2031 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2032 break;
2033
2034 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2035 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2036 mcnt++;
2037 }
2038#if __FreeBSD_version < 800000
2039 IF_ADDR_UNLOCK(ifp);
2040#else
2041 if_maddr_runlock(ifp);
2042#endif
2043
2044 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2045 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2046 reg_rctl |= E1000_RCTL_MPE;
2047 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2048 } else
2049 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2050}
2051
2052
2053/*********************************************************************
2054 * Timer routine:
2055 * This routine checks for link status,
2056 * updates statistics, and does the watchdog.
2057 *
2058 **********************************************************************/
2059
2060static void
2061igb_local_timer(void *arg)
2062{
2063 struct adapter *adapter = arg;
2064 device_t dev = adapter->dev;
2065 struct ifnet *ifp = adapter->ifp;
2066 struct tx_ring *txr = adapter->tx_rings;
2067 struct igb_queue *que = adapter->queues;
2068 int hung = 0, busy = 0;
2069
2070
2071 IGB_CORE_LOCK_ASSERT(adapter);
2072
2073 igb_update_link_status(adapter);
2074 igb_update_stats_counters(adapter);
2075
2076 /*
2077 ** Check the TX queues status
2078 ** - central locked handling of OACTIVE
2079 ** - watchdog only if all queues show hung
2080 */
2081 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2082 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2083 (adapter->pause_frames == 0))
2084 ++hung;
2085 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2086 ++busy;
2087 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2088 taskqueue_enqueue(que->tq, &que->que_task);
2089 }
2090 if (hung == adapter->num_queues)
2091 goto timeout;
2092 if (busy == adapter->num_queues)
2093 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2094 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2095 (busy < adapter->num_queues))
2096 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2097
2098 adapter->pause_frames = 0;
2099 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2100#ifndef DEVICE_POLLING
2101 /* Schedule all queue interrupts - deadlock protection */
2102 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2103#endif
2104 return;
2105
2106timeout:
2107 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2108 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2109 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2110 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2111 device_printf(dev,"TX(%d) desc avail = %d,"
2112 	    " Next TX to Clean = %d\n",
2113 txr->me, txr->tx_avail, txr->next_to_clean);
2114 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2115 adapter->watchdog_events++;
2116 igb_init_locked(adapter);
2117}
2118
2119static void
2120igb_update_link_status(struct adapter *adapter)
2121{
2122 struct e1000_hw *hw = &adapter->hw;
2123 struct e1000_fc_info *fc = &hw->fc;
2124 struct ifnet *ifp = adapter->ifp;
2125 device_t dev = adapter->dev;
2126 struct tx_ring *txr = adapter->tx_rings;
2127 u32 link_check, thstat, ctrl;
2128 char *flowctl = NULL;
2129
2130 link_check = thstat = ctrl = 0;
2131
2132 /* Get the cached link value or read for real */
2133 switch (hw->phy.media_type) {
2134 case e1000_media_type_copper:
2135 if (hw->mac.get_link_status) {
2136 /* Do the work to read phy */
2137 e1000_check_for_link(hw);
2138 link_check = !hw->mac.get_link_status;
2139 } else
2140 link_check = TRUE;
2141 break;
2142 case e1000_media_type_fiber:
2143 e1000_check_for_link(hw);
2144 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2145 E1000_STATUS_LU);
2146 break;
2147 case e1000_media_type_internal_serdes:
2148 e1000_check_for_link(hw);
2149 link_check = adapter->hw.mac.serdes_has_link;
2150 break;
2151 /* VF device is type_unknown */
2152 case e1000_media_type_unknown:
2153 e1000_check_for_link(hw);
2154 link_check = !hw->mac.get_link_status;
2155 /* Fall thru */
2156 default:
2157 break;
2158 }
2159
2160 /* Check for thermal downshift or shutdown */
2161 if (hw->mac.type == e1000_i350) {
2162 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2163 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2164 }
2165
2166 /* Get the flow control for display */
2167 switch (fc->current_mode) {
2168 case e1000_fc_rx_pause:
2169 flowctl = "RX";
2170 break;
2171 case e1000_fc_tx_pause:
2172 flowctl = "TX";
2173 break;
2174 case e1000_fc_full:
2175 flowctl = "Full";
2176 break;
2177 case e1000_fc_none:
2178 default:
2179 flowctl = "None";
2180 break;
2181 }
2182
2183 /* Now we check if a transition has happened */
2184 if (link_check && (adapter->link_active == 0)) {
2185 e1000_get_speed_and_duplex(&adapter->hw,
2186 &adapter->link_speed, &adapter->link_duplex);
2187 if (bootverbose)
2188 device_printf(dev, "Link is up %d Mbps %s,"
2189 " Flow Control: %s\n",
2190 adapter->link_speed,
2191 ((adapter->link_duplex == FULL_DUPLEX) ?
2192 "Full Duplex" : "Half Duplex"), flowctl);
2193 adapter->link_active = 1;
2194 ifp->if_baudrate = adapter->link_speed * 1000000;
2195 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2196 (thstat & E1000_THSTAT_LINK_THROTTLE))
2197 device_printf(dev, "Link: thermal downshift\n");
2198 /* Delay Link Up for Phy update */
2199 if (((hw->mac.type == e1000_i210) ||
2200 (hw->mac.type == e1000_i211)) &&
2201 (hw->phy.id == I210_I_PHY_ID))
2202 msec_delay(I210_LINK_DELAY);
2203 /* Reset if the media type changed. */
2204 if (hw->dev_spec._82575.media_changed) {
2205 hw->dev_spec._82575.media_changed = false;
2206 adapter->flags |= IGB_MEDIA_RESET;
2207 igb_reset(adapter);
2208 }
2209 /* This can sleep */
2210 if_link_state_change(ifp, LINK_STATE_UP);
2211 } else if (!link_check && (adapter->link_active == 1)) {
2212 ifp->if_baudrate = adapter->link_speed = 0;
2213 adapter->link_duplex = 0;
2214 if (bootverbose)
2215 device_printf(dev, "Link is Down\n");
2216 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2217 (thstat & E1000_THSTAT_PWR_DOWN))
2218 device_printf(dev, "Link: thermal shutdown\n");
2219 adapter->link_active = 0;
2220 /* This can sleep */
2221 if_link_state_change(ifp, LINK_STATE_DOWN);
2222 /* Reset queue state */
2223 for (int i = 0; i < adapter->num_queues; i++, txr++)
2224 txr->queue_status = IGB_QUEUE_IDLE;
2225 }
2226}
2227
2228/*********************************************************************
2229 *
2230 * This routine disables all traffic on the adapter by issuing a
2231 * global reset on the MAC and deallocates TX/RX buffers.
2232 *
2233 **********************************************************************/
2234
2235static void
2236igb_stop(void *arg)
2237{
2238 struct adapter *adapter = arg;
2239 struct ifnet *ifp = adapter->ifp;
2240 struct tx_ring *txr = adapter->tx_rings;
2241
2242 IGB_CORE_LOCK_ASSERT(adapter);
2243
2244 INIT_DEBUGOUT("igb_stop: begin");
2245
2246 igb_disable_intr(adapter);
2247
2248 callout_stop(&adapter->timer);
2249
2250 /* Tell the stack that the interface is no longer active */
2251 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2252 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2253
2254 /* Disarm watchdog timer. */
2255 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2256 IGB_TX_LOCK(txr);
2257 txr->queue_status = IGB_QUEUE_IDLE;
2258 IGB_TX_UNLOCK(txr);
2259 }
2260
2261 e1000_reset_hw(&adapter->hw);
2262 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2263
2264 e1000_led_off(&adapter->hw);
2265 e1000_cleanup_led(&adapter->hw);
2266}
2267
2268
2269/*********************************************************************
2270 *
2271 * Determine hardware revision.
2272 *
2273 **********************************************************************/
2274static void
2275igb_identify_hardware(struct adapter *adapter)
2276{
2277 device_t dev = adapter->dev;
2278
2279 /* Make sure our PCI config space has the necessary stuff set */
2280 pci_enable_busmaster(dev);
2281 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2282
2283 /* Save off the information about this board */
2284 adapter->hw.vendor_id = pci_get_vendor(dev);
2285 adapter->hw.device_id = pci_get_device(dev);
2286 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2287 adapter->hw.subsystem_vendor_id =
2288 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2289 adapter->hw.subsystem_device_id =
2290 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2291
2292 /* Set MAC type early for PCI setup */
2293 e1000_set_mac_type(&adapter->hw);
2294
2295 /* Are we a VF device? */
2296 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2297 (adapter->hw.mac.type == e1000_vfadapt_i350))
2298 adapter->vf_ifp = 1;
2299 else
2300 adapter->vf_ifp = 0;
2301}
2302
2303static int
2304igb_allocate_pci_resources(struct adapter *adapter)
2305{
2306 device_t dev = adapter->dev;
2307 int rid;
2308
2309 rid = PCIR_BAR(0);
2310 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2311 &rid, RF_ACTIVE);
2312 if (adapter->pci_mem == NULL) {
2313 device_printf(dev, "Unable to allocate bus resource: memory\n");
2314 return (ENXIO);
2315 }
2316 adapter->osdep.mem_bus_space_tag =
2317 rman_get_bustag(adapter->pci_mem);
2318 adapter->osdep.mem_bus_space_handle =
2319 rman_get_bushandle(adapter->pci_mem);
2320 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2321
2322 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2323
2324 /* This will setup either MSI/X or MSI */
2325 adapter->msix = igb_setup_msix(adapter);
2326 adapter->hw.back = &adapter->osdep;
2327
2328 return (0);
2329}
2330
2331/*********************************************************************
2332 *
2333 * Setup the Legacy or MSI Interrupt handler
2334 *
2335 **********************************************************************/
2336static int
2337igb_allocate_legacy(struct adapter *adapter)
2338{
2339 device_t dev = adapter->dev;
2340 struct igb_queue *que = adapter->queues;
2341#ifndef IGB_LEGACY_TX
2342 struct tx_ring *txr = adapter->tx_rings;
2343#endif
2344 int error, rid = 0;
2345
2346 /* Turn off all interrupts */
2347 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2348
2349 /* MSI RID is 1 */
2350 if (adapter->msix == 1)
2351 rid = 1;
2352
2353 /* We allocate a single interrupt resource */
2354 adapter->res = bus_alloc_resource_any(dev,
2355 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2356 if (adapter->res == NULL) {
2357 device_printf(dev, "Unable to allocate bus resource: "
2358 "interrupt\n");
2359 return (ENXIO);
2360 }
2361
2362#ifndef IGB_LEGACY_TX
2363 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2364#endif
2365
2366 /*
2367 * Try allocating a fast interrupt and the associated deferred
2368 * processing contexts.
2369 */
2370 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2371 /* Make tasklet for deferred link handling */
2372 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2373 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2374 taskqueue_thread_enqueue, &que->tq);
2375 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2376 device_get_nameunit(adapter->dev));
2377 if ((error = bus_setup_intr(dev, adapter->res,
2378 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2379 adapter, &adapter->tag)) != 0) {
2380 device_printf(dev, "Failed to register fast interrupt "
2381 "handler: %d\n", error);
2382 taskqueue_free(que->tq);
2383 que->tq = NULL;
2384 return (error);
2385 }
2386
2387 return (0);
2388}
2389
2390
2391/*********************************************************************
2392 *
2393 * Setup the MSIX Queue Interrupt handlers:
2394 *
2395 **********************************************************************/
2396static int
2397igb_allocate_msix(struct adapter *adapter)
2398{
2399 device_t dev = adapter->dev;
2400 struct igb_queue *que = adapter->queues;
2401 int error, rid, vector = 0;
2402 int cpu_id = 0;
2403#ifdef RSS
2404 cpuset_t cpu_mask;
2405#endif
2406
2407 /* Be sure to start with all interrupts disabled */
2408 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2409 E1000_WRITE_FLUSH(&adapter->hw);
2410
2411#ifdef RSS
2412 /*
2413 * If we're doing RSS, the number of queues needs to
2414 * match the number of RSS buckets that are configured.
2415 *
2416 * + If there's more queues than RSS buckets, we'll end
2417 * up with queues that get no traffic.
2418 *
2419 * + If there's more RSS buckets than queues, we'll end
2420 * up having multiple RSS buckets map to the same queue,
2421 * so there'll be some contention.
2422 */
2423 if (adapter->num_queues != rss_getnumbuckets()) {
2424 device_printf(dev,
2425 "%s: number of queues (%d) != number of RSS buckets (%d)"
2426 "; performance will be impacted.\n",
2427 __func__,
2428 adapter->num_queues,
2429 rss_getnumbuckets());
2430 }
2431#endif
2432
2433 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2434 rid = vector +1;
2435 que->res = bus_alloc_resource_any(dev,
2436 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2437 if (que->res == NULL) {
2438 device_printf(dev,
2439 "Unable to allocate bus resource: "
2440 "MSIX Queue Interrupt\n");
2441 return (ENXIO);
2442 }
2443 error = bus_setup_intr(dev, que->res,
2444 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2445 igb_msix_que, que, &que->tag);
2446 if (error) {
2447 que->res = NULL;
2448 			device_printf(dev, "Failed to register Queue handler\n");
2449 return (error);
2450 }
2451#if __FreeBSD_version >= 800504
2452 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2453#endif
2454 que->msix = vector;
2455 if (adapter->hw.mac.type == e1000_82575)
2456 que->eims = E1000_EICR_TX_QUEUE0 << i;
2457 else
2458 que->eims = 1 << vector;
2459
2460#ifdef RSS
2461 /*
2462 * The queue ID is used as the RSS layer bucket ID.
2463 * We look up the queue ID -> RSS CPU ID and select
2464 * that.
2465 */
2466 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2467#else
2468 /*
2469 * Bind the msix vector, and thus the
2470 * rings to the corresponding cpu.
2471 *
2472 * This just happens to match the default RSS round-robin
2473 * bucket -> queue -> CPU allocation.
2474 */
2475 if (adapter->num_queues > 1) {
2476 if (igb_last_bind_cpu < 0)
2477 igb_last_bind_cpu = CPU_FIRST();
2478 cpu_id = igb_last_bind_cpu;
2479 }
2480#endif
2481
2482 if (adapter->num_queues > 1) {
2483 bus_bind_intr(dev, que->res, cpu_id);
2484#ifdef RSS
2485 device_printf(dev,
2486 "Bound queue %d to RSS bucket %d\n",
2487 i, cpu_id);
2488#else
2489 device_printf(dev,
2490 "Bound queue %d to cpu %d\n",
2491 i, cpu_id);
2492#endif
2493 }
2494
2495#ifndef IGB_LEGACY_TX
2496 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2497 que->txr);
2498#endif
2499 /* Make tasklet for deferred handling */
2500 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2501 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2502 taskqueue_thread_enqueue, &que->tq);
2503 if (adapter->num_queues > 1) {
2504 /*
2505 * Only pin the taskqueue thread to a CPU if
2506 * RSS is in use.
2507 *
2508 * This again just happens to match the default RSS
2509 * round-robin bucket -> queue -> CPU allocation.
2510 */
2511#ifdef RSS
2512 CPU_SETOF(cpu_id, &cpu_mask);
2513 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2514 &cpu_mask,
2515 "%s que (bucket %d)",
2516 device_get_nameunit(adapter->dev),
2517 cpu_id);
2518#else
2519 taskqueue_start_threads(&que->tq, 1, PI_NET,
2520 "%s que (qid %d)",
2521 device_get_nameunit(adapter->dev),
2522 cpu_id);
2523#endif
2524 } else {
2525 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2526 device_get_nameunit(adapter->dev));
2527 }
2528
2529 /* Finally update the last bound CPU id */
2530 if (adapter->num_queues > 1)
2531 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2532 }
2533
2534 /* And Link */
2535 rid = vector + 1;
2536 adapter->res = bus_alloc_resource_any(dev,
2537 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2538 if (adapter->res == NULL) {
2539 device_printf(dev,
2540 "Unable to allocate bus resource: "
2541 "MSIX Link Interrupt\n");
2542 return (ENXIO);
2543 }
2544 if ((error = bus_setup_intr(dev, adapter->res,
2545 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2546 igb_msix_link, adapter, &adapter->tag)) != 0) {
2547 		device_printf(dev, "Failed to register Link handler\n");
2548 return (error);
2549 }
2550#if __FreeBSD_version >= 800504
2551 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2552#endif
2553 adapter->linkvec = vector;
2554
2555 return (0);
2556}
2557
2558
2559static void
2560igb_configure_queues(struct adapter *adapter)
2561{
2562 struct e1000_hw *hw = &adapter->hw;
2563 struct igb_queue *que;
2564 u32 tmp, ivar = 0, newitr = 0;
2565
2566 /* First turn on RSS capability */
2567 if (adapter->hw.mac.type != e1000_82575)
2568 E1000_WRITE_REG(hw, E1000_GPIE,
2569 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2570 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2571
2572 /* Turn on MSIX */
2573 switch (adapter->hw.mac.type) {
2574 case e1000_82580:
2575 case e1000_i350:
2576 case e1000_i354:
2577 case e1000_i210:
2578 case e1000_i211:
2579 case e1000_vfadapt:
2580 case e1000_vfadapt_i350:
2581 /* RX entries */
2582 for (int i = 0; i < adapter->num_queues; i++) {
2583 u32 index = i >> 1;
2584 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2585 que = &adapter->queues[i];
2586 if (i & 1) {
2587 ivar &= 0xFF00FFFF;
2588 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2589 } else {
2590 ivar &= 0xFFFFFF00;
2591 ivar |= que->msix | E1000_IVAR_VALID;
2592 }
2593 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2594 }
2595 /* TX entries */
2596 for (int i = 0; i < adapter->num_queues; i++) {
2597 u32 index = i >> 1;
2598 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2599 que = &adapter->queues[i];
2600 if (i & 1) {
2601 ivar &= 0x00FFFFFF;
2602 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2603 } else {
2604 ivar &= 0xFFFF00FF;
2605 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2606 }
2607 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2608 adapter->que_mask |= que->eims;
2609 }
2610
2611 /* And for the link interrupt */
2612 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2613 adapter->link_mask = 1 << adapter->linkvec;
2614 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2615 break;
2616 case e1000_82576:
2617 /* RX entries */
2618 for (int i = 0; i < adapter->num_queues; i++) {
2619 u32 index = i & 0x7; /* Each IVAR has two entries */
2620 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2621 que = &adapter->queues[i];
2622 if (i < 8) {
2623 ivar &= 0xFFFFFF00;
2624 ivar |= que->msix | E1000_IVAR_VALID;
2625 } else {
2626 ivar &= 0xFF00FFFF;
2627 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2628 }
2629 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2630 adapter->que_mask |= que->eims;
2631 }
2632 /* TX entries */
2633 for (int i = 0; i < adapter->num_queues; i++) {
2634 u32 index = i & 0x7; /* Each IVAR has two entries */
2635 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2636 que = &adapter->queues[i];
2637 if (i < 8) {
2638 ivar &= 0xFFFF00FF;
2639 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2640 } else {
2641 ivar &= 0x00FFFFFF;
2642 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2643 }
2644 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2645 adapter->que_mask |= que->eims;
2646 }
2647
2648 /* And for the link interrupt */
2649 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2650 adapter->link_mask = 1 << adapter->linkvec;
2651 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2652 break;
2653
2654 case e1000_82575:
2655 /* enable MSI-X support*/
2656 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2657 tmp |= E1000_CTRL_EXT_PBA_CLR;
2658 /* Auto-Mask interrupts upon ICR read. */
2659 tmp |= E1000_CTRL_EXT_EIAME;
2660 tmp |= E1000_CTRL_EXT_IRCA;
2661 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2662
2663 /* Queues */
2664 for (int i = 0; i < adapter->num_queues; i++) {
2665 que = &adapter->queues[i];
2666 tmp = E1000_EICR_RX_QUEUE0 << i;
2667 tmp |= E1000_EICR_TX_QUEUE0 << i;
2668 que->eims = tmp;
2669 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2670 i, que->eims);
2671 adapter->que_mask |= que->eims;
2672 }
2673
2674 /* Link */
2675 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2676 E1000_EIMS_OTHER);
2677 adapter->link_mask |= E1000_EIMS_OTHER;
2678 default:
2679 break;
2680 }
2681
2682 /* Set the starting interrupt rate */
2683 if (igb_max_interrupt_rate > 0)
2684 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2685
2686 if (hw->mac.type == e1000_82575)
2687 newitr |= newitr << 16;
2688 else
2689 newitr |= E1000_EITR_CNT_IGNR;
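	/*
	 * For example, with igb_max_interrupt_rate at its usual default of
	 * 8000 interrupts/sec (the tunable is defined elsewhere in this
	 * file), newitr = 4000000 / 8000 = 500 (0x1F4), which the 0x7FFC
	 * mask leaves unchanged.
	 */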
2690
2691 for (int i = 0; i < adapter->num_queues; i++) {
2692 que = &adapter->queues[i];
2693 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2694 }
2695
2696 return;
2697}
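
/*
 * A small illustrative helper mirroring the IVAR addressing above, handy
 * when checking register dumps by hand; it is not used by the driver.
 * Given a queue number it reports which IVAR register and which byte
 * within it (0-3) receive the MSI-X vector; is_82576 selects between the
 * two layouts implemented in the switch statement.
 */
static void
igb_ivar_slot(int queue, int is_tx, int is_82576,
    int *ivar_index, int *byte_pos)
{
	if (is_82576) {
		*ivar_index = queue & 0x7;	/* each IVAR holds two queues */
		*byte_pos = (queue < 8 ? 0 : 2) + (is_tx ? 1 : 0);
	} else {				/* 82580/i350/i354/i21x layout */
		*ivar_index = queue >> 1;
		*byte_pos = ((queue & 1) ? 2 : 0) + (is_tx ? 1 : 0);
	}
}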
2698
2699
2700static void
2701igb_free_pci_resources(struct adapter *adapter)
2702{
2703 struct igb_queue *que = adapter->queues;
2704 device_t dev = adapter->dev;
2705 int rid;
2706
2707 /*
2708 ** There is a slight possibility of a failure mode
2709 ** in attach that will result in entering this function
2710 ** before interrupt resources have been initialized, and
2711 	** in that case we do not want to execute the loops below.
2712 	** We can detect this reliably by the state of the adapter's
2713 ** res pointer.
2714 */
2715 if (adapter->res == NULL)
2716 goto mem;
2717
2718 /*
2719 * First release all the interrupt resources:
2720 */
2721 for (int i = 0; i < adapter->num_queues; i++, que++) {
2722 rid = que->msix + 1;
2723 if (que->tag != NULL) {
2724 bus_teardown_intr(dev, que->res, que->tag);
2725 que->tag = NULL;
2726 }
2727 if (que->res != NULL)
2728 bus_release_resource(dev,
2729 SYS_RES_IRQ, rid, que->res);
2730 }
2731
2732 /* Clean the Legacy or Link interrupt last */
2733 if (adapter->linkvec) /* we are doing MSIX */
2734 rid = adapter->linkvec + 1;
2735 else
2736 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2737
2738 que = adapter->queues;
2739 if (adapter->tag != NULL) {
2740 taskqueue_drain(que->tq, &adapter->link_task);
2741 bus_teardown_intr(dev, adapter->res, adapter->tag);
2742 adapter->tag = NULL;
2743 }
2744 if (adapter->res != NULL)
2745 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2746
2747 for (int i = 0; i < adapter->num_queues; i++, que++) {
2748 if (que->tq != NULL) {
2749#ifndef IGB_LEGACY_TX
2750 taskqueue_drain(que->tq, &que->txr->txq_task);
2751#endif
2752 taskqueue_drain(que->tq, &que->que_task);
2753 taskqueue_free(que->tq);
2754 }
2755 }
2756mem:
2757 if (adapter->msix)
2758 pci_release_msi(dev);
2759
2760 if (adapter->msix_mem != NULL)
2761 bus_release_resource(dev, SYS_RES_MEMORY,
2762 adapter->memrid, adapter->msix_mem);
2763
2764 if (adapter->pci_mem != NULL)
2765 bus_release_resource(dev, SYS_RES_MEMORY,
2766 PCIR_BAR(0), adapter->pci_mem);
2767
2768}
2769
2770/*
2771 * Setup Either MSI/X or MSI
2772 */
2773static int
2774igb_setup_msix(struct adapter *adapter)
2775{
2776 device_t dev = adapter->dev;
2777 int bar, want, queues, msgs, maxqueues;
2778
2779 /* tuneable override */
2780 if (igb_enable_msix == 0)
2781 goto msi;
2782
2783 /* First try MSI/X */
2784 msgs = pci_msix_count(dev);
2785 if (msgs == 0)
2786 goto msi;
2787 /*
2788 ** Some new devices, as with ixgbe, now may
2789 ** use a different BAR, so we need to keep
2790 ** track of which is used.
2791 */
2792 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2793 bar = pci_read_config(dev, adapter->memrid, 4);
2794 if (bar == 0) /* use next bar */
2795 adapter->memrid += 4;
2796 adapter->msix_mem = bus_alloc_resource_any(dev,
2797 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2798 if (adapter->msix_mem == NULL) {
2799 /* May not be enabled */
2800 device_printf(adapter->dev,
2801 "Unable to map MSIX table \n");
2802 goto msi;
2803 }
2804
2805 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2806
2807 /* Override via tuneable */
2808 if (igb_num_queues != 0)
2809 queues = igb_num_queues;
2810
2811#ifdef RSS
2812 /* If we're doing RSS, clamp at the number of RSS buckets */
2813 if (queues > rss_getnumbuckets())
2814 queues = rss_getnumbuckets();
2815#endif
2816
2817
2818 /* Sanity check based on HW */
2819 switch (adapter->hw.mac.type) {
2820 case e1000_82575:
2821 maxqueues = 4;
2822 break;
2823 case e1000_82576:
2824 case e1000_82580:
2825 case e1000_i350:
2826 case e1000_i354:
2827 maxqueues = 8;
2828 break;
2829 case e1000_i210:
2830 maxqueues = 4;
2831 break;
2832 case e1000_i211:
2833 maxqueues = 2;
2834 break;
2835 default: /* VF interfaces */
2836 maxqueues = 1;
2837 break;
2838 }
2839
2840 /* Final clamp on the actual hardware capability */
2841 if (queues > maxqueues)
2842 queues = maxqueues;
2843
2844 /*
2845 ** One vector (RX/TX pair) per queue
2846 	** plus an additional one for the Link interrupt
2847 */
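	/*
	 * For example, a hypothetical 4-core machine with an i350
	 * (maxqueues = 8) reporting 10 MSI-X messages ends up with
	 * queues = 4 and want = 5, so 5 vectors are requested below.
	 */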
2848 want = queues + 1;
2849 if (msgs >= want)
2850 msgs = want;
2851 else {
2852 device_printf(adapter->dev,
2853 "MSIX Configuration Problem, "
2854 "%d vectors configured, but %d queues wanted!\n",
2855 msgs, want);
2856 goto msi;
2857 }
2858 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2859 device_printf(adapter->dev,
2860 "Using MSIX interrupts with %d vectors\n", msgs);
2861 adapter->num_queues = queues;
2862 return (msgs);
2863 }
2864 /*
2865 ** If MSIX alloc failed or provided us with
2866 ** less than needed, free and fall through to MSI
2867 */
2868 pci_release_msi(dev);
2869
2870msi:
2871 if (adapter->msix_mem != NULL) {
2872 bus_release_resource(dev, SYS_RES_MEMORY,
2873 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2874 adapter->msix_mem = NULL;
2875 }
2876 msgs = 1;
2877 if (pci_alloc_msi(dev, &msgs) == 0) {
2878 device_printf(adapter->dev," Using an MSI interrupt\n");
2879 return (msgs);
2880 }
2881 device_printf(adapter->dev," Using a Legacy interrupt\n");
2882 return (0);
2883}
2884
2885/*********************************************************************
2886 *
2887 * Initialize the DMA Coalescing feature
2888 *
2889 **********************************************************************/
2890static void
2891igb_init_dmac(struct adapter *adapter, u32 pba)
2892{
2893 device_t dev = adapter->dev;
2894 struct e1000_hw *hw = &adapter->hw;
2895 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2896 u16 hwm;
2897
2898 if (hw->mac.type == e1000_i211)
2899 return;
2900
2901 if (hw->mac.type > e1000_82580) {
2902
2903 if (adapter->dmac == 0) { /* Disabling it */
2904 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2905 return;
2906 } else
2907 device_printf(dev, "DMA Coalescing enabled\n");
2908
2909 /* Set starting threshold */
2910 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2911
2912 hwm = 64 * pba - adapter->max_frame_size / 16;
2913 if (hwm < 64 * (pba - 6))
2914 hwm = 64 * (pba - 6);
2915 reg = E1000_READ_REG(hw, E1000_FCRTC);
2916 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2917 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2918 & E1000_FCRTC_RTH_COAL_MASK);
2919 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2920
2921
2922 dmac = pba - adapter->max_frame_size / 512;
2923 if (dmac < pba - 10)
2924 dmac = pba - 10;
2925 reg = E1000_READ_REG(hw, E1000_DMACR);
2926 reg &= ~E1000_DMACR_DMACTHR_MASK;
2927 reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2928 & E1000_DMACR_DMACTHR_MASK);
2929
2930		/* transition to L0s or L1 if available. */
2931 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2932
2933		/* Check whether this is a 2.5Gb backplane connection
2934		 * before configuring the watchdog timer: on 2.5Gb links
2935		 * the timer counts in 12.8usec intervals, while on all
2936		 * other links it counts in 32usec intervals, so the
2937		 * requested value must be scaled accordingly.
2938		 */
2939 if (hw->mac.type == e1000_i354) {
2940 int status = E1000_READ_REG(hw, E1000_STATUS);
2941 if ((status & E1000_STATUS_2P5_SKU) &&
2942 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2943 reg |= ((adapter->dmac * 5) >> 6);
2944 else
2945 reg |= (adapter->dmac >> 5);
2946 } else {
2947 reg |= (adapter->dmac >> 5);
2948 }
2949
2950 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2951
2952 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2953
2954 /* Set the interval before transition */
2955 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2956 if (hw->mac.type == e1000_i350)
2957 reg |= IGB_DMCTLX_DCFLUSH_DIS;
2958 /*
2959		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so a
2960		** 4 usec delay needs 10 (0xA) units instead of 4 (0x4).
2961 */
2962 if (hw->mac.type == e1000_i354) {
2963 int status = E1000_READ_REG(hw, E1000_STATUS);
2964 if ((status & E1000_STATUS_2P5_SKU) &&
2965 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2966 reg |= 0xA;
2967 else
2968 reg |= 0x4;
2969 } else {
2970 reg |= 0x4;
2971 }
2972
2973 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2974
2975 /* free space in tx packet buffer to wake from DMA coal */
2976 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2977 (2 * adapter->max_frame_size)) >> 6);
2978
2979 /* make low power state decision controlled by DMA coal */
2980 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2981 reg &= ~E1000_PCIEMISC_LX_DECISION;
2982 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2983
2984 } else if (hw->mac.type == e1000_82580) {
2985 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2986 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2987 reg & ~E1000_PCIEMISC_LX_DECISION);
2988 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2989 }
2990}
2991
2992
2993/*********************************************************************
2994 *
2995 *  Set up a fresh starting state
2996 *
2997 **********************************************************************/
2998static void
2999igb_reset(struct adapter *adapter)
3000{
3001 device_t dev = adapter->dev;
3002 struct e1000_hw *hw = &adapter->hw;
3003 struct e1000_fc_info *fc = &hw->fc;
3004 struct ifnet *ifp = adapter->ifp;
3005 u32 pba = 0;
3006 u16 hwm;
3007
3008 INIT_DEBUGOUT("igb_reset: begin");
3009
3010 /* Let the firmware know the OS is in control */
3011 igb_get_hw_control(adapter);
3012
3013 /*
3014 * Packet Buffer Allocation (PBA)
3015	 * Writing PBA sets the receive portion of the buffer;
3016 * the remainder is used for the transmit buffer.
3017 */
3018 switch (hw->mac.type) {
3019 case e1000_82575:
3020 pba = E1000_PBA_32K;
3021 break;
3022 case e1000_82576:
3023 case e1000_vfadapt:
3024 pba = E1000_READ_REG(hw, E1000_RXPBS);
3025 pba &= E1000_RXPBS_SIZE_MASK_82576;
3026 break;
3027 case e1000_82580:
3028 case e1000_i350:
3029 case e1000_i354:
3030 case e1000_vfadapt_i350:
3031 pba = E1000_READ_REG(hw, E1000_RXPBS);
3032 pba = e1000_rxpbs_adjust_82580(pba);
3033 break;
3034 case e1000_i210:
3035 case e1000_i211:
3036 pba = E1000_PBA_34K;
3037 default:
3038 break;
3039 }
3040
3041 /* Special needs in case of Jumbo frames */
3042 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3043 u32 tx_space, min_tx, min_rx;
3044 pba = E1000_READ_REG(hw, E1000_PBA);
3045 tx_space = pba >> 16;
3046 pba &= 0xffff;
3047 min_tx = (adapter->max_frame_size +
3048 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3049 min_tx = roundup2(min_tx, 1024);
3050 min_tx >>= 10;
3051 min_rx = adapter->max_frame_size;
3052 min_rx = roundup2(min_rx, 1024);
3053 min_rx >>= 10;
3054 if (tx_space < min_tx &&
3055 ((min_tx - tx_space) < pba)) {
3056 pba = pba - (min_tx - tx_space);
3057 /*
3058 * if short on rx space, rx wins
3059 * and must trump tx adjustment
3060 */
3061 if (pba < min_rx)
3062 pba = min_rx;
3063 }
3064 E1000_WRITE_REG(hw, E1000_PBA, pba);
3065 }
3066
3067 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3068
3069 /*
3070 * These parameters control the automatic generation (Tx) and
3071 * response (Rx) to Ethernet PAUSE frames.
3072 * - High water mark should allow for at least two frames to be
3073 * received after sending an XOFF.
3074 * - Low water mark works best when it is very near the high water mark.
3075 * This allows the receiver to restart by sending XON when it has
3076 * drained a bit.
3077 */
3078 hwm = min(((pba << 10) * 9 / 10),
3079 ((pba << 10) - 2 * adapter->max_frame_size));
3080
3081 if (hw->mac.type < e1000_82576) {
3082 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3083 fc->low_water = fc->high_water - 8;
3084 } else {
3085 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3086 fc->low_water = fc->high_water - 16;
3087 }
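	/*
	 * Worked example (illustrative, assuming an i210 with its 34KB
	 * PBA and a 1518-byte max frame): pba << 10 == 34816, so
	 * hwm = min(34816 * 9 / 10, 34816 - 2 * 1518) == 31334, giving
	 * high_water == 31328 and low_water == 31312 with the 16-byte
	 * granularity used on 82576 and later parts.
	 */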
3088
3089 fc->pause_time = IGB_FC_PAUSE_TIME;
3090 fc->send_xon = TRUE;
3091 if (adapter->fc)
3092 fc->requested_mode = adapter->fc;
3093 else
3094 fc->requested_mode = e1000_fc_default;
3095
3096 /* Issue a global reset */
3097 e1000_reset_hw(hw);
3098 E1000_WRITE_REG(hw, E1000_WUC, 0);
3099
3100 /* Reset for AutoMediaDetect */
3101 if (adapter->flags & IGB_MEDIA_RESET) {
3102 e1000_setup_init_funcs(hw, TRUE);
3103 e1000_get_bus_info(hw);
3104 adapter->flags &= ~IGB_MEDIA_RESET;
3105 }
3106
3107 if (e1000_init_hw(hw) < 0)
3108 device_printf(dev, "Hardware Initialization Failed\n");
3109
3110 /* Setup DMA Coalescing */
3111 igb_init_dmac(adapter, pba);
3112
3113 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3114 e1000_get_phy_info(hw);
3115 e1000_check_for_link(hw);
3116 return;
3117}
3118
3119/*********************************************************************
3120 *
3121 * Setup networking device structure and register an interface.
3122 *
3123 **********************************************************************/
3124static int
3125igb_setup_interface(device_t dev, struct adapter *adapter)
3126{
3127 struct ifnet *ifp;
3128
3129 INIT_DEBUGOUT("igb_setup_interface: begin");
3130
3131 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3132 if (ifp == NULL) {
3133 device_printf(dev, "can not allocate ifnet structure\n");
3134 return (-1);
3135 }
3136 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3137 ifp->if_init = igb_init;
3138 ifp->if_softc = adapter;
3139 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3140 ifp->if_ioctl = igb_ioctl;
3141 ifp->if_get_counter = igb_get_counter;
3142#ifndef IGB_LEGACY_TX
3143 ifp->if_transmit = igb_mq_start;
3144 ifp->if_qflush = igb_qflush;
3145#else
3146 ifp->if_start = igb_start;
3147 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3148 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3149 IFQ_SET_READY(&ifp->if_snd);
3150#endif
3151
3152 ether_ifattach(ifp, adapter->hw.mac.addr);
3153
3154 ifp->if_capabilities = ifp->if_capenable = 0;
3155
3156 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3157 ifp->if_capabilities |= IFCAP_TSO;
3158 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3159 ifp->if_capenable = ifp->if_capabilities;
3160
3161 /* Don't enable LRO by default */
3162 ifp->if_capabilities |= IFCAP_LRO;
3163
3164#ifdef DEVICE_POLLING
3165 ifp->if_capabilities |= IFCAP_POLLING;
3166#endif
3167
3168 /*
3169 * Tell the upper layer(s) we
3170 * support full VLAN capability.
3171 */
3172 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3173 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3174 | IFCAP_VLAN_HWTSO
3175 | IFCAP_VLAN_MTU;
3176 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3177 | IFCAP_VLAN_HWTSO
3178 | IFCAP_VLAN_MTU;
3179
3180 /*
3181	** Don't turn this on by default: if vlans are
3182	** created on another pseudo device (e.g. lagg),
3183	** then vlan events are not passed through, breaking
3184	** operation, but with HW FILTER off it works. If
3185	** you use vlans directly on the igb driver you can
3186	** enable this and get full hardware tag filtering.
3187 */
3188 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3189
3190 /*
3191 * Specify the media types supported by this adapter and register
3192 * callbacks to update media and link information
3193 */
3194 ifmedia_init(&adapter->media, IFM_IMASK,
3195 igb_media_change, igb_media_status);
3196 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3197 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3198 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3199 0, NULL);
3200 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3201 } else {
3202 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3203 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3204 0, NULL);
3205 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3206 0, NULL);
3207 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3208 0, NULL);
3209 if (adapter->hw.phy.type != e1000_phy_ife) {
3210 ifmedia_add(&adapter->media,
3211 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3212 ifmedia_add(&adapter->media,
3213 IFM_ETHER | IFM_1000_T, 0, NULL);
3214 }
3215 }
3216 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3217 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3218 return (0);
3219}
3220
3221
3222/*
3223 * Manage DMA'able memory.
3224 */
3225static void
3226igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3227{
3228 if (error)
3229 return;
3230 *(bus_addr_t *) arg = segs[0].ds_addr;
3231}
3232
3233static int
3234igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3235 struct igb_dma_alloc *dma, int mapflags)
3236{
3237 int error;
3238
3239 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3240 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3241 BUS_SPACE_MAXADDR, /* lowaddr */
3242 BUS_SPACE_MAXADDR, /* highaddr */
3243 NULL, NULL, /* filter, filterarg */
3244 size, /* maxsize */
3245 1, /* nsegments */
3246 size, /* maxsegsize */
3247 0, /* flags */
3248 NULL, /* lockfunc */
3249 NULL, /* lockarg */
3250 &dma->dma_tag);
3251 if (error) {
3252 device_printf(adapter->dev,
3253 "%s: bus_dma_tag_create failed: %d\n",
3254 __func__, error);
3255 goto fail_0;
3256 }
3257
3258 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3259 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3260 if (error) {
3261 device_printf(adapter->dev,
3262 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3263 __func__, (uintmax_t)size, error);
3264 goto fail_2;
3265 }
3266
3267 dma->dma_paddr = 0;
3268 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3269 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3270 if (error || dma->dma_paddr == 0) {
3271 device_printf(adapter->dev,
3272 "%s: bus_dmamap_load failed: %d\n",
3273 __func__, error);
3274 goto fail_3;
3275 }
3276
3277 return (0);
3278
3279fail_3:
3280 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3281fail_2:
3282 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3283 bus_dma_tag_destroy(dma->dma_tag);
3284fail_0:
3285 dma->dma_tag = NULL;
3286
3287 return (error);
3288}
3289
3290static void
3291igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3292{
3293 if (dma->dma_tag == NULL)
3294 return;
3295 if (dma->dma_paddr != 0) {
3296 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3297 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3298 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3299 dma->dma_paddr = 0;
3300 }
3301 if (dma->dma_vaddr != NULL) {
3302 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3303 dma->dma_vaddr = NULL;
3304 }
3305 bus_dma_tag_destroy(dma->dma_tag);
3306 dma->dma_tag = NULL;
3307}
3308
3309
3310/*********************************************************************
3311 *
3312 * Allocate memory for the transmit and receive rings, and then
3313 * the descriptors associated with each, called only once at attach.
3314 *
3315 **********************************************************************/
3316static int
3317igb_allocate_queues(struct adapter *adapter)
3318{
3319 device_t dev = adapter->dev;
3320 struct igb_queue *que = NULL;
3321 struct tx_ring *txr = NULL;
3322 struct rx_ring *rxr = NULL;
3323 int rsize, tsize, error = E1000_SUCCESS;
3324 int txconf = 0, rxconf = 0;
3325
3326 /* First allocate the top level queue structs */
3327 if (!(adapter->queues =
3328 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3329 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3330 device_printf(dev, "Unable to allocate queue memory\n");
3331 error = ENOMEM;
3332 goto fail;
3333 }
3334
3335 /* Next allocate the TX ring struct memory */
3336 if (!(adapter->tx_rings =
3337 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3338 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3339 device_printf(dev, "Unable to allocate TX ring memory\n");
3340 error = ENOMEM;
3341 goto tx_fail;
3342 }
3343
3344 /* Now allocate the RX */
3345 if (!(adapter->rx_rings =
3346 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3347 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3348 device_printf(dev, "Unable to allocate RX ring memory\n");
3349 error = ENOMEM;
3350 goto rx_fail;
3351 }
3352
3353 tsize = roundup2(adapter->num_tx_desc *
3354 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3355 /*
3356	 * Now set up the TX queues; txconf is needed to handle the
3357	 * possibility that things fail midcourse and we need to
3358	 * unwind the allocations gracefully.
3359 */
3360 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3361 /* Set up some basics */
3362 txr = &adapter->tx_rings[i];
3363 txr->adapter = adapter;
3364 txr->me = i;
3365 txr->num_desc = adapter->num_tx_desc;
3366
3367 /* Initialize the TX lock */
3368 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3369 device_get_nameunit(dev), txr->me);
3370 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3371
3372 if (igb_dma_malloc(adapter, tsize,
3373 &txr->txdma, BUS_DMA_NOWAIT)) {
3374 device_printf(dev,
3375 "Unable to allocate TX Descriptor memory\n");
3376 error = ENOMEM;
3377 goto err_tx_desc;
3378 }
3379 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3380 bzero((void *)txr->tx_base, tsize);
3381
3382 /* Now allocate transmit buffers for the ring */
3383 if (igb_allocate_transmit_buffers(txr)) {
3384 device_printf(dev,
3385 "Critical Failure setting up transmit buffers\n");
3386 error = ENOMEM;
3387 goto err_tx_desc;
3388 }
3389#ifndef IGB_LEGACY_TX
3390 /* Allocate a buf ring */
3391 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3392 M_WAITOK, &txr->tx_mtx);
3393#endif
3394 }
3395
3396 /*
3397 * Next the RX queues...
3398 */
3399 rsize = roundup2(adapter->num_rx_desc *
3400 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3401 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3402 rxr = &adapter->rx_rings[i];
3403 rxr->adapter = adapter;
3404 rxr->me = i;
3405
3406 /* Initialize the RX lock */
3407 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3408		    device_get_nameunit(dev), rxr->me);
3409 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3410
3411 if (igb_dma_malloc(adapter, rsize,
3412 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3413 device_printf(dev,
3414			    "Unable to allocate RX Descriptor memory\n");
3415 error = ENOMEM;
3416 goto err_rx_desc;
3417 }
3418 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3419 bzero((void *)rxr->rx_base, rsize);
3420
3421 /* Allocate receive buffers for the ring*/
3422 if (igb_allocate_receive_buffers(rxr)) {
3423 device_printf(dev,
3424 "Critical Failure setting up receive buffers\n");
3425 error = ENOMEM;
3426 goto err_rx_desc;
3427 }
3428 }
3429
3430 /*
3431 ** Finally set up the queue holding structs
3432 */
3433 for (int i = 0; i < adapter->num_queues; i++) {
3434 que = &adapter->queues[i];
3435 que->adapter = adapter;
3436 que->txr = &adapter->tx_rings[i];
3437 que->rxr = &adapter->rx_rings[i];
3438 }
3439
3440 return (0);
3441
3442err_rx_desc:
3443 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3444 igb_dma_free(adapter, &rxr->rxdma);
3445err_tx_desc:
3446 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3447 igb_dma_free(adapter, &txr->txdma);
3448 free(adapter->rx_rings, M_DEVBUF);
3449rx_fail:
3450#ifndef IGB_LEGACY_TX
3451 buf_ring_free(txr->br, M_DEVBUF);
3452#endif
3453 free(adapter->tx_rings, M_DEVBUF);
3454tx_fail:
3455 free(adapter->queues, M_DEVBUF);
3456fail:
3457 return (error);
3458}
3459
3460/*********************************************************************
3461 *
3462 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3463 * the information needed to transmit a packet on the wire. This is
3464 * called only once at attach, setup is done every reset.
3465 *
3466 **********************************************************************/
3467static int
3468igb_allocate_transmit_buffers(struct tx_ring *txr)
3469{
3470 struct adapter *adapter = txr->adapter;
3471 device_t dev = adapter->dev;
3472 struct igb_tx_buf *txbuf;
3473 int error, i;
3474
3475 /*
3476 * Setup DMA descriptor areas.
3477 */
3478 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3479 1, 0, /* alignment, bounds */
3480 BUS_SPACE_MAXADDR, /* lowaddr */
3481 BUS_SPACE_MAXADDR, /* highaddr */
3482 NULL, NULL, /* filter, filterarg */
3483 IGB_TSO_SIZE, /* maxsize */
3484 IGB_MAX_SCATTER, /* nsegments */
3485 PAGE_SIZE, /* maxsegsize */
3486 0, /* flags */
3487 NULL, /* lockfunc */
3488 NULL, /* lockfuncarg */
3489 &txr->txtag))) {
3490 device_printf(dev,"Unable to allocate TX DMA tag\n");
3491 goto fail;
3492 }
3493
3494 if (!(txr->tx_buffers =
3495 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3496 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3497 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3498 error = ENOMEM;
3499 goto fail;
3500 }
3501
3502 /* Create the descriptor buffer dma maps */
3503 txbuf = txr->tx_buffers;
3504 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3505 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3506 if (error != 0) {
3507 device_printf(dev, "Unable to create TX DMA map\n");
3508 goto fail;
3509 }
3510 }
3511
3512 return 0;
3513fail:
3514 /* We free all, it handles case where we are in the middle */
3515 igb_free_transmit_structures(adapter);
3516 return (error);
3517}
3518
3519/*********************************************************************
3520 *
3521 * Initialize a transmit ring.
3522 *
3523 **********************************************************************/
3524static void
3525igb_setup_transmit_ring(struct tx_ring *txr)
3526{
3527 struct adapter *adapter = txr->adapter;
3528 struct igb_tx_buf *txbuf;
3529 int i;
3530#ifdef DEV_NETMAP
3531 struct netmap_adapter *na = NA(adapter->ifp);
3532 struct netmap_slot *slot;
3533#endif /* DEV_NETMAP */
3534
3535 /* Clear the old descriptor contents */
3536 IGB_TX_LOCK(txr);
3537#ifdef DEV_NETMAP
3538 slot = netmap_reset(na, NR_TX, txr->me, 0);
3539#endif /* DEV_NETMAP */
3540 bzero((void *)txr->tx_base,
3541 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3542 /* Reset indices */
3543 txr->next_avail_desc = 0;
3544 txr->next_to_clean = 0;
3545
3546 /* Free any existing tx buffers. */
3547 txbuf = txr->tx_buffers;
3548 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3549 if (txbuf->m_head != NULL) {
3550 bus_dmamap_sync(txr->txtag, txbuf->map,
3551 BUS_DMASYNC_POSTWRITE);
3552 bus_dmamap_unload(txr->txtag, txbuf->map);
3553 m_freem(txbuf->m_head);
3554 txbuf->m_head = NULL;
3555 }
3556#ifdef DEV_NETMAP
3557 if (slot) {
3558 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3559 /* no need to set the address */
3560 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3561 }
3562#endif /* DEV_NETMAP */
3563 /* clear the watch index */
3564 txbuf->eop = NULL;
3565 }
3566
3567 /* Set number of descriptors available */
3568 txr->tx_avail = adapter->num_tx_desc;
3569
3570 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3571 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3572 IGB_TX_UNLOCK(txr);
3573}
3574
3575/*********************************************************************
3576 *
3577 * Initialize all transmit rings.
3578 *
3579 **********************************************************************/
3580static void
3581igb_setup_transmit_structures(struct adapter *adapter)
3582{
3583 struct tx_ring *txr = adapter->tx_rings;
3584
3585 for (int i = 0; i < adapter->num_queues; i++, txr++)
3586 igb_setup_transmit_ring(txr);
3587
3588 return;
3589}
3590
3591/*********************************************************************
3592 *
3593 * Enable transmit unit.
3594 *
3595 **********************************************************************/
3596static void
3597igb_initialize_transmit_units(struct adapter *adapter)
3598{
3599 struct tx_ring *txr = adapter->tx_rings;
3600 struct e1000_hw *hw = &adapter->hw;
3601 u32 tctl, txdctl;
3602
3603 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3604 tctl = txdctl = 0;
3605
3606 /* Setup the Tx Descriptor Rings */
3607 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3608 u64 bus_addr = txr->txdma.dma_paddr;
3609
3610 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3611 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3612 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3613 (uint32_t)(bus_addr >> 32));
3614 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3615 (uint32_t)bus_addr);
3616
3617 /* Setup the HW Tx Head and Tail descriptor pointers */
3618 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3619 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3620
3621 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3622 E1000_READ_REG(hw, E1000_TDBAL(i)),
3623 E1000_READ_REG(hw, E1000_TDLEN(i)));
3624
3625 txr->queue_status = IGB_QUEUE_IDLE;
3626
3627 txdctl |= IGB_TX_PTHRESH;
3628 txdctl |= IGB_TX_HTHRESH << 8;
3629 txdctl |= IGB_TX_WTHRESH << 16;
3630 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3631 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3632 }
3633
3634 if (adapter->vf_ifp)
3635 return;
3636
3637 e1000_config_collision_dist(hw);
3638
3639 /* Program the Transmit Control Register */
3640 tctl = E1000_READ_REG(hw, E1000_TCTL);
3641 tctl &= ~E1000_TCTL_CT;
3642 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3643 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3644
3645 /* This write will effectively turn on the transmit unit. */
3646 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3647}
3648
3649/*********************************************************************
3650 *
3651 * Free all transmit rings.
3652 *
3653 **********************************************************************/
3654static void
3655igb_free_transmit_structures(struct adapter *adapter)
3656{
3657 struct tx_ring *txr = adapter->tx_rings;
3658
3659 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3660 IGB_TX_LOCK(txr);
3661 igb_free_transmit_buffers(txr);
3662 igb_dma_free(adapter, &txr->txdma);
3663 IGB_TX_UNLOCK(txr);
3664 IGB_TX_LOCK_DESTROY(txr);
3665 }
3666 free(adapter->tx_rings, M_DEVBUF);
3667}
3668
3669/*********************************************************************
3670 *
3671 * Free transmit ring related data structures.
3672 *
3673 **********************************************************************/
3674static void
3675igb_free_transmit_buffers(struct tx_ring *txr)
3676{
3677 struct adapter *adapter = txr->adapter;
3678 struct igb_tx_buf *tx_buffer;
3679 int i;
3680
3681 INIT_DEBUGOUT("free_transmit_ring: begin");
3682
3683 if (txr->tx_buffers == NULL)
3684 return;
3685
3686 tx_buffer = txr->tx_buffers;
3687 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3688 if (tx_buffer->m_head != NULL) {
3689 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3690 BUS_DMASYNC_POSTWRITE);
3691 bus_dmamap_unload(txr->txtag,
3692 tx_buffer->map);
3693 m_freem(tx_buffer->m_head);
3694 tx_buffer->m_head = NULL;
3695 if (tx_buffer->map != NULL) {
3696 bus_dmamap_destroy(txr->txtag,
3697 tx_buffer->map);
3698 tx_buffer->map = NULL;
3699 }
3700 } else if (tx_buffer->map != NULL) {
3701 bus_dmamap_unload(txr->txtag,
3702 tx_buffer->map);
3703 bus_dmamap_destroy(txr->txtag,
3704 tx_buffer->map);
3705 tx_buffer->map = NULL;
3706 }
3707 }
3708#ifndef IGB_LEGACY_TX
3709 if (txr->br != NULL)
3710 buf_ring_free(txr->br, M_DEVBUF);
3711#endif
3712 if (txr->tx_buffers != NULL) {
3713 free(txr->tx_buffers, M_DEVBUF);
3714 txr->tx_buffers = NULL;
3715 }
3716 if (txr->txtag != NULL) {
3717 bus_dma_tag_destroy(txr->txtag);
3718 txr->txtag = NULL;
3719 }
3720 return;
3721}
3722
3723/**********************************************************************
3724 *
3725 * Setup work for hardware segmentation offload (TSO) on
3726 * adapters using advanced tx descriptors
3727 *
3728 **********************************************************************/
3729static int
3730igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3731 u32 *cmd_type_len, u32 *olinfo_status)
3732{
3733 struct adapter *adapter = txr->adapter;
3734 struct e1000_adv_tx_context_desc *TXD;
3735 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3736 u32 mss_l4len_idx = 0, paylen;
3737 u16 vtag = 0, eh_type;
3738 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3739 struct ether_vlan_header *eh;
3740#ifdef INET6
3741 struct ip6_hdr *ip6;
3742#endif
3743#ifdef INET
3744 struct ip *ip;
3745#endif
3746 struct tcphdr *th;
3747
3748
3749 /*
3750 * Determine where frame payload starts.
3751 * Jump over vlan headers if already present
3752 */
3753 eh = mtod(mp, struct ether_vlan_header *);
3754 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3755 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3756 eh_type = eh->evl_proto;
3757 } else {
3758 ehdrlen = ETHER_HDR_LEN;
3759 eh_type = eh->evl_encap_proto;
3760 }
3761
3762 switch (ntohs(eh_type)) {
3763#ifdef INET6
3764 case ETHERTYPE_IPV6:
3765 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3766 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3767 if (ip6->ip6_nxt != IPPROTO_TCP)
3768 return (ENXIO);
3769 ip_hlen = sizeof(struct ip6_hdr);
3770 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3771 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3772 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3773 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3774 break;
3775#endif
3776#ifdef INET
3777 case ETHERTYPE_IP:
3778 ip = (struct ip *)(mp->m_data + ehdrlen);
3779 if (ip->ip_p != IPPROTO_TCP)
3780 return (ENXIO);
3781 ip->ip_sum = 0;
3782 ip_hlen = ip->ip_hl << 2;
3783 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3784 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3785 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3786 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3787 /* Tell transmit desc to also do IPv4 checksum. */
3788 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3789 break;
3790#endif
3791 default:
3792 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3793 __func__, ntohs(eh_type));
3794 break;
3795 }
3796
3797 ctxd = txr->next_avail_desc;
3798 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3799
3800 tcp_hlen = th->th_off << 2;
3801
3802 /* This is used in the transmit desc in encap */
3803 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3804
3805 /* VLAN MACLEN IPLEN */
3806 if (mp->m_flags & M_VLANTAG) {
3807 vtag = htole16(mp->m_pkthdr.ether_vtag);
3808 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3809 }
3810
3811 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3812 vlan_macip_lens |= ip_hlen;
3813 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3814
3815 /* ADV DTYPE TUCMD */
3816 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3817 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3818 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3819
3820 /* MSS L4LEN IDX */
3821 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3822 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3823 /* 82575 needs the queue index added */
3824 if (adapter->hw.mac.type == e1000_82575)
3825 mss_l4len_idx |= txr->me << 4;
3826 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
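	/*
	 * Illustrative example (hypothetical values): a 1448-byte MSS
	 * with a 20-byte TCP header packs as
	 * (1448 << E1000_ADVTXD_MSS_SHIFT) | (20 << E1000_ADVTXD_L4LEN_SHIFT)
	 * before being byte-swapped into the context descriptor above.
	 */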
3827
3828 TXD->seqnum_seed = htole32(0);
3829
3830 if (++ctxd == txr->num_desc)
3831 ctxd = 0;
3832
3833 txr->tx_avail--;
3834 txr->next_avail_desc = ctxd;
3835 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3836 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3837 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3838 ++txr->tso_tx;
3839 return (0);
3840}
3841
3842/*********************************************************************
3843 *
3844 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3845 *
3846 **********************************************************************/
3847
3848static int
3849igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3850 u32 *cmd_type_len, u32 *olinfo_status)
3851{
3852 struct e1000_adv_tx_context_desc *TXD;
3853 struct adapter *adapter = txr->adapter;
3854 struct ether_vlan_header *eh;
3855 struct ip *ip;
3856 struct ip6_hdr *ip6;
3857 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3858 int ehdrlen, ip_hlen = 0;
3859 u16 etype;
3860 u8 ipproto = 0;
3861 int offload = TRUE;
3862 int ctxd = txr->next_avail_desc;
3863 u16 vtag = 0;
3864
3865 /* First check if TSO is to be used */
3866 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3867 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3868
3869 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3870 offload = FALSE;
3871
3872 /* Indicate the whole packet as payload when not doing TSO */
3873 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3874
3875 /* Now ready a context descriptor */
3876 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3877
3878 /*
3879 ** In advanced descriptors the vlan tag must
3880 ** be placed into the context descriptor. Hence
3881 ** we need to make one even if not doing offloads.
3882 */
3883 if (mp->m_flags & M_VLANTAG) {
3884 vtag = htole16(mp->m_pkthdr.ether_vtag);
3885 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3886 } else if (offload == FALSE) /* ... no offload to do */
3887 return (0);
3888
3889 /*
3890 * Determine where frame payload starts.
3891 * Jump over vlan headers if already present,
3892 * helpful for QinQ too.
3893 */
3894 eh = mtod(mp, struct ether_vlan_header *);
3895 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3896 etype = ntohs(eh->evl_proto);
3897 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3898 } else {
3899 etype = ntohs(eh->evl_encap_proto);
3900 ehdrlen = ETHER_HDR_LEN;
3901 }
3902
3903 /* Set the ether header length */
3904 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3905
3906 switch (etype) {
3907 case ETHERTYPE_IP:
3908 ip = (struct ip *)(mp->m_data + ehdrlen);
3909 ip_hlen = ip->ip_hl << 2;
3910 ipproto = ip->ip_p;
3911 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3912 break;
3913 case ETHERTYPE_IPV6:
3914 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3915 ip_hlen = sizeof(struct ip6_hdr);
3916 /* XXX-BZ this will go badly in case of ext hdrs. */
3917 ipproto = ip6->ip6_nxt;
3918 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3919 break;
3920 default:
3921 offload = FALSE;
3922 break;
3923 }
3924
3925 vlan_macip_lens |= ip_hlen;
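	/*
	 * Illustrative example (hypothetical frame): an untagged IPv4
	 * packet with a plain 14-byte Ethernet header and a 20-byte IP
	 * header packs vlan_macip_lens as
	 * (ETHER_HDR_LEN << E1000_ADVTXD_MACLEN_SHIFT) | 20.
	 */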
3926 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3927
3928 switch (ipproto) {
3929 case IPPROTO_TCP:
3930 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3931 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3932 break;
3933 case IPPROTO_UDP:
3934 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3935 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3936 break;
3937
3938#if __FreeBSD_version >= 800000
3939 case IPPROTO_SCTP:
3940 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3941 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3942 break;
3943#endif
3944 default:
3945 offload = FALSE;
3946 break;
3947 }
3948
3949 if (offload) /* For the TX descriptor setup */
3950 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3951
3952 /* 82575 needs the queue index added */
3953 if (adapter->hw.mac.type == e1000_82575)
3954 mss_l4len_idx = txr->me << 4;
3955
3956 /* Now copy bits into descriptor */
3957 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3958 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3959 TXD->seqnum_seed = htole32(0);
3960 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3961
3962 /* We've consumed the first desc, adjust counters */
3963 if (++ctxd == txr->num_desc)
3964 ctxd = 0;
3965 txr->next_avail_desc = ctxd;
3966 --txr->tx_avail;
3967
3968 return (0);
3969}
3970
3971/**********************************************************************
3972 *
3973 * Examine each tx_buffer in the used queue. If the hardware is done
3974 * processing the packet then free associated resources. The
3975 * tx_buffer is put back on the free queue.
3976 *
3977 * A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3978 **********************************************************************/
3979static bool
3980igb_txeof(struct tx_ring *txr)
3981{
3982 struct adapter *adapter = txr->adapter;
3983#ifdef DEV_NETMAP
3984 struct ifnet *ifp = adapter->ifp;
3985#endif /* DEV_NETMAP */
3986 u32 work, processed = 0;
3987 int limit = adapter->tx_process_limit;
3988 struct igb_tx_buf *buf;
3989 union e1000_adv_tx_desc *txd;
3990
3991 mtx_assert(&txr->tx_mtx, MA_OWNED);
3992
3993#ifdef DEV_NETMAP
3994 if (netmap_tx_irq(ifp, txr->me))
3995 return (FALSE);
3996#endif /* DEV_NETMAP */
3997
3998 if (txr->tx_avail == txr->num_desc) {
3999 txr->queue_status = IGB_QUEUE_IDLE;
4000 return FALSE;
4001 }
4002
4003 /* Get work starting point */
4004 work = txr->next_to_clean;
4005 buf = &txr->tx_buffers[work];
4006 txd = &txr->tx_base[work];
4007 work -= txr->num_desc; /* The distance to ring end */
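	/*
	 * Illustrative example: with 1024 descriptors and
	 * next_to_clean == 1000, work (a u32) wraps to 1000 - 1024
	 * modulo 2^32; after 24 increments it hits 0, which is exactly
	 * where the walk below must wrap back to the start of the ring.
	 */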
4008 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4009 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4010 do {
4011 union e1000_adv_tx_desc *eop = buf->eop;
4012 if (eop == NULL) /* No work */
4013 break;
4014
4015 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4016 break; /* I/O not complete */
4017
4018 if (buf->m_head) {
4019 txr->bytes +=
4020 buf->m_head->m_pkthdr.len;
4021 bus_dmamap_sync(txr->txtag,
4022 buf->map,
4023 BUS_DMASYNC_POSTWRITE);
4024 bus_dmamap_unload(txr->txtag,
4025 buf->map);
4026 m_freem(buf->m_head);
4027 buf->m_head = NULL;
4028 }
4029 buf->eop = NULL;
4030 ++txr->tx_avail;
4031
4032 /* We clean the range if multi segment */
4033 while (txd != eop) {
4034 ++txd;
4035 ++buf;
4036 ++work;
4037 /* wrap the ring? */
4038 if (__predict_false(!work)) {
4039 work -= txr->num_desc;
4040 buf = txr->tx_buffers;
4041 txd = txr->tx_base;
4042 }
4043 if (buf->m_head) {
4044 txr->bytes +=
4045 buf->m_head->m_pkthdr.len;
4046 bus_dmamap_sync(txr->txtag,
4047 buf->map,
4048 BUS_DMASYNC_POSTWRITE);
4049 bus_dmamap_unload(txr->txtag,
4050 buf->map);
4051 m_freem(buf->m_head);
4052 buf->m_head = NULL;
4053 }
4054 ++txr->tx_avail;
4055 buf->eop = NULL;
4056
4057 }
4058 ++txr->packets;
4059 ++processed;
4060 txr->watchdog_time = ticks;
4061
4062 /* Try the next packet */
4063 ++txd;
4064 ++buf;
4065 ++work;
4066 /* reset with a wrap */
4067 if (__predict_false(!work)) {
4068 work -= txr->num_desc;
4069 buf = txr->tx_buffers;
4070 txd = txr->tx_base;
4071 }
4072 prefetch(txd);
4073 } while (__predict_true(--limit));
4074
4075 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4076 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4077
4078 work += txr->num_desc;
4079 txr->next_to_clean = work;
4080
4081 /*
4082	** Watchdog calculation: we know there's
4083	** work outstanding or the first return
4084	** would have been taken, so nothing processed
4085	** for too long indicates a hang.
4086 */
4087 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4088 txr->queue_status |= IGB_QUEUE_HUNG;
4089
4090 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4091 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4092
4093 if (txr->tx_avail == txr->num_desc) {
4094 txr->queue_status = IGB_QUEUE_IDLE;
4095 return (FALSE);
4096 }
4097
4098 return (TRUE);
4099}
4100
4101/*********************************************************************
4102 *
4103 * Refresh mbuf buffers for RX descriptor rings
4104 *  - now keeps its own state so discards due to resource
4105 *    exhaustion are unnecessary; if an mbuf cannot be obtained
4106 *    it just returns, keeping its placeholder, so it can simply
4107 *    be called again later to retry.
4108 *
4109 **********************************************************************/
4110static void
4111igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4112{
4113 struct adapter *adapter = rxr->adapter;
4114 bus_dma_segment_t hseg[1];
4115 bus_dma_segment_t pseg[1];
4116 struct igb_rx_buf *rxbuf;
4117 struct mbuf *mh, *mp;
4118 int i, j, nsegs, error;
4119 bool refreshed = FALSE;
4120
4121 i = j = rxr->next_to_refresh;
4122 /*
4123 ** Get one descriptor beyond
4124 ** our work mark to control
4125 ** the loop.
4126 */
4127 if (++j == adapter->num_rx_desc)
4128 j = 0;
4129
4130 while (j != limit) {
4131 rxbuf = &rxr->rx_buffers[i];
4132 /* No hdr mbuf used with header split off */
4133 if (rxr->hdr_split == FALSE)
4134 goto no_split;
4135 if (rxbuf->m_head == NULL) {
4136 mh = m_gethdr(M_NOWAIT, MT_DATA);
4137 if (mh == NULL)
4138 goto update;
4139 } else
4140 mh = rxbuf->m_head;
4141
4142		mh->m_pkthdr.len = mh->m_len = MHLEN;
4144 mh->m_flags |= M_PKTHDR;
4145 /* Get the memory mapping */
4146 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4147 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4148 if (error != 0) {
4149 printf("Refresh mbufs: hdr dmamap load"
4150 " failure - %d\n", error);
4151 m_free(mh);
4152 rxbuf->m_head = NULL;
4153 goto update;
4154 }
4155 rxbuf->m_head = mh;
4156 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4157 BUS_DMASYNC_PREREAD);
4158 rxr->rx_base[i].read.hdr_addr =
4159 htole64(hseg[0].ds_addr);
4160no_split:
4161 if (rxbuf->m_pack == NULL) {
4162 mp = m_getjcl(M_NOWAIT, MT_DATA,
4163 M_PKTHDR, adapter->rx_mbuf_sz);
4164 if (mp == NULL)
4165 goto update;
4166 } else
4167 mp = rxbuf->m_pack;
4168
4169 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4170 /* Get the memory mapping */
4171 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4172 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4173 if (error != 0) {
4174 printf("Refresh mbufs: payload dmamap load"
4175 " failure - %d\n", error);
4176 m_free(mp);
4177 rxbuf->m_pack = NULL;
4178 goto update;
4179 }
4180 rxbuf->m_pack = mp;
4181 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4182 BUS_DMASYNC_PREREAD);
4183 rxr->rx_base[i].read.pkt_addr =
4184 htole64(pseg[0].ds_addr);
4185 refreshed = TRUE; /* I feel wefreshed :) */
4186
4187 i = j; /* our next is precalculated */
4188 rxr->next_to_refresh = i;
4189 if (++j == adapter->num_rx_desc)
4190 j = 0;
4191 }
4192update:
4193 if (refreshed) /* update tail */
4194 E1000_WRITE_REG(&adapter->hw,
4195 E1000_RDT(rxr->me), rxr->next_to_refresh);
4196 return;
4197}
4198
4199
4200/*********************************************************************
4201 *
4202 * Allocate memory for rx_buffer structures. Since we use one
4203 * rx_buffer per received packet, the maximum number of rx_buffer's
4204 * that we'll need is equal to the number of receive descriptors
4205 * that we've allocated.
4206 *
4207 **********************************************************************/
4208static int
4209igb_allocate_receive_buffers(struct rx_ring *rxr)
4210{
4211 struct adapter *adapter = rxr->adapter;
4212 device_t dev = adapter->dev;
4213 struct igb_rx_buf *rxbuf;
4214 int i, bsize, error;
4215
4216 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4217 if (!(rxr->rx_buffers =
4218 (struct igb_rx_buf *) malloc(bsize,
4219 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4220 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4221 error = ENOMEM;
4222 goto fail;
4223 }
4224
4225 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4226 1, 0, /* alignment, bounds */
4227 BUS_SPACE_MAXADDR, /* lowaddr */
4228 BUS_SPACE_MAXADDR, /* highaddr */
4229 NULL, NULL, /* filter, filterarg */
4230 MSIZE, /* maxsize */
4231 1, /* nsegments */
4232 MSIZE, /* maxsegsize */
4233 0, /* flags */
4234 NULL, /* lockfunc */
4235 NULL, /* lockfuncarg */
4236 &rxr->htag))) {
4237 device_printf(dev, "Unable to create RX DMA tag\n");
4238 goto fail;
4239 }
4240
4241 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4242 1, 0, /* alignment, bounds */
4243 BUS_SPACE_MAXADDR, /* lowaddr */
4244 BUS_SPACE_MAXADDR, /* highaddr */
4245 NULL, NULL, /* filter, filterarg */
4246 MJUM9BYTES, /* maxsize */
4247 1, /* nsegments */
4248 MJUM9BYTES, /* maxsegsize */
4249 0, /* flags */
4250 NULL, /* lockfunc */
4251 NULL, /* lockfuncarg */
4252 &rxr->ptag))) {
4253 device_printf(dev, "Unable to create RX payload DMA tag\n");
4254 goto fail;
4255 }
4256
4257 for (i = 0; i < adapter->num_rx_desc; i++) {
4258 rxbuf = &rxr->rx_buffers[i];
4259 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4260 if (error) {
4261 device_printf(dev,
4262 "Unable to create RX head DMA maps\n");
4263 goto fail;
4264 }
4265 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4266 if (error) {
4267 device_printf(dev,
4268 "Unable to create RX packet DMA maps\n");
4269 goto fail;
4270 }
4271 }
4272
4273 return (0);
4274
4275fail:
4276 /* Frees all, but can handle partial completion */
4277 igb_free_receive_structures(adapter);
4278 return (error);
4279}
4280
4281
4282static void
4283igb_free_receive_ring(struct rx_ring *rxr)
4284{
4285 struct adapter *adapter = rxr->adapter;
4286 struct igb_rx_buf *rxbuf;
4287
4288
4289 for (int i = 0; i < adapter->num_rx_desc; i++) {
4290 rxbuf = &rxr->rx_buffers[i];
4291 if (rxbuf->m_head != NULL) {
4292 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4293 BUS_DMASYNC_POSTREAD);
4294 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4295 rxbuf->m_head->m_flags |= M_PKTHDR;
4296 m_freem(rxbuf->m_head);
4297 }
4298 if (rxbuf->m_pack != NULL) {
4299 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4300 BUS_DMASYNC_POSTREAD);
4301 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4302 rxbuf->m_pack->m_flags |= M_PKTHDR;
4303 m_freem(rxbuf->m_pack);
4304 }
4305 rxbuf->m_head = NULL;
4306 rxbuf->m_pack = NULL;
4307 }
4308}
4309
4310
4311/*********************************************************************
4312 *
4313 * Initialize a receive ring and its buffers.
4314 *
4315 **********************************************************************/
4316static int
4317igb_setup_receive_ring(struct rx_ring *rxr)
4318{
4319 struct adapter *adapter;
4320 struct ifnet *ifp;
4321 device_t dev;
4322 struct igb_rx_buf *rxbuf;
4323 bus_dma_segment_t pseg[1], hseg[1];
4324 struct lro_ctrl *lro = &rxr->lro;
4325 int rsize, nsegs, error = 0;
4326#ifdef DEV_NETMAP
4327 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4328 struct netmap_slot *slot;
4329#endif /* DEV_NETMAP */
4330
4331 adapter = rxr->adapter;
4332 dev = adapter->dev;
4333 ifp = adapter->ifp;
4334
4335 /* Clear the ring contents */
4336 IGB_RX_LOCK(rxr);
4337#ifdef DEV_NETMAP
4338 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4339#endif /* DEV_NETMAP */
4340 rsize = roundup2(adapter->num_rx_desc *
4341 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4342 bzero((void *)rxr->rx_base, rsize);
4343
4344 /*
4345 ** Free current RX buffer structures and their mbufs
4346 */
4347 igb_free_receive_ring(rxr);
4348
4349 /* Configure for header split? */
4350 if (igb_header_split)
4351 rxr->hdr_split = TRUE;
4352
4353 /* Now replenish the ring mbufs */
4354 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4355 struct mbuf *mh, *mp;
4356
4357 rxbuf = &rxr->rx_buffers[j];
4358#ifdef DEV_NETMAP
4359 if (slot) {
4360 /* slot sj is mapped to the j-th NIC-ring entry */
4361 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4362 uint64_t paddr;
4363 void *addr;
4364
4365 addr = PNMB(na, slot + sj, &paddr);
4366 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4367 /* Update descriptor */
4368 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4369 continue;
4370 }
4371#endif /* DEV_NETMAP */
4372 if (rxr->hdr_split == FALSE)
4373 goto skip_head;
4374
4375 /* First the header */
4376 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4377 if (rxbuf->m_head == NULL) {
4378 error = ENOBUFS;
4379 goto fail;
4380 }
4381 m_adj(rxbuf->m_head, ETHER_ALIGN);
4382 mh = rxbuf->m_head;
4383 mh->m_len = mh->m_pkthdr.len = MHLEN;
4384 mh->m_flags |= M_PKTHDR;
4385 /* Get the memory mapping */
4386 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4387 rxbuf->hmap, rxbuf->m_head, hseg,
4388 &nsegs, BUS_DMA_NOWAIT);
4389 if (error != 0) /* Nothing elegant to do here */
4390 goto fail;
4391 bus_dmamap_sync(rxr->htag,
4392 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4393 /* Update descriptor */
4394 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4395
4396skip_head:
4397 /* Now the payload cluster */
4398 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4399 M_PKTHDR, adapter->rx_mbuf_sz);
4400 if (rxbuf->m_pack == NULL) {
4401 error = ENOBUFS;
4402 goto fail;
4403 }
4404 mp = rxbuf->m_pack;
4405 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4406 /* Get the memory mapping */
4407 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4408 rxbuf->pmap, mp, pseg,
4409 &nsegs, BUS_DMA_NOWAIT);
4410 if (error != 0)
4411 goto fail;
4412 bus_dmamap_sync(rxr->ptag,
4413 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4414 /* Update descriptor */
4415 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4416 }
4417
4418 /* Setup our descriptor indices */
4419 rxr->next_to_check = 0;
4420 rxr->next_to_refresh = adapter->num_rx_desc - 1;
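	/*
	 * Note: next_to_refresh intentionally starts one slot behind
	 * next_to_check; the receive tail register (RDT) is programmed
	 * from next_to_refresh, so it must trail the position the
	 * driver will check next.
	 */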
4421 rxr->lro_enabled = FALSE;
4422 rxr->rx_split_packets = 0;
4423 rxr->rx_bytes = 0;
4424
4425 rxr->fmp = NULL;
4426 rxr->lmp = NULL;
4427
4428 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4429 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4430
4431 /*
4432	** Now set up the LRO interface; we
4433	** also only do header split when LRO
4434	** is enabled, since it is so often
4435	** undesirable in other setups.
4436 */
4437 if (ifp->if_capenable & IFCAP_LRO) {
4438 error = tcp_lro_init(lro);
4439 if (error) {
4440 device_printf(dev, "LRO Initialization failed!\n");
4441 goto fail;
4442 }
4443 INIT_DEBUGOUT("RX LRO Initialized\n");
4444 rxr->lro_enabled = TRUE;
4445 lro->ifp = adapter->ifp;
4446 }
4447
4448 IGB_RX_UNLOCK(rxr);
4449 return (0);
4450
4451fail:
4452 igb_free_receive_ring(rxr);
4453 IGB_RX_UNLOCK(rxr);
4454 return (error);
4455}
4456
4457
4458/*********************************************************************
4459 *
4460 * Initialize all receive rings.
4461 *
4462 **********************************************************************/
4463static int
4464igb_setup_receive_structures(struct adapter *adapter)
4465{
4466 struct rx_ring *rxr = adapter->rx_rings;
4467 int i;
4468
4469 for (i = 0; i < adapter->num_queues; i++, rxr++)
4470 if (igb_setup_receive_ring(rxr))
4471 goto fail;
4472
4473 return (0);
4474fail:
4475 /*
4476	 * Free RX buffers allocated so far; we will only handle
4477	 * the rings that completed, as the failing case will have
4478	 * cleaned up after itself. 'i' is the endpoint.
4479 */
4480 for (int j = 0; j < i; ++j) {
4481 rxr = &adapter->rx_rings[j];
4482 IGB_RX_LOCK(rxr);
4483 igb_free_receive_ring(rxr);
4484 IGB_RX_UNLOCK(rxr);
4485 }
4486
4487 return (ENOBUFS);
4488}
4489
4490/*
4491 * Initialise the RSS mapping for NICs that support multiple transmit/
4492 * receive rings.
4493 */
4494static void
4495igb_initialise_rss_mapping(struct adapter *adapter)
4496{
4497 struct e1000_hw *hw = &adapter->hw;
4498 int i;
4499 int queue_id;
4500 u32 reta;
4501 u32 rss_key[10], mrqc, shift = 0;
4502
4503 /* XXX? */
4504 if (adapter->hw.mac.type == e1000_82575)
4505 shift = 6;
4506
4507 /*
4508 * The redirection table controls which destination
4509 * queue each bucket redirects traffic to.
4510 * Each DWORD represents four queues, with the LSB
4511 * being the first queue in the DWORD.
4512 *
4513 * This just allocates buckets to queues using round-robin
4514 * allocation.
4515 *
4516 * NOTE: It Just Happens to line up with the default
4517 * RSS allocation method.
4518 */
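	/*
	 * Illustrative example (hypothetical 4-queue setup, shift == 0):
	 * indirection entries 0..3 map to queues 0..3, so the loop
	 * below packs them LSB-first and writes RETA(0) = 0x03020100.
	 */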
4519
4520 /* Warning FM follows */
4521 reta = 0;
4522 for (i = 0; i < 128; i++) {
4523#ifdef RSS
4524 queue_id = rss_get_indirection_to_bucket(i);
4525 /*
4526 * If we have more queues than buckets, we'll
4527 * end up mapping buckets to a subset of the
4528 * queues.
4529 *
4530 * If we have more buckets than queues, we'll
4531 * end up instead assigning multiple buckets
4532 * to queues.
4533 *
4534 * Both are suboptimal, but we need to handle
4535 * the case so we don't go out of bounds
4536 * indexing arrays and such.
4537 */
4538 queue_id = queue_id % adapter->num_queues;
4539#else
4540 queue_id = (i % adapter->num_queues);
4541#endif
4542 /* Adjust if required */
4543 queue_id = queue_id << shift;
4544
4545 /*
4546 * The low 8 bits are for hash value (n+0);
4547 * The next 8 bits are for hash value (n+1), etc.
4548 */
4549 reta = reta >> 8;
4550 reta = reta | ( ((uint32_t) queue_id) << 24);
4551 if ((i & 3) == 3) {
4552 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4553 reta = 0;
4554 }
4555 }
4556
4557 /* Now fill in hash table */
4558
4559 /*
4560 * MRQC: Multiple Receive Queues Command
4561 * Set queuing to RSS control, number depends on the device.
4562 */
4563 mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4564
4565#ifdef RSS
4566 /* XXX ew typecasting */
4567 rss_getkey((uint8_t *) &rss_key);
4568#else
4569 arc4rand(&rss_key, sizeof(rss_key), 0);
4570#endif
4571 for (i = 0; i < 10; i++)
4572 E1000_WRITE_REG_ARRAY(hw,
4573 E1000_RSSRK(0), i, rss_key[i]);
4574
4575 /*
4576 * Configure the RSS fields to hash upon.
4577 */
4578 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4579 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4580 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4581 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4582	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4583	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4584	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4585	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4586
4587 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4588}
4589
4590/*********************************************************************
4591 *
4592 * Enable receive unit.
4593 *
4594 **********************************************************************/
4595static void
4596igb_initialize_receive_units(struct adapter *adapter)
4597{
4598 struct rx_ring *rxr = adapter->rx_rings;
4599 struct ifnet *ifp = adapter->ifp;
4600 struct e1000_hw *hw = &adapter->hw;
4601 u32 rctl, rxcsum, psize, srrctl = 0;
4602
4603 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4604
4605 /*
4606 * Make sure receives are disabled while setting
4607 * up the descriptor ring
4608 */
4609 rctl = E1000_READ_REG(hw, E1000_RCTL);
4610 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4611
4612 /*
4613 ** Set up for header split
4614 */
4615 if (igb_header_split) {
4616 /* Use a standard mbuf for the header */
4617 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4618 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4619 } else
4620 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4621
4622 /*
4623 ** Set up for jumbo frames
4624 */
4625 if (ifp->if_mtu > ETHERMTU) {
4626 rctl |= E1000_RCTL_LPE;
4627 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4628 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4629 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4630 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4631 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4632 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4633 }
4634 /* Set maximum packet len */
4635 psize = adapter->max_frame_size;
4636 /* are we on a vlan? */
4637 if (adapter->ifp->if_vlantrunk != NULL)
4638 psize += VLAN_TAG_SIZE;
4639 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4640 } else {
4641 rctl &= ~E1000_RCTL_LPE;
4642 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4643 rctl |= E1000_RCTL_SZ_2048;
4644 }
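	/*
	 * Note: the SRRCTL buffer size field (BSIZEPKT) is in 1KB
	 * units, which is why the sizes above are shifted right by
	 * E1000_SRRCTL_BSIZEPKT_SHIFT (e.g. 8192 becomes 8).
	 */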
4645
4646 /*
4647 * If TX flow control is disabled and there's >1 queue defined,
4648 * enable DROP.
4649 *
4650 * This drops frames rather than hanging the RX MAC for all queues.
4651 */
4652 if ((adapter->num_queues > 1) &&
4653 (adapter->fc == e1000_fc_none ||
4654 adapter->fc == e1000_fc_rx_pause)) {
4655 srrctl |= E1000_SRRCTL_DROP_EN;
4656 }
4657
4658 /* Setup the Base and Length of the Rx Descriptor Rings */
4659 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4660 u64 bus_addr = rxr->rxdma.dma_paddr;
4661 u32 rxdctl;
4662
4663 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4664 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4665 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4666 (uint32_t)(bus_addr >> 32));
4667 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4668 (uint32_t)bus_addr);
4669 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4670 /* Enable this Queue */
4671 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4672 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4673 rxdctl &= 0xFFF00000;
4674 rxdctl |= IGB_RX_PTHRESH;
4675 rxdctl |= IGB_RX_HTHRESH << 8;
4676 rxdctl |= IGB_RX_WTHRESH << 16;
4677 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4678 }
4679
4680 /*
4681 ** Setup for RX MultiQueue
4682 */
4683 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4684	if (adapter->num_queues > 1) {
4685
4686 /* rss setup */
4687 igb_initialise_rss_mapping(adapter);
4688
4689 /*
4690 ** NOTE: Receive Full-Packet Checksum Offload
4691 ** is mutually exclusive with Multiqueue. However
4692 ** this is not the same as TCP/IP checksums which
4693 ** still work.
4694 */
4695 rxcsum |= E1000_RXCSUM_PCSD;
4696#if __FreeBSD_version >= 800000
4697 /* For SCTP Offload */
4698 if (((hw->mac.type == e1000_82576) ||
4699 (hw->mac.type == e1000_82580)) &&
4700 (ifp->if_capenable & IFCAP_RXCSUM))
4701 rxcsum |= E1000_RXCSUM_CRCOFL;
4702#endif
4703 } else {
4704 /* Non RSS setup */
4705 if (ifp->if_capenable & IFCAP_RXCSUM) {
4706 rxcsum |= E1000_RXCSUM_IPPCSE;
4707#if __FreeBSD_version >= 800000
4708 if ((adapter->hw.mac.type == e1000_82576) ||
4709 (adapter->hw.mac.type == e1000_82580))
4710 rxcsum |= E1000_RXCSUM_CRCOFL;
4711#endif
4712 } else
4713 rxcsum &= ~E1000_RXCSUM_TUOFL;
4714 }
4715 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4716
4717 /* Setup the Receive Control Register */
4718 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4719 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4720 E1000_RCTL_RDMTS_HALF |
4721 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4722 /* Strip CRC bytes. */
4723 rctl |= E1000_RCTL_SECRC;
4724 /* Make sure VLAN Filters are off */
4725 rctl &= ~E1000_RCTL_VFE;
4726 /* Don't store bad packets */
4727 rctl &= ~E1000_RCTL_SBP;
4728
4729 /* Enable Receives */
4730 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4731
4732 /*
4733 * Setup the HW Rx Head and Tail Descriptor Pointers
4734 * - needs to be after enable
4735 */
4736 for (int i = 0; i < adapter->num_queues; i++) {
4737 rxr = &adapter->rx_rings[i];
4738 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4739#ifdef DEV_NETMAP
4740 /*
4741 * an init() while a netmap client is active must
4742 * preserve the rx buffers passed to userspace.
4743 * In this driver it means we adjust RDT to
4744 * something different from next_to_refresh
4745 * (which is not used in netmap mode).
4746 */
4747 if (ifp->if_capenable & IFCAP_NETMAP) {
4748 struct netmap_adapter *na = NA(adapter->ifp);
4749 struct netmap_kring *kring = &na->rx_rings[i];
4750 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4751
4752 if (t >= adapter->num_rx_desc)
4753 t -= adapter->num_rx_desc;
4754 else if (t < 0)
4755 t += adapter->num_rx_desc;
4756 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4757 } else
4758#endif /* DEV_NETMAP */
4759 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4760 }
4761 return;
4762}
4763
4764/*********************************************************************
4765 *
4766 * Free receive rings.
4767 *
4768 **********************************************************************/
4769static void
4770igb_free_receive_structures(struct adapter *adapter)
4771{
4772 struct rx_ring *rxr = adapter->rx_rings;
4773
4774 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4775 struct lro_ctrl *lro = &rxr->lro;
4776 igb_free_receive_buffers(rxr);
4777 tcp_lro_free(lro);
4778 igb_dma_free(adapter, &rxr->rxdma);
4779 }
4780
4781 free(adapter->rx_rings, M_DEVBUF);
4782}
4783
4784/*********************************************************************
4785 *
4786 * Free receive ring data structures.
4787 *
4788 **********************************************************************/
4789static void
4790igb_free_receive_buffers(struct rx_ring *rxr)
4791{
4792 struct adapter *adapter = rxr->adapter;
4793 struct igb_rx_buf *rxbuf;
4794 int i;
4795
4796 INIT_DEBUGOUT("free_receive_structures: begin");
4797
4798 /* Cleanup any existing buffers */
4799 if (rxr->rx_buffers != NULL) {
4800 for (i = 0; i < adapter->num_rx_desc; i++) {
4801 rxbuf = &rxr->rx_buffers[i];
4802 if (rxbuf->m_head != NULL) {
4803 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4804 BUS_DMASYNC_POSTREAD);
4805 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4806 rxbuf->m_head->m_flags |= M_PKTHDR;
4807 m_freem(rxbuf->m_head);
4808 }
4809 if (rxbuf->m_pack != NULL) {
4810 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4811 BUS_DMASYNC_POSTREAD);
4812 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4813 rxbuf->m_pack->m_flags |= M_PKTHDR;
4814 m_freem(rxbuf->m_pack);
4815 }
4816 rxbuf->m_head = NULL;
4817 rxbuf->m_pack = NULL;
4818 if (rxbuf->hmap != NULL) {
4819 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4820 rxbuf->hmap = NULL;
4821 }
4822 if (rxbuf->pmap != NULL) {
4823 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4824 rxbuf->pmap = NULL;
4825 }
4826 }
4827 if (rxr->rx_buffers != NULL) {
4828 free(rxr->rx_buffers, M_DEVBUF);
4829 rxr->rx_buffers = NULL;
4830 }
4831 }
4832
4833 if (rxr->htag != NULL) {
4834 bus_dma_tag_destroy(rxr->htag);
4835 rxr->htag = NULL;
4836 }
4837 if (rxr->ptag != NULL) {
4838 bus_dma_tag_destroy(rxr->ptag);
4839 rxr->ptag = NULL;
4840 }
4841}
4842
4843static __inline void
4844igb_rx_discard(struct rx_ring *rxr, int i)
4845{
4846 struct igb_rx_buf *rbuf;
4847
4848 rbuf = &rxr->rx_buffers[i];
4849
4850 /* Partially received? Free the chain */
4851 if (rxr->fmp != NULL) {
4852 rxr->fmp->m_flags |= M_PKTHDR;
4853 m_freem(rxr->fmp);
4854 rxr->fmp = NULL;
4855 rxr->lmp = NULL;
4856 }
4857
4858 /*
4859 ** With advanced descriptors the writeback
4860 ** clobbers the buffer addrs, so it's easier
4861 ** to just free the existing mbufs and take
4862 ** the normal refresh path to get new buffers
4863 ** and mapping.
4864 */
4865 if (rbuf->m_head) {
4866 m_free(rbuf->m_head);
4867 rbuf->m_head = NULL;
4868 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4869 }
4870
4871 if (rbuf->m_pack) {
4872 m_free(rbuf->m_pack);
4873 rbuf->m_pack = NULL;
4874 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4875 }
4876
4877 return;
4878}
4879
4880static __inline void
4881igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4882{
4883
4884 /*
4885 * At the moment LRO is only for IPv4/TCP packets, and the TCP checksum
4886 * of the packet should be computed by hardware. Also, it should not
4887 * have a VLAN tag in the ethernet header.
4888 */
4889 if (rxr->lro_enabled &&
4890 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4891 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4892 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4893 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4894 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4895 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4896 /*
4897 * Send to the stack if:
4898 ** - LRO not enabled, or
4899 ** - no LRO resources, or
4900 ** - lro enqueue fails
4901 */
4902 if (rxr->lro.lro_cnt != 0)
4903 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4904 return;
4905 }
4906 IGB_RX_UNLOCK(rxr);
4907 (*ifp->if_input)(ifp, m);
4908 IGB_RX_LOCK(rxr);
4909}
4910
4911/*********************************************************************
4912 *
4913 * This routine executes in interrupt context. It replenishes
4914 * the mbufs in the descriptor ring and sends data which has been
4915 * DMA'd into host memory to the upper layer.
4916 *
4917 * We loop at most count times if count is > 0, or until done if
4918 * count < 0.
4919 *
4920 * Return TRUE if more to clean, FALSE otherwise
4921 *********************************************************************/
4922static bool
4923igb_rxeof(struct igb_queue *que, int count, int *done)
4924{
4925 struct adapter *adapter = que->adapter;
4926 struct rx_ring *rxr = que->rxr;
4927 struct ifnet *ifp = adapter->ifp;
4928 struct lro_ctrl *lro = &rxr->lro;
4929 struct lro_entry *queued;
4930 int i, processed = 0, rxdone = 0;
4931 u32 ptype, staterr = 0;
4932 union e1000_adv_rx_desc *cur;
4933
4934 IGB_RX_LOCK(rxr);
4935 /* Sync the ring. */
4936 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4937 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4938
4939#ifdef DEV_NETMAP
4940 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4941 IGB_RX_UNLOCK(rxr);
4942 return (FALSE);
4943 }
4944#endif /* DEV_NETMAP */
4945
4946 /* Main clean loop */
4947 for (i = rxr->next_to_check; count != 0;) {
4948 struct mbuf *sendmp, *mh, *mp;
4949 struct igb_rx_buf *rxbuf;
4950 u16 hlen, plen, hdr, vtag, pkt_info;
4951 bool eop = FALSE;
4952
4953 cur = &rxr->rx_base[i];
4954 staterr = le32toh(cur->wb.upper.status_error);
4955 if ((staterr & E1000_RXD_STAT_DD) == 0)
4956 break;
4957 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4958 break;
4959 count--;
4960 sendmp = mh = mp = NULL;
4961 cur->wb.upper.status_error = 0;
4962 rxbuf = &rxr->rx_buffers[i];
4963 plen = le16toh(cur->wb.upper.length);
4964 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4965 if (((adapter->hw.mac.type == e1000_i350) ||
4966 (adapter->hw.mac.type == e1000_i354)) &&
4967 (staterr & E1000_RXDEXT_STATERR_LB))
4968 vtag = be16toh(cur->wb.upper.vlan);
4969 else
4970 vtag = le16toh(cur->wb.upper.vlan);
4971 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4972 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4973 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4974
4975 /*
4976 * Free the frame (all segments) if we're at EOP and
4977 * it's an error.
4978 *
4979 * The datasheet states that EOP + status is only valid for
4980 * the final segment in a multi-segment frame.
4981 */
4982 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4983 adapter->dropped_pkts++;
4984 ++rxr->rx_discarded;
4985 igb_rx_discard(rxr, i);
4986 goto next_desc;
4987 }
4988
4989 /*
4990 ** The way the hardware is configured to
4991 ** split, it will ONLY use the header buffer
4992 ** when header split is enabled, otherwise we
4993 ** get normal behavior, i.e., both header and
4994 ** payload are DMA'd into the payload buffer.
4995 **
4996 ** The fmp test is to catch the case where a
4997 ** packet spans multiple descriptors, in that
4998 ** case only the first header is valid.
4999 */
5000 if (rxr->hdr_split && rxr->fmp == NULL) {
5001 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5002 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5003 E1000_RXDADV_HDRBUFLEN_SHIFT;
5004 if (hlen > IGB_HDR_BUF)
5005 hlen = IGB_HDR_BUF;
5006 mh = rxr->rx_buffers[i].m_head;
5007 mh->m_len = hlen;
5008 /* clear buf pointer for refresh */
5009 rxbuf->m_head = NULL;
5010 /*
5011 ** Get the payload length, this
5012 ** could be zero if it's a small
5013 ** packet.
5014 */
5015 if (plen > 0) {
5016 mp = rxr->rx_buffers[i].m_pack;
5017 mp->m_len = plen;
5018 mh->m_next = mp;
5019 /* clear buf pointer */
5020 rxbuf->m_pack = NULL;
5021 rxr->rx_split_packets++;
5022 }
5023 } else {
5024 /*
5025 ** Either no header split, or a
5026 ** secondary piece of a fragmented
5027 ** split packet.
5028 */
5029 mh = rxr->rx_buffers[i].m_pack;
5030 mh->m_len = plen;
5031 /* clear buf info for refresh */
5032 rxbuf->m_pack = NULL;
5033 }
5034 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5035
5036 ++processed; /* So we know when to refresh */
5037
5038 /* Initial frame - setup */
5039 if (rxr->fmp == NULL) {
5040 mh->m_pkthdr.len = mh->m_len;
5041 /* Save the head of the chain */
5042 rxr->fmp = mh;
5043 rxr->lmp = mh;
5044 if (mp != NULL) {
5045 /* Add payload if split */
5046 mh->m_pkthdr.len += mp->m_len;
5047 rxr->lmp = mh->m_next;
5048 }
5049 } else {
5050 /* Chain mbuf's together */
5051 rxr->lmp->m_next = mh;
5052 rxr->lmp = rxr->lmp->m_next;
5053 rxr->fmp->m_pkthdr.len += mh->m_len;
5054 }
5055
5056 if (eop) {
5057 rxr->fmp->m_pkthdr.rcvif = ifp;
5058 rxr->rx_packets++;
5059 /* capture data for AIM */
5060 rxr->packets++;
5061 rxr->bytes += rxr->fmp->m_pkthdr.len;
5062 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5063
5064 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5065 igb_rx_checksum(staterr, rxr->fmp, ptype);
5066
5067 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5068 (staterr & E1000_RXD_STAT_VP) != 0) {
5069 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5070 rxr->fmp->m_flags |= M_VLANTAG;
5071 }
5072
5073 /*
5074 * In case of multiqueue, we have RXCSUM.PCSD bit set
5075 * and never cleared. This means we have RSS hash
5076 * available to be used.
5077 */
5078 if (adapter->num_queues > 1) {
5079 rxr->fmp->m_pkthdr.flowid =
5080 le32toh(cur->wb.lower.hi_dword.rss);
5081 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5082 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5083 M_HASHTYPE_SET(rxr->fmp,
5084 M_HASHTYPE_RSS_TCP_IPV4);
5085 break;
5086 case E1000_RXDADV_RSSTYPE_IPV4:
5087 M_HASHTYPE_SET(rxr->fmp,
5088 M_HASHTYPE_RSS_IPV4);
5089 break;
5090 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5091 M_HASHTYPE_SET(rxr->fmp,
5092 M_HASHTYPE_RSS_TCP_IPV6);
5093 break;
5094 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5095 M_HASHTYPE_SET(rxr->fmp,
5096 M_HASHTYPE_RSS_IPV6_EX);
5097 break;
5098 case E1000_RXDADV_RSSTYPE_IPV6:
5099 M_HASHTYPE_SET(rxr->fmp,
5100 M_HASHTYPE_RSS_IPV6);
5101 break;
5102 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5103 M_HASHTYPE_SET(rxr->fmp,
5104 M_HASHTYPE_RSS_TCP_IPV6_EX);
5105 break;
5106 default:
5107 /* XXX fallthrough */
5108 M_HASHTYPE_SET(rxr->fmp,
5109 M_HASHTYPE_OPAQUE);
5110 }
5111 } else {
5112#ifndef IGB_LEGACY_TX
5113 rxr->fmp->m_pkthdr.flowid = que->msix;
5114 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5115#endif
5116 }
5117 sendmp = rxr->fmp;
5118 /* Make sure to set M_PKTHDR. */
5119 sendmp->m_flags |= M_PKTHDR;
5120 rxr->fmp = NULL;
5121 rxr->lmp = NULL;
5122 }
5123
5124next_desc:
5125 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5126 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5127
5128 /* Advance our pointers to the next descriptor. */
5129 if (++i == adapter->num_rx_desc)
5130 i = 0;
5131 /*
5132 ** Send to the stack or LRO
5133 */
5134 if (sendmp != NULL) {
5135 rxr->next_to_check = i;
5136 igb_rx_input(rxr, ifp, sendmp, ptype);
5137 i = rxr->next_to_check;
5138 rxdone++;
5139 }
5140
5141 /* Every 8 descriptors we go to refresh mbufs */
5142 if (processed == 8) {
5143 igb_refresh_mbufs(rxr, i);
5144 processed = 0;
5145 }
5146 }
5147
5148 /* Catch any remainders */
5149 if (igb_rx_unrefreshed(rxr))
5150 igb_refresh_mbufs(rxr, i);
5151
5152 rxr->next_to_check = i;
5153
5154 /*
5155 * Flush any outstanding LRO work
5156 */
5157 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5158 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5159 tcp_lro_flush(lro, queued);
5160 }
5161
5162 if (done != NULL)
5163 *done += rxdone;
5164
5165 IGB_RX_UNLOCK(rxr);
5166 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5167}
5168
5169/*********************************************************************
5170 *
5171 * Verify that the hardware indicated that the checksum is valid.
5172 * Inform the stack about the status of the checksum so that the
5173 * stack doesn't spend time verifying it.
5174 *
5175 *********************************************************************/
5176static void
5177igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5178{
5179 u16 status = (u16)staterr;
5180 u8 errors = (u8) (staterr >> 24);
5181 int sctp;
5182
5183 /* Ignore Checksum bit is set */
5184 if (status & E1000_RXD_STAT_IXSM) {
5185 mp->m_pkthdr.csum_flags = 0;
5186 return;
5187 }
5188
5189 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5190 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5191 sctp = 1;
5192 else
5193 sctp = 0;
5194 if (status & E1000_RXD_STAT_IPCS) {
5195 /* Did it pass? */
5196 if (!(errors & E1000_RXD_ERR_IPE)) {
5197 /* IP Checksum Good */
5198 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5199 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5200 } else
5201 mp->m_pkthdr.csum_flags = 0;
5202 }
5203
5204 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5205 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5206#if __FreeBSD_version >= 800000
5207 if (sctp) /* reassign */
5208 type = CSUM_SCTP_VALID;
5209#endif
5210 /* Did it pass? */
5211 if (!(errors & E1000_RXD_ERR_TCPE)) {
5212 mp->m_pkthdr.csum_flags |= type;
5213 if (sctp == 0)
5214 mp->m_pkthdr.csum_data = htons(0xffff);
5215 }
5216 }
5217 return;
5218}
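/*
 * Worked example of the mapping above (a sketch): for a frame whose IPv4
 * header checksum and TCP checksum both passed in hardware, the mbuf
 * leaves this routine with
 *
 *	csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 *	    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 *	csum_data  = htons(0xffff);
 *
 * so the stack skips both verifications.  An SCTP frame instead gets
 * CSUM_SCTP_VALID (when built with __FreeBSD_version >= 800000) and
 * csum_data is left unset.
 */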
5219
5220/*
5221 * This routine is run via a vlan
5222 * config EVENT
5223 */
5224static void
5225igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5226{
5227 struct adapter *adapter = ifp->if_softc;
5228 u32 index, bit;
5229
5230 if (ifp->if_softc != arg) /* Not our event */
5231 return;
5232
5233 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5234 return;
5235
5236 IGB_CORE_LOCK(adapter);
5237 index = (vtag >> 5) & 0x7F;
5238 bit = vtag & 0x1F;
5239 adapter->shadow_vfta[index] |= (1 << bit);
5240 ++adapter->num_vlans;
5241 /* Change hw filter setting */
5242 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5243 igb_setup_vlan_hw_support(adapter);
5244 IGB_CORE_UNLOCK(adapter);
5245}
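/*
 * Example of the VFTA index/bit arithmetic used above and in the
 * unregister handler below (a sketch): vtag 100 gives
 * index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F = 4, so VLAN 100
 * is tracked by bit 4 of shadow_vfta[3].
 */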
5246
5247/*
5248 * This routine is run via a vlan
5249 * unconfig EVENT
5250 */
5251static void
5252igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5253{
5254 struct adapter *adapter = ifp->if_softc;
5255 u32 index, bit;
5256
5257 if (ifp->if_softc != arg)
5258 return;
5259
5260 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5261 return;
5262
5263 IGB_CORE_LOCK(adapter);
5264 index = (vtag >> 5) & 0x7F;
5265 bit = vtag & 0x1F;
5266 adapter->shadow_vfta[index] &= ~(1 << bit);
5267 --adapter->num_vlans;
5268 /* Change hw filter setting */
5269 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5270 igb_setup_vlan_hw_support(adapter);
5271 IGB_CORE_UNLOCK(adapter);
5272}
5273
5274static void
5275igb_setup_vlan_hw_support(struct adapter *adapter)
5276{
5277 struct e1000_hw *hw = &adapter->hw;
5278 struct ifnet *ifp = adapter->ifp;
5279 u32 reg;
5280
5281 if (adapter->vf_ifp) {
5282 e1000_rlpml_set_vf(hw,
5283 adapter->max_frame_size + VLAN_TAG_SIZE);
5284 return;
5285 }
5286
5287 reg = E1000_READ_REG(hw, E1000_CTRL);
5288 reg |= E1000_CTRL_VME;
5289 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5290
5291 /* Enable the Filter Table */
5292 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5293 reg = E1000_READ_REG(hw, E1000_RCTL);
5294 reg &= ~E1000_RCTL_CFIEN;
5295 reg |= E1000_RCTL_VFE;
5296 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5297 }
5298
5299 /* Update the frame size */
5300 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5301 adapter->max_frame_size + VLAN_TAG_SIZE);
5302
5303 /* Don't bother with table if no vlans */
5304 if ((adapter->num_vlans == 0) ||
5305 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5306 return;
5307 /*
5308 ** A soft reset zeroes out the VFTA, so
5309 ** we need to repopulate it now.
5310 */
5311 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5312 if (adapter->shadow_vfta[i] != 0) {
5313 if (adapter->vf_ifp)
5314 e1000_vfta_set_vf(hw,
5315 adapter->shadow_vfta[i], TRUE);
5316 else
5317 e1000_write_vfta(hw,
5318 i, adapter->shadow_vfta[i]);
5319 }
5320}
5321
5322static void
5323igb_enable_intr(struct adapter *adapter)
5324{
5325 /* With RSS set up what to auto clear */
5326 if (adapter->msix_mem) {
5327 u32 mask = (adapter->que_mask | adapter->link_mask);
5328 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5329 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5330 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5331 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5332 E1000_IMS_LSC);
5333 } else {
5334 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5335 IMS_ENABLE_MASK);
5336 }
5337 E1000_WRITE_FLUSH(&adapter->hw);
5338
5339 return;
5340}
5341
5342static void
5343igb_disable_intr(struct adapter *adapter)
5344{
5345 if (adapter->msix_mem) {
5346 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5347 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5348 }
5349 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5350 E1000_WRITE_FLUSH(&adapter->hw);
5351 return;
5352}
5353
5354/*
5355 * Bit of a misnomer: what this really means is
5356 * to enable OS management of the system, i.e.
5357 * to disable special hardware management features.
5358 */
5359static void
5360igb_init_manageability(struct adapter *adapter)
5361{
5362 if (adapter->has_manage) {
5363 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5364 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5365
5366 /* disable hardware interception of ARP */
5367 manc &= ~(E1000_MANC_ARP_EN);
5368
5369 /* enable receiving management packets to the host */
5370 manc |= E1000_MANC_EN_MNG2HOST;
5371 manc2h |= 1 << 5; /* Mng Port 623 */
5372 manc2h |= 1 << 6; /* Mng Port 664 */
5373 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5374 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5375 }
5376}
5377
5378/*
5379 * Give control back to hardware management
5380 * controller if there is one.
5381 */
5382static void
5383igb_release_manageability(struct adapter *adapter)
5384{
5385 if (adapter->has_manage) {
5386 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5387
5388 /* re-enable hardware interception of ARP */
5389 manc |= E1000_MANC_ARP_EN;
5390 manc &= ~E1000_MANC_EN_MNG2HOST;
5391
5392 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5393 }
5394}
5395
5396/*
5397 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5398 * For ASF and Pass Through versions of f/w this means that
5399 * the driver is loaded.
5400 *
5401 */
5402static void
5403igb_get_hw_control(struct adapter *adapter)
5404{
5405 u32 ctrl_ext;
5406
5407 if (adapter->vf_ifp)
5408 return;
5409
5410 /* Let firmware know the driver has taken over */
5411 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5412 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5413 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5414}
5415
5416/*
5417 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5418 * For ASF and Pass Through versions of f/w this means that the
5419 * driver is no longer loaded.
5420 *
5421 */
5422static void
5423igb_release_hw_control(struct adapter *adapter)
5424{
5425 u32 ctrl_ext;
5426
5427 if (adapter->vf_ifp)
5428 return;
5429
5430 /* Let firmware take over control of h/w */
5431 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5432 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5433 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5434}
5435
5436static int
5437igb_is_valid_ether_addr(uint8_t *addr)
5438{
5439 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5440
5441 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5442 return (FALSE);
5443 }
5444
5445 return (TRUE);
5446}
5447
5448
5449/*
5450 * Enable PCI Wake On LAN capability
5451 */
5452static void
5453igb_enable_wakeup(device_t dev)
5454{
5455 u16 cap, status;
5456 u8 id;
5457
5458 /* First find the capabilities pointer*/
5459 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5460 /* Read the PM Capabilities */
5461 id = pci_read_config(dev, cap, 1);
5462 if (id != PCIY_PMG) /* Something wrong */
5463 return;
5464 /* OK, we have the power capabilities, so
5465 now get the status register */
5466 cap += PCIR_POWER_STATUS;
5467 status = pci_read_config(dev, cap, 2);
5468 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5469 pci_write_config(dev, cap, status, 2);
5470 return;
5471}
5472
5473static void
5474igb_led_func(void *arg, int onoff)
5475{
5476 struct adapter *adapter = arg;
5477
5478 IGB_CORE_LOCK(adapter);
5479 if (onoff) {
5480 e1000_setup_led(&adapter->hw);
5481 e1000_led_on(&adapter->hw);
5482 } else {
5483 e1000_led_off(&adapter->hw);
5484 e1000_cleanup_led(&adapter->hw);
5485 }
5486 IGB_CORE_UNLOCK(adapter);
5487}
5488
5489static uint64_t
5490igb_get_vf_counter(if_t ifp, ift_counter cnt)
5491{
5492 struct adapter *adapter;
5493 struct e1000_vf_stats *stats;
5494#ifndef IGB_LEGACY_TX
5495 struct tx_ring *txr;
5496 uint64_t rv;
5497#endif
5498
5499 adapter = if_getsoftc(ifp);
5500 stats = (struct e1000_vf_stats *)adapter->stats;
5501
5502 switch (cnt) {
5503 case IFCOUNTER_IPACKETS:
5504 return (stats->gprc);
5505 case IFCOUNTER_OPACKETS:
5506 return (stats->gptc);
5507 case IFCOUNTER_IBYTES:
5508 return (stats->gorc);
5509 case IFCOUNTER_OBYTES:
5510 return (stats->gotc);
5511 case IFCOUNTER_IMCASTS:
5512 return (stats->mprc);
5513 case IFCOUNTER_IERRORS:
5514 return (adapter->dropped_pkts);
5515 case IFCOUNTER_OERRORS:
5516 return (adapter->watchdog_events);
5517#ifndef IGB_LEGACY_TX
5518 case IFCOUNTER_OQDROPS:
5519 rv = 0;
5520 txr = adapter->tx_rings;
5521 for (int i = 0; i < adapter->num_queues; i++, txr++)
5522 rv += txr->br->br_drops;
5523 return (rv);
5524#endif
5525 default:
5526 return (if_get_counter_default(ifp, cnt));
5527 }
5528}
5529
5530static uint64_t
5531igb_get_counter(if_t ifp, ift_counter cnt)
5532{
5533 struct adapter *adapter;
5534 struct e1000_hw_stats *stats;
5535#ifndef IGB_LEGACY_TX
5536 struct tx_ring *txr;
5537 uint64_t rv;
5538#endif
5539
5540 adapter = if_getsoftc(ifp);
5541 if (adapter->vf_ifp)
5542 return (igb_get_vf_counter(ifp, cnt));
5543
5544 stats = (struct e1000_hw_stats *)adapter->stats;
5545
5546 switch (cnt) {
5547 case IFCOUNTER_IPACKETS:
5548 return (stats->gprc);
5549 case IFCOUNTER_OPACKETS:
5550 return (stats->gptc);
5551 case IFCOUNTER_IBYTES:
5552 return (stats->gorc);
5553 case IFCOUNTER_OBYTES:
5554 return (stats->gotc);
5555 case IFCOUNTER_IMCASTS:
5556 return (stats->mprc);
5557 case IFCOUNTER_OMCASTS:
5558 return (stats->mptc);
5559 case IFCOUNTER_IERRORS:
5560 return (adapter->dropped_pkts + stats->rxerrc +
5561 stats->crcerrs + stats->algnerrc +
5562 stats->ruc + stats->roc + stats->cexterr);
5563 case IFCOUNTER_OERRORS:
5564 return (stats->ecol + stats->latecol +
5565 adapter->watchdog_events);
5566 case IFCOUNTER_COLLISIONS:
5567 return (stats->colc);
5568 case IFCOUNTER_IQDROPS:
5569 return (stats->mpc);
5570#ifndef IGB_LEGACY_TX
5571 case IFCOUNTER_OQDROPS:
5572 rv = 0;
5573 txr = adapter->tx_rings;
5574 for (int i = 0; i < adapter->num_queues; i++, txr++)
5575 rv += txr->br->br_drops;
5576 return (rv);
5577#endif
5578 default:
5579 return (if_get_counter_default(ifp, cnt));
5580 }
5581}
5582
5583/**********************************************************************
5584 *
5585 * Update the board statistics counters.
5586 *
5587 **********************************************************************/
5588static void
5589igb_update_stats_counters(struct adapter *adapter)
5590{
5591 struct e1000_hw *hw = &adapter->hw;
5592 struct e1000_hw_stats *stats;
5593
5594 /*
5595 ** The virtual function adapter has only a
5596 ** small controlled set of stats, so do only
5597 ** those and return.
5598 */
5599 if (adapter->vf_ifp) {
5600 igb_update_vf_stats_counters(adapter);
5601 return;
5602 }
5603
5604 stats = (struct e1000_hw_stats *)adapter->stats;
5605
5606 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5607 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5608 stats->symerrs +=
5609 E1000_READ_REG(hw,E1000_SYMERRS);
5610 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5611 }
5612
5613 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5614 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5615 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5616 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5617
5618 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5619 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5620 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5621 stats->dc += E1000_READ_REG(hw, E1000_DC);
5622 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5623 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5624 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5625 /*
5626 ** For watchdog management we need to know if we have been
5627 ** paused during the last interval, so capture that here.
5628 */
5629 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5630 stats->xoffrxc += adapter->pause_frames;
5631 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5632 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5633 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5634 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5635 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5636 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5637 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5638 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5639 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5640 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5641 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5642 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5643
5644 /* For the 64-bit byte counters the low dword must be read first. */
5645 /* Both registers clear on the read of the high dword */
5646
5647 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5648 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5649 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5650 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5651
5652 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5653 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5654 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5655 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5656 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5657
5658 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5659 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5660 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5661
5662 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5663 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5664 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5665 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5666
5667 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5668 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5669 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5670 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5671 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5672 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5673 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5674 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5675 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5676 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5677
5678 /* Interrupt Counts */
5679
5680 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5681 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5682 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5683 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5684 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5685 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5686 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5687 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5688 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5689
5690 /* Host to Card Statistics */
5691
5692 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5693 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5694 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5695 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5696 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5697 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5698 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5699 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5700 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5701 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5702 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5703 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5704 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5705 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5706
5707 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5708 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5709 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5710 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5711 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5712 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5713
5714 /* Driver specific counters */
5715 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5716 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5717 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5718 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5719 adapter->packet_buf_alloc_tx =
5720 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5721 adapter->packet_buf_alloc_rx =
5722 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5723}
5724
5725
5726/**********************************************************************
5727 *
5728 * Initialize the VF board statistics counters.
5729 *
5730 **********************************************************************/
5731static void
5732igb_vf_init_stats(struct adapter *adapter)
5733{
5734 struct e1000_hw *hw = &adapter->hw;
5735 struct e1000_vf_stats *stats;
5736
5737 stats = (struct e1000_vf_stats *)adapter->stats;
5738 if (stats == NULL)
5739 return;
5740 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5741 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5742 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5743 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5744 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5745}
5746
5747/**********************************************************************
5748 *
5749 * Update the VF board statistics counters.
5750 *
5751 **********************************************************************/
5752static void
5753igb_update_vf_stats_counters(struct adapter *adapter)
5754{
5755 struct e1000_hw *hw = &adapter->hw;
5756 struct e1000_vf_stats *stats;
5757
5758 if (adapter->link_speed == 0)
5759 return;
5760
5761 stats = (struct e1000_vf_stats *)adapter->stats;
5762
5763 UPDATE_VF_REG(E1000_VFGPRC,
5764 stats->last_gprc, stats->gprc);
5765 UPDATE_VF_REG(E1000_VFGORC,
5766 stats->last_gorc, stats->gorc);
5767 UPDATE_VF_REG(E1000_VFGPTC,
5768 stats->last_gptc, stats->gptc);
5769 UPDATE_VF_REG(E1000_VFGOTC,
5770 stats->last_gotc, stats->gotc);
5771 UPDATE_VF_REG(E1000_VFMPRC,
5772 stats->last_mprc, stats->mprc);
5773}
5774
5775/* Export a single 32-bit register via a read-only sysctl. */
5776static int
5777igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5778{
5779 struct adapter *adapter;
5780 u_int val;
5781
5782 adapter = oidp->oid_arg1;
5783 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5784 return (sysctl_handle_int(oidp, &val, 0, req));
5785}
5786
5787/*
5788** Tuneable interrupt rate handler
5789*/
5790static int
5791igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5792{
5793 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5794 int error;
5795 u32 reg, usec, rate;
5796
5797 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5798 usec = ((reg & 0x7FFC) >> 2);
5799 if (usec > 0)
5800 rate = 1000000 / usec;
5801 else
5802 rate = 0;
5803 error = sysctl_handle_int(oidp, &rate, 0, req);
5804 if (error || !req->newptr)
5805 return error;
5806 return 0;
5807}
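/*
 * Worked example of the conversion above (a sketch): if the EITR
 * interval field, (reg & 0x7FFC) >> 2, evaluates to 125, the handler
 * reports rate = 1000000 / 125 = 8000 interrupts per second.
 */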
5808
5809/*
5810 * Add sysctl variables, one per statistic, to the system.
5811 */
5812static void
5813igb_add_hw_stats(struct adapter *adapter)
5814{
5815 device_t dev = adapter->dev;
5816
5817 struct tx_ring *txr = adapter->tx_rings;
5818 struct rx_ring *rxr = adapter->rx_rings;
5819
5820 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5821 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5822 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5823 struct e1000_hw_stats *stats = adapter->stats;
5824
5825 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5826 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5827
5828#define QUEUE_NAME_LEN 32
5829 char namebuf[QUEUE_NAME_LEN];
5830
5831 /* Driver Statistics */
5832 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5833 CTLFLAG_RD, &adapter->dropped_pkts,
5834 "Driver dropped packets");
5835 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5836 CTLFLAG_RD, &adapter->link_irq,
5837 "Link MSIX IRQ Handled");
5838 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5839 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5840 "Defragmenting mbuf chain failed");
5841 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5842 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5843 "Driver tx dma failure in xmit");
5844 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5845 CTLFLAG_RD, &adapter->rx_overruns,
5846 "RX overruns");
5847 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5848 CTLFLAG_RD, &adapter->watchdog_events,
5849 "Watchdog timeouts");
5850
5851 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5852 CTLFLAG_RD, &adapter->device_control,
5853 "Device Control Register");
5854 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5855 CTLFLAG_RD, &adapter->rx_control,
5856 "Receiver Control Register");
5857 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5858 CTLFLAG_RD, &adapter->int_mask,
5859 "Interrupt Mask");
5860 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5861 CTLFLAG_RD, &adapter->eint_mask,
5862 "Extended Interrupt Mask");
5863 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5864 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5865 "Transmit Buffer Packet Allocation");
5866 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5867 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5868 "Receive Buffer Packet Allocation");
5869 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5870 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5871 "Flow Control High Watermark");
5872 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5873 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5874 "Flow Control Low Watermark");
5875
5876 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5877 struct lro_ctrl *lro = &rxr->lro;
5878
5879 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5880 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5881 CTLFLAG_RD, NULL, "Queue Name");
5882 queue_list = SYSCTL_CHILDREN(queue_node);
5883
5884 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5885 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5886 sizeof(&adapter->queues[i]),
5887 igb_sysctl_interrupt_rate_handler,
5888 "IU", "Interrupt Rate");
5889
5890 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5891 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5892 igb_sysctl_reg_handler, "IU",
5893 "Transmit Descriptor Head");
5894 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5895 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5896 igb_sysctl_reg_handler, "IU",
5897 "Transmit Descriptor Tail");
5898 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5899 CTLFLAG_RD, &txr->no_desc_avail,
5900 "Queue Descriptors Unavailable");
5901 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5902 CTLFLAG_RD, &txr->total_packets,
5903 "Queue Packets Transmitted");
5904
5905 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5906 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5907 igb_sysctl_reg_handler, "IU",
5908 "Receive Descriptor Head");
5909 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5910 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5911 igb_sysctl_reg_handler, "IU",
5912 "Receive Descriptor Tail");
5913 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5914 CTLFLAG_RD, &rxr->rx_packets,
5915 "Queue Packets Received");
5916 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5917 CTLFLAG_RD, &rxr->rx_bytes,
5918 "Queue Bytes Received");
5919 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5920 CTLFLAG_RD, &lro->lro_queued, 0,
5921 "LRO Queued");
5922 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5923 CTLFLAG_RD, &lro->lro_flushed, 0,
5924 "LRO Flushed");
5925 }
5926
5927 /* MAC stats get their own sub node */
5928
5929 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5930 CTLFLAG_RD, NULL, "MAC Statistics");
5931 stat_list = SYSCTL_CHILDREN(stat_node);
5932
5933 /*
5934 ** VF adapter has a very limited set of stats
5935 ** since it's not managing the metal, so to speak.
5936 */
5937 if (adapter->vf_ifp) {
5938 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5939 CTLFLAG_RD, &stats->gprc,
5940 "Good Packets Received");
5941 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5942 CTLFLAG_RD, &stats->gptc,
5943 "Good Packets Transmitted");
5944 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5945 CTLFLAG_RD, &stats->gorc,
5946 "Good Octets Received");
5947 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5948 CTLFLAG_RD, &stats->gotc,
5949 "Good Octets Transmitted");
5950 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5951 CTLFLAG_RD, &stats->mprc,
5952 "Multicast Packets Received");
5953 return;
5954 }
5955
5956 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5957 CTLFLAG_RD, &stats->ecol,
5958 "Excessive collisions");
5959 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5960 CTLFLAG_RD, &stats->scc,
5961 "Single collisions");
5962 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5963 CTLFLAG_RD, &stats->mcc,
5964 "Multiple collisions");
5965 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5966 CTLFLAG_RD, &stats->latecol,
5967 "Late collisions");
5968 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5969 CTLFLAG_RD, &stats->colc,
5970 "Collision Count");
5971 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5972 CTLFLAG_RD, &stats->symerrs,
5973 "Symbol Errors");
5974 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5975 CTLFLAG_RD, &stats->sec,
5976 "Sequence Errors");
5977 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5978 CTLFLAG_RD, &stats->dc,
5979 "Defer Count");
5980 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5981 CTLFLAG_RD, &stats->mpc,
5982 "Missed Packets");
5983 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5984 CTLFLAG_RD, &stats->rlec,
5985 "Receive Length Errors");
5986 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5987 CTLFLAG_RD, &stats->rnbc,
5988 "Receive No Buffers");
5989 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5990 CTLFLAG_RD, &stats->ruc,
5991 "Receive Undersize");
5992 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5993 CTLFLAG_RD, &stats->rfc,
5994 "Fragmented Packets Received");
5995 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5996 CTLFLAG_RD, &stats->roc,
5997 "Oversized Packets Received");
5998 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5999 CTLFLAG_RD, &stats->rjc,
6000 "Received Jabber");
6001 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6002 CTLFLAG_RD, &stats->rxerrc,
6003 "Receive Errors");
6004 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6005 CTLFLAG_RD, &stats->crcerrs,
6006 "CRC errors");
6007 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6008 CTLFLAG_RD, &stats->algnerrc,
6009 "Alignment Errors");
6010 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6011 CTLFLAG_RD, &stats->tncrs,
6012 "Transmit with No CRS");
6013 /* On 82575 these are collision counts */
6014 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6015 CTLFLAG_RD, &stats->cexterr,
6016 "Collision/Carrier extension errors");
6017 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6018 CTLFLAG_RD, &stats->xonrxc,
6019 "XON Received");
6020 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6021 CTLFLAG_RD, &stats->xontxc,
6022 "XON Transmitted");
6023 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6024 CTLFLAG_RD, &stats->xoffrxc,
6025 "XOFF Received");
6026 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6027 CTLFLAG_RD, &stats->xofftxc,
6028 "XOFF Transmitted");
6029 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6030 CTLFLAG_RD, &stats->fcruc,
6031 "Unsupported Flow Control Received");
6032 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6033 CTLFLAG_RD, &stats->mgprc,
6034 "Management Packets Received");
6035 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6036 CTLFLAG_RD, &stats->mgpdc,
6037 "Management Packets Dropped");
6038 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6039 CTLFLAG_RD, &stats->mgptc,
6040 "Management Packets Transmitted");
6041 /* Packet Reception Stats */
6042 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6043 CTLFLAG_RD, &stats->tpr,
6044 "Total Packets Received");
6045 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6046 CTLFLAG_RD, &stats->gprc,
6047 "Good Packets Received");
6048 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6049 CTLFLAG_RD, &stats->bprc,
6050 "Broadcast Packets Received");
6051 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6052 CTLFLAG_RD, &stats->mprc,
6053 "Multicast Packets Received");
6054 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6055 CTLFLAG_RD, &stats->prc64,
6056 "64 byte frames received");
6057 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6058 CTLFLAG_RD, &stats->prc127,
6059 "65-127 byte frames received");
6060 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6061 CTLFLAG_RD, &stats->prc255,
6062 "128-255 byte frames received");
6063 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6064 CTLFLAG_RD, &stats->prc511,
6065 "256-511 byte frames received");
6066 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6067 CTLFLAG_RD, &stats->prc1023,
6068 "512-1023 byte frames received");
6069 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6070 CTLFLAG_RD, &stats->prc1522,
6071 "1024-1522 byte frames received");
6072 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6073 CTLFLAG_RD, &stats->gorc,
6074 "Good Octets Received");
6075 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6076 CTLFLAG_RD, &stats->tor,
6077 "Total Octets Received");
6078
6079 /* Packet Transmission Stats */
6080 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6081 CTLFLAG_RD, &stats->gotc,
6082 "Good Octets Transmitted");
6083 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6084 CTLFLAG_RD, &stats->tot,
6085 "Total Octets Transmitted");
6086 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6087 CTLFLAG_RD, &stats->tpt,
6088 "Total Packets Transmitted");
6089 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6090 CTLFLAG_RD, &stats->gptc,
6091 "Good Packets Transmitted");
6092 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6093 CTLFLAG_RD, &stats->bptc,
6094 "Broadcast Packets Transmitted");
6095 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6096 CTLFLAG_RD, &stats->mptc,
6097 "Multicast Packets Transmitted");
6098 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6099 CTLFLAG_RD, &stats->ptc64,
6100 "64 byte frames transmitted");
6101 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6102 CTLFLAG_RD, &stats->ptc127,
6103 "65-127 byte frames transmitted");
6104 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6105 CTLFLAG_RD, &stats->ptc255,
6106 "128-255 byte frames transmitted");
6107 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6108 CTLFLAG_RD, &stats->ptc511,
6109 "256-511 byte frames transmitted");
6110 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6111 CTLFLAG_RD, &stats->ptc1023,
6112 "512-1023 byte frames transmitted");
6113 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6114 CTLFLAG_RD, &stats->ptc1522,
6115 "1024-1522 byte frames transmitted");
6116 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6117 CTLFLAG_RD, &stats->tsctc,
6118 "TSO Contexts Transmitted");
6119 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6120 CTLFLAG_RD, &stats->tsctfc,
6121 "TSO Contexts Failed");
6122
6123
6124 /* Interrupt Stats */
6125
6126 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6127 CTLFLAG_RD, NULL, "Interrupt Statistics");
6128 int_list = SYSCTL_CHILDREN(int_node);
6129
6130 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6131 CTLFLAG_RD, &stats->iac,
6132 "Interrupt Assertion Count");
6133
6134 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6135 CTLFLAG_RD, &stats->icrxptc,
6136 "Interrupt Cause Rx Pkt Timer Expire Count");
6137
6138 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6139 CTLFLAG_RD, &stats->icrxatc,
6140 "Interrupt Cause Rx Abs Timer Expire Count");
6141
6142 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6143 CTLFLAG_RD, &stats->ictxptc,
6144 "Interrupt Cause Tx Pkt Timer Expire Count");
6145
6146 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6147 CTLFLAG_RD, &stats->ictxatc,
6148 "Interrupt Cause Tx Abs Timer Expire Count");
6149
6150 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6151 CTLFLAG_RD, &stats->ictxqec,
6152 "Interrupt Cause Tx Queue Empty Count");
6153
6154 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6155 CTLFLAG_RD, &stats->ictxqmtc,
6156 "Interrupt Cause Tx Queue Min Thresh Count");
6157
6158 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6159 CTLFLAG_RD, &stats->icrxdmtc,
6160 "Interrupt Cause Rx Desc Min Thresh Count");
6161
6162 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6163 CTLFLAG_RD, &stats->icrxoc,
6164 "Interrupt Cause Receiver Overrun Count");
6165
6166 /* Host to Card Stats */
6167
6168 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6169 CTLFLAG_RD, NULL,
6170 "Host to Card Statistics");
6171
6172 host_list = SYSCTL_CHILDREN(host_node);
6173
6174 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6175 CTLFLAG_RD, &stats->cbtmpc,
6176 "Circuit Breaker Tx Packet Count");
6177
6178 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6179 CTLFLAG_RD, &stats->htdpmc,
6180 "Host Transmit Discarded Packets");
6181
6182 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6183 CTLFLAG_RD, &stats->rpthc,
6184 "Rx Packets To Host");
6185
6186 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6187 CTLFLAG_RD, &stats->cbrmpc,
6188 "Circuit Breaker Rx Packet Count");
6189
6190 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6191 CTLFLAG_RD, &stats->cbrdpc,
6192 "Circuit Breaker Rx Dropped Count");
6193
6194 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6195 CTLFLAG_RD, &stats->hgptc,
6196 "Host Good Packets Tx Count");
6197
6198 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6199 CTLFLAG_RD, &stats->htcbdpc,
6200 "Host Tx Circuit Breaker Dropped Count");
6201
6202 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6203 CTLFLAG_RD, &stats->hgorc,
6204 "Host Good Octets Received Count");
6205
6206 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6207 CTLFLAG_RD, &stats->hgotc,
6208 "Host Good Octets Transmit Count");
6209
6210 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6211 CTLFLAG_RD, &stats->lenerrs,
6212 "Length Errors");
6213
6214 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6215 CTLFLAG_RD, &stats->scvpc,
6216 "SerDes/SGMII Code Violation Pkt Count");
6217
6218 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6219 CTLFLAG_RD, &stats->hrmpc,
6220 "Header Redirection Missed Packet Count");
6221}
6222
6223
6224/**********************************************************************
6225 *
6226 * This routine provides a way to dump out the adapter EEPROM,
6227 * often a useful debug/service tool. It only dumps the first
6228 * 32 words; the data that matters lies within that extent.
6229 *
6230 **********************************************************************/
6231static int
6232igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6233{
6234 struct adapter *adapter;
6235 int error;
6236 int result;
6237
6238 result = -1;
6239 error = sysctl_handle_int(oidp, &result, 0, req);
6240
6241 if (error || !req->newptr)
6242 return (error);
6243
6244 /*
6245 * This value will cause a hex dump of the
6246 * first 32 16-bit words of the EEPROM to
6247 * the screen.
6248 */
6249 if (result == 1) {
6250 adapter = (struct adapter *)arg1;
6251 igb_print_nvm_info(adapter);
6252 }
6253
6254 return (error);
6255}
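/*
 * Example usage (a sketch; assumes this handler is attached elsewhere in
 * the driver as the "nvm" node, e.g. dev.igb.0.nvm):
 *
 *	# sysctl dev.igb.0.nvm=1
 *
 * Writing 1 calls igb_print_nvm_info() and dumps the first 32 EEPROM
 * words to the console.
 */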
6256
6257static void
6258igb_print_nvm_info(struct adapter *adapter)
6259{
6260 u16 eeprom_data;
6261 int i, j, row = 0;
6262
6263 /* It's a bit crude, but it gets the job done */
6264 printf("\nInterface EEPROM Dump:\n");
6265 printf("Offset\n0x0000 ");
6266 for (i = 0, j = 0; i < 32; i++, j++) {
6267 if (j == 8) { /* Make the offset block */
6268 j = 0; ++row;
6269 printf("\n0x00%x0 ",row);
6270 }
6271 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6272 printf("%04x ", eeprom_data);
6273 }
6274 printf("\n");
6275}
6276
6277static void
6278igb_set_sysctl_value(struct adapter *adapter, const char *name,
6279 const char *description, int *limit, int value)
6280{
6281 *limit = value;
6282 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6283 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6284 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6285}
6286
6287/*
6288** Set flow control using sysctl:
6289** Flow control values:
6290** 0 - off
6291** 1 - rx pause
6292** 2 - tx pause
6293** 3 - full
6294*/
6295static int
6296igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6297{
6298 int error;
6299 static int input = 3; /* default is full */
6300 struct adapter *adapter = (struct adapter *) arg1;
6301
6302 error = sysctl_handle_int(oidp, &input, 0, req);
6303
6304 if ((error) || (req->newptr == NULL))
6305 return (error);
6306
6307 switch (input) {
6308 case e1000_fc_rx_pause:
6309 case e1000_fc_tx_pause:
6310 case e1000_fc_full:
6311 case e1000_fc_none:
6312 adapter->hw.fc.requested_mode = input;
6313 adapter->fc = input;
6314 break;
6315 default:
6316 /* Do nothing */
6317 return (error);
6318 }
6319
6320 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6321 e1000_force_mac_fc(&adapter->hw);
6322 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6323 return (error);
6324}
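/*
 * Example usage (a sketch; assumes this handler is attached elsewhere in
 * the driver as the "fc" node, e.g. dev.igb.0.fc):
 *
 *	# sysctl dev.igb.0.fc=3
 *
 * requests full (rx + tx pause) flow control, while 0 turns it off; the
 * requested mode is applied immediately via e1000_force_mac_fc().
 */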
6325
6326/*
6327** Manage DMA Coalesce:
6328** Control values:
6329** 0/1 - off/on
6330** Legal timer values are:
6331** 250,500,1000-10000 in thousands
6332*/
6333static int
6334igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6335{
6336 struct adapter *adapter = (struct adapter *) arg1;
6337 int error;
6338
6339 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6340
6341 if ((error) || (req->newptr == NULL))
6342 return (error);
6343
6344 switch (adapter->dmac) {
6345 case 0:
6346 /* Disabling */
6347 break;
6348 case 1: /* Just enable and use default */
6349 adapter->dmac = 1000;
6350 break;
6351 case 250:
6352 case 500:
6353 case 1000:
6354 case 2000:
6355 case 3000:
6356 case 4000:
6357 case 5000:
6358 case 6000:
6359 case 7000:
6360 case 8000:
6361 case 9000:
6362 case 10000:
6363 /* Legal values - allow */
6364 break;
6365 default:
6366 /* Illegal value, reset and return an error */
6367 adapter->dmac = 0;
6368 return (EINVAL);
6369 }
6370 /* Reinit the interface */
6371 igb_init(adapter);
6372 return (error);
6373}
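/*
 * Example usage (a sketch; assumes this handler is attached elsewhere in
 * the driver as the "dmac" node, e.g. dev.igb.0.dmac):
 *
 *	# sysctl dev.igb.0.dmac=1000
 *
 * selects the 1000 timer value; writing 1 also falls back to 1000, any
 * other out-of-range value is rejected with EINVAL, and a valid change
 * reinitializes the interface via igb_init().
 */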
6374
6375/*
6376** Manage Energy Efficient Ethernet:
6377** Control values:
6378** 0/1 - enabled/disabled
6379*/
6380static int
6381igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6382{
6383 struct adapter *adapter = (struct adapter *) arg1;
6384 int error, value;
6385
6386 value = adapter->hw.dev_spec._82575.eee_disable;
6387 error = sysctl_handle_int(oidp, &value, 0, req);
6388 if (error || req->newptr == NULL)
6389 return (error);
6390 IGB_CORE_LOCK(adapter);
6391 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6392 igb_init_locked(adapter);
6393 IGB_CORE_UNLOCK(adapter);
6394 return (0);
6395}