1/******************************************************************************
2
3 Copyright (c) 2001-2011, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 238148 2012-07-05 20:26:57Z jfv $*/
34
35#ifdef HAVE_KERNEL_OPTION_HEADERS
36#include "opt_device_polling.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <machine/bus.h>
60#include <machine/resource.h>
61
62#include <net/bpf.h>
63#include <net/ethernet.h>
64#include <net/if.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_media.h>
68
69#include <net/if_types.h>
70#include <net/if_vlan_var.h>
71
72#include <netinet/in_systm.h>
73#include <netinet/in.h>
74#include <netinet/if_ether.h>
75#include <netinet/ip.h>
76#include <netinet/ip6.h>
77#include <netinet/tcp.h>
78#include <netinet/udp.h>
79
80#include <machine/in_cksum.h>
81#include <dev/led/led.h>
82#include <dev/pci/pcivar.h>
83#include <dev/pci/pcireg.h>
84
85#include "e1000_api.h"
86#include "e1000_82571.h"
87#include "if_em.h"
88
89/*********************************************************************
90 * Set this to one to display debug statistics
91 *********************************************************************/
92int em_display_debug_stats = 0;
93
94/*********************************************************************
95 * Driver version:
96 *********************************************************************/
97char em_driver_version[] = "7.3.2";
98
99/*********************************************************************
100 * PCI Device ID Table
101 *
 102 * Used by probe to select the devices to load on.
 103 * The last field stores an index into em_strings.
104 * Last entry must be all 0s
105 *
106 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
107 *********************************************************************/
108
109static em_vendor_info_t em_vendor_info_array[] =
110{
111 /* Intel(R) PRO/1000 Network Connection */
112 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
113 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
114 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
115 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
116 PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
118 PCI_ANY_ID, PCI_ANY_ID, 0},
119 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
120 PCI_ANY_ID, PCI_ANY_ID, 0},
121 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
122 PCI_ANY_ID, PCI_ANY_ID, 0},
123 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
124 PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
126 PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
131
132 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
137 PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
139 PCI_ANY_ID, PCI_ANY_ID, 0},
140 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
141 PCI_ANY_ID, PCI_ANY_ID, 0},
142 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
143 PCI_ANY_ID, PCI_ANY_ID, 0},
144 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
146 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
175 /* required last entry */
176 { 0, 0, 0, 0, 0}
177};
178
179/*********************************************************************
180 * Table of branding strings for all supported NICs.
181 *********************************************************************/
182
183static char *em_strings[] = {
184 "Intel(R) PRO/1000 Network Connection"
185};
186
187/*********************************************************************
188 * Function prototypes
189 *********************************************************************/
190static int em_probe(device_t);
191static int em_attach(device_t);
192static int em_detach(device_t);
193static int em_shutdown(device_t);
194static int em_suspend(device_t);
195static int em_resume(device_t);
196#ifdef EM_MULTIQUEUE
197static int em_mq_start(struct ifnet *, struct mbuf *);
198static int em_mq_start_locked(struct ifnet *,
199 struct tx_ring *, struct mbuf *);
200static void em_qflush(struct ifnet *);
201#else
202static void em_start(struct ifnet *);
203static void em_start_locked(struct ifnet *, struct tx_ring *);
204#endif
205static int em_ioctl(struct ifnet *, u_long, caddr_t);
206static void em_init(void *);
207static void em_init_locked(struct adapter *);
208static void em_stop(void *);
209static void em_media_status(struct ifnet *, struct ifmediareq *);
210static int em_media_change(struct ifnet *);
211static void em_identify_hardware(struct adapter *);
212static int em_allocate_pci_resources(struct adapter *);
213static int em_allocate_legacy(struct adapter *);
214static int em_allocate_msix(struct adapter *);
215static int em_allocate_queues(struct adapter *);
216static int em_setup_msix(struct adapter *);
217static void em_free_pci_resources(struct adapter *);
218static void em_local_timer(void *);
219static void em_reset(struct adapter *);
220static int em_setup_interface(device_t, struct adapter *);
221
222static void em_setup_transmit_structures(struct adapter *);
223static void em_initialize_transmit_unit(struct adapter *);
224static int em_allocate_transmit_buffers(struct tx_ring *);
225static void em_free_transmit_structures(struct adapter *);
226static void em_free_transmit_buffers(struct tx_ring *);
227
228static int em_setup_receive_structures(struct adapter *);
229static int em_allocate_receive_buffers(struct rx_ring *);
230static void em_initialize_receive_unit(struct adapter *);
231static void em_free_receive_structures(struct adapter *);
232static void em_free_receive_buffers(struct rx_ring *);
233
234static void em_enable_intr(struct adapter *);
235static void em_disable_intr(struct adapter *);
236static void em_update_stats_counters(struct adapter *);
237static void em_add_hw_stats(struct adapter *adapter);
238static void em_txeof(struct tx_ring *);
239static bool em_rxeof(struct rx_ring *, int, int *);
240#ifndef __NO_STRICT_ALIGNMENT
241static int em_fixup_rx(struct rx_ring *);
242#endif
243static void em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
244static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
245 struct ip *, u32 *, u32 *);
246static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
247 struct tcphdr *, u32 *, u32 *);
248static void em_set_promisc(struct adapter *);
249static void em_disable_promisc(struct adapter *);
250static void em_set_multi(struct adapter *);
251static void em_update_link_status(struct adapter *);
252static void em_refresh_mbufs(struct rx_ring *, int);
253static void em_register_vlan(void *, struct ifnet *, u16);
254static void em_unregister_vlan(void *, struct ifnet *, u16);
255static void em_setup_vlan_hw_support(struct adapter *);
256static int em_xmit(struct tx_ring *, struct mbuf **);
257static int em_dma_malloc(struct adapter *, bus_size_t,
258 struct em_dma_alloc *, int);
259static void em_dma_free(struct adapter *, struct em_dma_alloc *);
260static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
261static void em_print_nvm_info(struct adapter *);
262static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
263static void em_print_debug_info(struct adapter *);
264static int em_is_valid_ether_addr(u8 *);
265static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
266static void em_add_int_delay_sysctl(struct adapter *, const char *,
267 const char *, struct em_int_delay_info *, int, int);
268/* Management and WOL Support */
269static void em_init_manageability(struct adapter *);
270static void em_release_manageability(struct adapter *);
271static void em_get_hw_control(struct adapter *);
272static void em_release_hw_control(struct adapter *);
273static void em_get_wakeup(device_t);
274static void em_enable_wakeup(device_t);
275static int em_enable_phy_wakeup(struct adapter *);
276static void em_led_func(void *, int);
277static void em_disable_aspm(struct adapter *);
278
279static int em_irq_fast(void *);
280
281/* MSIX handlers */
282static void em_msix_tx(void *);
283static void em_msix_rx(void *);
284static void em_msix_link(void *);
285static void em_handle_tx(void *context, int pending);
286static void em_handle_rx(void *context, int pending);
287static void em_handle_link(void *context, int pending);
288
289static void em_set_sysctl_value(struct adapter *, const char *,
290 const char *, int *, int);
291static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
292
293static __inline void em_rx_discard(struct rx_ring *, int);
294
295#ifdef DEVICE_POLLING
296static poll_handler_t em_poll;
297#endif /* DEVICE_POLLING */
298
299/*********************************************************************
300 * FreeBSD Device Interface Entry Points
301 *********************************************************************/
302
303static device_method_t em_methods[] = {
304 /* Device interface */
305 DEVMETHOD(device_probe, em_probe),
306 DEVMETHOD(device_attach, em_attach),
307 DEVMETHOD(device_detach, em_detach),
308 DEVMETHOD(device_shutdown, em_shutdown),
309 DEVMETHOD(device_suspend, em_suspend),
310 DEVMETHOD(device_resume, em_resume),
311 {0, 0}
312};
313
314static driver_t em_driver = {
315 "em", em_methods, sizeof(struct adapter),
316};
317
318devclass_t em_devclass;
319DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
320MODULE_DEPEND(em, pci, 1, 1, 1);
321MODULE_DEPEND(em, ether, 1, 1, 1);
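/*
 * A sketch of the standard newbus flow (generic kld/newbus behavior,
 * nothing specific to this file): DRIVER_MODULE(9) registers the driver
 * as "em" on the pci bus, and MODULE_DEPEND(9) records (minimum,
 * preferred, maximum) version dependencies on the pci and ether modules.
 * Once the module is loaded, e.g. with `kldload if_em`, em_probe() and
 * em_attach() run against each matching PCI device.
 */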
322
323/*********************************************************************
324 * Tunable default values.
325 *********************************************************************/
326
327#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
328#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
329#define M_TSO_LEN 66
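/*
 * The hardware delay timers tick in 1.024 usec units, hence the scaled
 * and rounded conversions above. A worked example (plain arithmetic,
 * shown for illustration):
 *     EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks
 *     EM_TICKS_TO_USECS(98)  = (1024 * 98 + 500) / 1000  = 100 usecs
 */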
330
331/* Allow common code without TSO */
332#ifndef CSUM_TSO
333#define CSUM_TSO 0
334#endif
335
336static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
337
338static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
339static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
340TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
341TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
342SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
343 0, "Default transmit interrupt delay in usecs");
344SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
345 0, "Default receive interrupt delay in usecs");
346
347static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
348static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
349TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
350TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
351SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
352 &em_tx_abs_int_delay_dflt, 0,
353 "Default transmit interrupt delay limit in usecs");
354SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
355 &em_rx_abs_int_delay_dflt, 0,
356 "Default receive interrupt delay limit in usecs");
357
358static int em_rxd = EM_DEFAULT_RXD;
359static int em_txd = EM_DEFAULT_TXD;
360TUNABLE_INT("hw.em.rxd", &em_rxd);
361TUNABLE_INT("hw.em.txd", &em_txd);
362SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
363 "Number of receive descriptors per queue");
364SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
365 "Number of transmit descriptors per queue");
366
367static int em_smart_pwr_down = FALSE;
368TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
369SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
370 0, "Set to true to leave smart power down enabled on newer adapters");
371
372/* Controls whether promiscuous also shows bad packets */
373static int em_debug_sbp = FALSE;
374TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
375SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
376 "Show bad packets in promiscuous mode");
377
378static int em_enable_msix = TRUE;
379TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
380SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
381 "Enable MSI-X interrupts");
382
383/* How many packets rxeof tries to clean at a time */
384static int em_rx_process_limit = 100;
385TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
386SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387 &em_rx_process_limit, 0,
388 "Maximum number of received packets to process "
389 "at a time, -1 means unlimited");
390
391/* Energy efficient ethernet - default to OFF */
392static int eee_setting = 0;
393TUNABLE_INT("hw.em.eee_setting", &eee_setting);
394SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
395 "Enable Energy Efficient Ethernet");
396
397/* Global used in WOL setup with multiport cards */
398static int global_quad_port_a = 0;
399
400#ifdef DEV_NETMAP /* see ixgbe.c for details */
401#include <dev/netmap/if_em_netmap.h>
402#endif /* DEV_NETMAP */
403
404/*********************************************************************
405 * Device identification routine
406 *
 407 * em_probe determines whether the driver should be loaded on the
 408 * adapter, based on the adapter's PCI vendor/device ID.
409 *
410 * return BUS_PROBE_DEFAULT on success, positive on failure
411 *********************************************************************/
412
413static int
414em_probe(device_t dev)
415{
416 char adapter_name[60];
417 u16 pci_vendor_id = 0;
418 u16 pci_device_id = 0;
419 u16 pci_subvendor_id = 0;
420 u16 pci_subdevice_id = 0;
421 em_vendor_info_t *ent;
422
423 INIT_DEBUGOUT("em_probe: begin");
424
425 pci_vendor_id = pci_get_vendor(dev);
426 if (pci_vendor_id != EM_VENDOR_ID)
427 return (ENXIO);
428
429 pci_device_id = pci_get_device(dev);
430 pci_subvendor_id = pci_get_subvendor(dev);
431 pci_subdevice_id = pci_get_subdevice(dev);
432
433 ent = em_vendor_info_array;
434 while (ent->vendor_id != 0) {
435 if ((pci_vendor_id == ent->vendor_id) &&
436 (pci_device_id == ent->device_id) &&
437
438 ((pci_subvendor_id == ent->subvendor_id) ||
439 (ent->subvendor_id == PCI_ANY_ID)) &&
440
441 ((pci_subdevice_id == ent->subdevice_id) ||
442 (ent->subdevice_id == PCI_ANY_ID))) {
443 sprintf(adapter_name, "%s %s",
444 em_strings[ent->index],
445 em_driver_version);
446 device_set_desc_copy(dev, adapter_name);
447 return (BUS_PROBE_DEFAULT);
448 }
449 ent++;
450 }
451
452 return (ENXIO);
453}
454
455/*********************************************************************
456 * Device initialization routine
457 *
458 * The attach entry point is called when the driver is being loaded.
459 * This routine identifies the type of hardware, allocates all resources
460 * and initializes the hardware.
461 *
462 * return 0 on success, positive on failure
463 *********************************************************************/
464
465static int
466em_attach(device_t dev)
467{
468 struct adapter *adapter;
469 struct e1000_hw *hw;
470 int error = 0;
471
472 INIT_DEBUGOUT("em_attach: begin");
473
474 if (resource_disabled("em", device_get_unit(dev))) {
475 device_printf(dev, "Disabled by device hint\n");
476 return (ENXIO);
477 }
478
479 adapter = device_get_softc(dev);
480 adapter->dev = adapter->osdep.dev = dev;
481 hw = &adapter->hw;
482 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
483
484 /* SYSCTL stuff */
485 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
486 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
487 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
488 em_sysctl_nvm_info, "I", "NVM Information");
489
490 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
491 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
492 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
493 em_sysctl_debug_info, "I", "Debug Information");
494
495 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
496 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
497 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
498 em_set_flowcntl, "I", "Flow Control");
499
500 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
501
502 /* Determine hardware and mac info */
503 em_identify_hardware(adapter);
504
505 /* Setup PCI resources */
506 if (em_allocate_pci_resources(adapter)) {
507 device_printf(dev, "Allocation of PCI resources failed\n");
508 error = ENXIO;
509 goto err_pci;
510 }
511
512 /*
513 ** For ICH8 and family we need to
514 ** map the flash memory, and this
515 ** must happen after the MAC is
516 ** identified
517 */
518 if ((hw->mac.type == e1000_ich8lan) ||
519 (hw->mac.type == e1000_ich9lan) ||
520 (hw->mac.type == e1000_ich10lan) ||
521 (hw->mac.type == e1000_pchlan) ||
522 (hw->mac.type == e1000_pch2lan)) {
523 int rid = EM_BAR_TYPE_FLASH;
524 adapter->flash = bus_alloc_resource_any(dev,
525 SYS_RES_MEMORY, &rid, RF_ACTIVE);
526 if (adapter->flash == NULL) {
527 device_printf(dev, "Mapping of Flash failed\n");
528 error = ENXIO;
529 goto err_pci;
530 }
531 /* This is used in the shared code */
532 hw->flash_address = (u8 *)adapter->flash;
533 adapter->osdep.flash_bus_space_tag =
534 rman_get_bustag(adapter->flash);
535 adapter->osdep.flash_bus_space_handle =
536 rman_get_bushandle(adapter->flash);
537 }
538
539 /* Do Shared Code initialization */
540 if (e1000_setup_init_funcs(hw, TRUE)) {
541 device_printf(dev, "Setup of Shared code failed\n");
542 error = ENXIO;
543 goto err_pci;
544 }
545
546 e1000_get_bus_info(hw);
547
548 /* Set up some sysctls for the tunable interrupt delays */
549 em_add_int_delay_sysctl(adapter, "rx_int_delay",
550 "receive interrupt delay in usecs", &adapter->rx_int_delay,
551 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
552 em_add_int_delay_sysctl(adapter, "tx_int_delay",
553 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
554 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
555 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
556 "receive interrupt delay limit in usecs",
557 &adapter->rx_abs_int_delay,
558 E1000_REGISTER(hw, E1000_RADV),
559 em_rx_abs_int_delay_dflt);
560 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
561 "transmit interrupt delay limit in usecs",
562 &adapter->tx_abs_int_delay,
563 E1000_REGISTER(hw, E1000_TADV),
564 em_tx_abs_int_delay_dflt);
565
566 /* Sysctl for limiting the amount of work done in the taskqueue */
567 em_set_sysctl_value(adapter, "rx_processing_limit",
568 "max number of rx packets to process", &adapter->rx_process_limit,
569 em_rx_process_limit);
570
571 /*
 572 * Validate number of transmit and receive descriptors. The
 573 * count must not exceed the hardware maximum, and the ring size
 574 * in bytes must be a multiple of EM_DBA_ALIGN.
575 */
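	/*
	 * For example (assuming the 16-byte legacy e1000 descriptor and an
	 * EM_DBA_ALIGN of 128, both conventional for this hardware), the
	 * byte check reduces to requiring a multiple of 8 descriptors: the
	 * default of 1024 passes (1024 * 16 = 16384, and 16384 % 128 == 0),
	 * while a count such as 1020 would fail and fall back to the
	 * default below.
	 */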
576 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
577 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
578 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
579 EM_DEFAULT_TXD, em_txd);
580 adapter->num_tx_desc = EM_DEFAULT_TXD;
581 } else
582 adapter->num_tx_desc = em_txd;
583
584 if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
585 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
586 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
587 EM_DEFAULT_RXD, em_rxd);
588 adapter->num_rx_desc = EM_DEFAULT_RXD;
589 } else
590 adapter->num_rx_desc = em_rxd;
591
592 hw->mac.autoneg = DO_AUTO_NEG;
593 hw->phy.autoneg_wait_to_complete = FALSE;
594 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
595
596 /* Copper options */
597 if (hw->phy.media_type == e1000_media_type_copper) {
598 hw->phy.mdix = AUTO_ALL_MODES;
599 hw->phy.disable_polarity_correction = FALSE;
600 hw->phy.ms_type = EM_MASTER_SLAVE;
601 }
602
603 /*
604 * Set the frame limits assuming
605 * standard ethernet sized frames.
606 */
607 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
608 adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
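	/*
	 * With the standard constants this works out to 1500 + 14 + 4 =
	 * 1518 bytes (ETHER_MAX_LEN) for the maximum and 60 + 4 = 64
	 * bytes for the minimum ethernet frame.
	 */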
609
610 /*
611 * This controls when hardware reports transmit completion
612 * status.
613 */
614 hw->mac.report_tx_early = 1;
615
616 /*
617 ** Get queue/ring memory
618 */
619 if (em_allocate_queues(adapter)) {
620 error = ENOMEM;
621 goto err_pci;
622 }
623
624 /* Allocate multicast array memory. */
625 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
626 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
627 if (adapter->mta == NULL) {
628 device_printf(dev, "Can not allocate multicast setup array\n");
629 error = ENOMEM;
630 goto err_late;
631 }
632
633 /* Check SOL/IDER usage */
634 if (e1000_check_reset_block(hw))
635 device_printf(dev, "PHY reset is blocked"
636 " due to SOL/IDER session.\n");
637
638 /* Sysctl for setting Energy Efficient Ethernet */
639 em_set_sysctl_value(adapter, "eee_control",
640 "enable Energy Efficient Ethernet",
641 (int *)&hw->dev_spec.ich8lan.eee_disable, eee_setting);
642
643 /*
 644 ** Start from a known state; this is
 645 ** important for reading the NVM and
 646 ** MAC address afterwards.
647 */
648 e1000_reset_hw(hw);
649
650
651 /* Make sure we have a good EEPROM before we read from it */
652 if (e1000_validate_nvm_checksum(hw) < 0) {
653 /*
 654 ** Some PCI-E parts fail the first check due to
 655 ** the link being in a sleep state, so call it again;
 656 ** if it fails a second time, it is a real issue.
657 */
658 if (e1000_validate_nvm_checksum(hw) < 0) {
659 device_printf(dev,
660 "The EEPROM Checksum Is Not Valid\n");
661 error = EIO;
662 goto err_late;
663 }
664 }
665
666 /* Copy the permanent MAC address out of the EEPROM */
667 if (e1000_read_mac_addr(hw) < 0) {
668 device_printf(dev, "EEPROM read error while reading MAC"
669 " address\n");
670 error = EIO;
671 goto err_late;
672 }
673
674 if (!em_is_valid_ether_addr(hw->mac.addr)) {
675 device_printf(dev, "Invalid MAC address\n");
676 error = EIO;
677 goto err_late;
678 }
679
680 /*
681 ** Do interrupt configuration
682 */
683 if (adapter->msix > 1) /* Do MSIX */
684 error = em_allocate_msix(adapter);
685 else /* MSI or Legacy */
686 error = em_allocate_legacy(adapter);
687 if (error)
688 goto err_late;
689
690 /*
 691 * Get Wake-on-LAN and Management info for later use
692 */
693 em_get_wakeup(dev);
694
695 /* Setup OS specific network interface */
696 if (em_setup_interface(dev, adapter) != 0)
697 goto err_late;
698
699 em_reset(adapter);
700
701 /* Initialize statistics */
702 em_update_stats_counters(adapter);
703
704 hw->mac.get_link_status = 1;
705 em_update_link_status(adapter);
706
707 /* Register for VLAN events */
708 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
709 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
710 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
711 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
712
713 em_add_hw_stats(adapter);
714
715 /* Non-AMT based hardware can now take control from firmware */
716 if (adapter->has_manage && !adapter->has_amt)
717 em_get_hw_control(adapter);
718
719 /* Tell the stack that the interface is not active */
720 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
721 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
722
723 adapter->led_dev = led_create(em_led_func, adapter,
724 device_get_nameunit(dev));
725#ifdef DEV_NETMAP
726 em_netmap_attach(adapter);
727#endif /* DEV_NETMAP */
728
729 INIT_DEBUGOUT("em_attach: end");
730
731 return (0);
732
733err_late:
734 em_free_transmit_structures(adapter);
735 em_free_receive_structures(adapter);
736 em_release_hw_control(adapter);
737 if (adapter->ifp != NULL)
738 if_free(adapter->ifp);
739err_pci:
740 em_free_pci_resources(adapter);
741 free(adapter->mta, M_DEVBUF);
742 EM_CORE_LOCK_DESTROY(adapter);
743
744 return (error);
745}
746
747/*********************************************************************
748 * Device removal routine
749 *
750 * The detach entry point is called when the driver is being removed.
751 * This routine stops the adapter and deallocates all the resources
752 * that were allocated for driver operation.
753 *
754 * return 0 on success, positive on failure
755 *********************************************************************/
756
757static int
758em_detach(device_t dev)
759{
760 struct adapter *adapter = device_get_softc(dev);
761 struct ifnet *ifp = adapter->ifp;
762
763 INIT_DEBUGOUT("em_detach: begin");
764
 765 /* Make sure VLANs are not using the driver */
766 if (adapter->ifp->if_vlantrunk != NULL) {
767 device_printf(dev,"Vlan in use, detach first\n");
768 return (EBUSY);
769 }
770
771#ifdef DEVICE_POLLING
772 if (ifp->if_capenable & IFCAP_POLLING)
773 ether_poll_deregister(ifp);
774#endif
775
776 if (adapter->led_dev != NULL)
777 led_destroy(adapter->led_dev);
778
779 EM_CORE_LOCK(adapter);
780 adapter->in_detach = 1;
781 em_stop(adapter);
782 EM_CORE_UNLOCK(adapter);
783 EM_CORE_LOCK_DESTROY(adapter);
784
785 e1000_phy_hw_reset(&adapter->hw);
786
787 em_release_manageability(adapter);
788 em_release_hw_control(adapter);
789
790 /* Unregister VLAN events */
791 if (adapter->vlan_attach != NULL)
792 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793 if (adapter->vlan_detach != NULL)
794 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796 ether_ifdetach(adapter->ifp);
797 callout_drain(&adapter->timer);
798
799#ifdef DEV_NETMAP
800 netmap_detach(ifp);
801#endif /* DEV_NETMAP */
802
803 em_free_pci_resources(adapter);
804 bus_generic_detach(dev);
805 if_free(ifp);
806
807 em_free_transmit_structures(adapter);
808 em_free_receive_structures(adapter);
809
810 em_release_hw_control(adapter);
811 free(adapter->mta, M_DEVBUF);
812
813 return (0);
814}
815
816/*********************************************************************
817 *
818 * Shutdown entry point
819 *
820 **********************************************************************/
821
822static int
823em_shutdown(device_t dev)
824{
825 return em_suspend(dev);
826}
827
828/*
829 * Suspend/resume device methods.
830 */
831static int
832em_suspend(device_t dev)
833{
834 struct adapter *adapter = device_get_softc(dev);
835
836 EM_CORE_LOCK(adapter);
837
838 em_release_manageability(adapter);
839 em_release_hw_control(adapter);
840 em_enable_wakeup(dev);
841
842 EM_CORE_UNLOCK(adapter);
843
844 return bus_generic_suspend(dev);
845}
846
847static int
848em_resume(device_t dev)
849{
850 struct adapter *adapter = device_get_softc(dev);
851 struct tx_ring *txr = adapter->tx_rings;
852 struct ifnet *ifp = adapter->ifp;
853
854 EM_CORE_LOCK(adapter);
855 if (adapter->hw.mac.type == e1000_pch2lan)
856 e1000_resume_workarounds_pchlan(&adapter->hw);
857 em_init_locked(adapter);
858 em_init_manageability(adapter);
859
860 if ((ifp->if_flags & IFF_UP) &&
861 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
862 for (int i = 0; i < adapter->num_queues; i++, txr++) {
863 EM_TX_LOCK(txr);
864#ifdef EM_MULTIQUEUE
865 if (!drbr_empty(ifp, txr->br))
866 em_mq_start_locked(ifp, txr, NULL);
867#else
868 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
869 em_start_locked(ifp, txr);
870#endif
871 EM_TX_UNLOCK(txr);
872 }
873 }
874 EM_CORE_UNLOCK(adapter);
875
876 return bus_generic_resume(dev);
877}
878
879
880#ifdef EM_MULTIQUEUE
881/*********************************************************************
882 * Multiqueue Transmit routines
883 *
884 * em_mq_start is called by the stack to initiate a transmit.
 885 * However, if busy, the driver can queue the request rather
 886 * than do an immediate send. This deferral, rather than the
 887 * multiple TX queues themselves, is the main advantage here.
888 **********************************************************************/
889static int
890em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
891{
892 struct adapter *adapter = txr->adapter;
893 struct mbuf *next;
894 int err = 0, enq = 0;
895
896 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
897 IFF_DRV_RUNNING || adapter->link_active == 0) {
898 if (m != NULL)
899 err = drbr_enqueue(ifp, txr->br, m);
900 return (err);
901 }
902
903 enq = 0;
904 if (m == NULL) {
905 next = drbr_dequeue(ifp, txr->br);
906 } else if (drbr_needs_enqueue(ifp, txr->br)) {
907 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
908 return (err);
909 next = drbr_dequeue(ifp, txr->br);
910 } else
911 next = m;
912
913 /* Process the queue */
914 while (next != NULL) {
915 if ((err = em_xmit(txr, &next)) != 0) {
916 if (next != NULL)
917 err = drbr_enqueue(ifp, txr->br, next);
918 break;
919 }
920 enq++;
921 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
922 ETHER_BPF_MTAP(ifp, next);
923 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
924 break;
925 next = drbr_dequeue(ifp, txr->br);
926 }
927
928 if (enq > 0) {
929 /* Set the watchdog */
930 txr->queue_status = EM_QUEUE_WORKING;
931 txr->watchdog_time = ticks;
932 }
933
934 if (txr->tx_avail < EM_MAX_SCATTER)
935 em_txeof(txr);
936 if (txr->tx_avail < EM_MAX_SCATTER)
937 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
938 return (err);
939}
940
941/*
942** Multiqueue capable stack interface
943*/
944static int
945em_mq_start(struct ifnet *ifp, struct mbuf *m)
946{
947 struct adapter *adapter = ifp->if_softc;
948 struct tx_ring *txr = adapter->tx_rings;
949 int error;
950
951 if (EM_TX_TRYLOCK(txr)) {
952 error = em_mq_start_locked(ifp, txr, m);
953 EM_TX_UNLOCK(txr);
954 } else
955 error = drbr_enqueue(ifp, txr->br, m);
956
957 return (error);
958}
959
960/*
961** Flush all ring buffers
962*/
963static void
964em_qflush(struct ifnet *ifp)
965{
966 struct adapter *adapter = ifp->if_softc;
967 struct tx_ring *txr = adapter->tx_rings;
968 struct mbuf *m;
969
970 for (int i = 0; i < adapter->num_queues; i++, txr++) {
971 EM_TX_LOCK(txr);
972 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
973 m_freem(m);
974 EM_TX_UNLOCK(txr);
975 }
976 if_qflush(ifp);
977}
978#else /* !EM_MULTIQUEUE */
979
980static void
981em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
982{
983 struct adapter *adapter = ifp->if_softc;
984 struct mbuf *m_head;
985
986 EM_TX_LOCK_ASSERT(txr);
987
988 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
989 IFF_DRV_RUNNING)
990 return;
991
992 if (!adapter->link_active)
993 return;
994
995 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
996 /* Call cleanup if number of TX descriptors low */
997 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
998 em_txeof(txr);
999 if (txr->tx_avail < EM_MAX_SCATTER) {
1000 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1001 break;
1002 }
1003 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1004 if (m_head == NULL)
1005 break;
1006 /*
 1007 * Encapsulation can modify our pointer, and/or make it
1008 * NULL on failure. In that event, we can't requeue.
1009 */
1010 if (em_xmit(txr, &m_head)) {
1011 if (m_head == NULL)
1012 break;
1013 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1014 break;
1015 }
1016
1017 /* Send a copy of the frame to the BPF listener */
1018 ETHER_BPF_MTAP(ifp, m_head);
1019
1020 /* Set timeout in case hardware has problems transmitting. */
1021 txr->watchdog_time = ticks;
1022 txr->queue_status = EM_QUEUE_WORKING;
1023 }
1024
1025 return;
1026}
1027
1028static void
1029em_start(struct ifnet *ifp)
1030{
1031 struct adapter *adapter = ifp->if_softc;
1032 struct tx_ring *txr = adapter->tx_rings;
1033
1034 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1035 EM_TX_LOCK(txr);
1036 em_start_locked(ifp, txr);
1037 EM_TX_UNLOCK(txr);
1038 }
1039 return;
1040}
1041#endif /* EM_MULTIQUEUE */
1042
1043/*********************************************************************
1044 * Ioctl entry point
1045 *
1046 * em_ioctl is called when the user wants to configure the
1047 * interface.
1048 *
1049 * return 0 on success, positive on failure
1050 **********************************************************************/
1051
1052static int
1053em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1054{
1055 struct adapter *adapter = ifp->if_softc;
1056 struct ifreq *ifr = (struct ifreq *)data;
1057#if defined(INET) || defined(INET6)
1058 struct ifaddr *ifa = (struct ifaddr *)data;
1059#endif
1060 bool avoid_reset = FALSE;
1061 int error = 0;
1062
1063 if (adapter->in_detach)
1064 return (error);
1065
1066 switch (command) {
1067 case SIOCSIFADDR:
1068#ifdef INET
1069 if (ifa->ifa_addr->sa_family == AF_INET)
1070 avoid_reset = TRUE;
1071#endif
1072#ifdef INET6
1073 if (ifa->ifa_addr->sa_family == AF_INET6)
1074 avoid_reset = TRUE;
1075#endif
1076 /*
1077 ** Calling init results in link renegotiation,
1078 ** so we avoid doing it when possible.
1079 */
1080 if (avoid_reset) {
1081 ifp->if_flags |= IFF_UP;
1082 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1083 em_init(adapter);
1084#ifdef INET
1085 if (!(ifp->if_flags & IFF_NOARP))
1086 arp_ifinit(ifp, ifa);
1087#endif
1088 } else
1089 error = ether_ioctl(ifp, command, data);
1090 break;
1091 case SIOCSIFMTU:
1092 {
1093 int max_frame_size;
1094
1095 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1096
1097 EM_CORE_LOCK(adapter);
1098 switch (adapter->hw.mac.type) {
1099 case e1000_82571:
1100 case e1000_82572:
1101 case e1000_ich9lan:
1102 case e1000_ich10lan:
1103 case e1000_pch2lan:
1104 case e1000_82574:
1105 case e1000_82583:
1106 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1107 max_frame_size = 9234;
1108 break;
1109 case e1000_pchlan:
1110 max_frame_size = 4096;
1111 break;
1112 /* Adapters that do not support jumbo frames */
1113 case e1000_ich8lan:
1114 max_frame_size = ETHER_MAX_LEN;
1115 break;
1116 default:
1117 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1118 }
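		/*
		 * The 9234-byte limit above is a 9216-byte (9 KB) jumbo
		 * payload plus 18 bytes of ethernet header and CRC.
		 */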
1119 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1120 ETHER_CRC_LEN) {
1121 EM_CORE_UNLOCK(adapter);
1122 error = EINVAL;
1123 break;
1124 }
1125
1126 ifp->if_mtu = ifr->ifr_mtu;
1127 adapter->max_frame_size =
1128 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1129 em_init_locked(adapter);
1130 EM_CORE_UNLOCK(adapter);
1131 break;
1132 }
1133 case SIOCSIFFLAGS:
1134 IOCTL_DEBUGOUT("ioctl rcv'd:\
1135 SIOCSIFFLAGS (Set Interface Flags)");
1136 EM_CORE_LOCK(adapter);
1137 if (ifp->if_flags & IFF_UP) {
1138 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1139 if ((ifp->if_flags ^ adapter->if_flags) &
1140 (IFF_PROMISC | IFF_ALLMULTI)) {
1141 em_disable_promisc(adapter);
1142 em_set_promisc(adapter);
1143 }
1144 } else
1145 em_init_locked(adapter);
1146 } else
1147 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1148 em_stop(adapter);
1149 adapter->if_flags = ifp->if_flags;
1150 EM_CORE_UNLOCK(adapter);
1151 break;
1152 case SIOCADDMULTI:
1153 case SIOCDELMULTI:
1154 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1155 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1156 EM_CORE_LOCK(adapter);
1157 em_disable_intr(adapter);
1158 em_set_multi(adapter);
1159#ifdef DEVICE_POLLING
1160 if (!(ifp->if_capenable & IFCAP_POLLING))
1161#endif
1162 em_enable_intr(adapter);
1163 EM_CORE_UNLOCK(adapter);
1164 }
1165 break;
1166 case SIOCSIFMEDIA:
1167 /* Check SOL/IDER usage */
1168 EM_CORE_LOCK(adapter);
1169 if (e1000_check_reset_block(&adapter->hw)) {
1170 EM_CORE_UNLOCK(adapter);
1171 device_printf(adapter->dev, "Media change is"
1172 " blocked due to SOL/IDER session.\n");
1173 break;
1174 }
1175 EM_CORE_UNLOCK(adapter);
1176 /* falls thru */
1177 case SIOCGIFMEDIA:
1178 IOCTL_DEBUGOUT("ioctl rcv'd: \
1179 SIOCxIFMEDIA (Get/Set Interface Media)");
1180 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1181 break;
1182 case SIOCSIFCAP:
1183 {
1184 int mask, reinit;
1185
1186 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1187 reinit = 0;
1188 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1189#ifdef DEVICE_POLLING
1190 if (mask & IFCAP_POLLING) {
1191 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1192 error = ether_poll_register(em_poll, ifp);
1193 if (error)
1194 return (error);
1195 EM_CORE_LOCK(adapter);
1196 em_disable_intr(adapter);
1197 ifp->if_capenable |= IFCAP_POLLING;
1198 EM_CORE_UNLOCK(adapter);
1199 } else {
1200 error = ether_poll_deregister(ifp);
1201 /* Enable interrupt even in error case */
1202 EM_CORE_LOCK(adapter);
1203 em_enable_intr(adapter);
1204 ifp->if_capenable &= ~IFCAP_POLLING;
1205 EM_CORE_UNLOCK(adapter);
1206 }
1207 }
1208#endif
1209 if (mask & IFCAP_HWCSUM) {
1210 ifp->if_capenable ^= IFCAP_HWCSUM;
1211 reinit = 1;
1212 }
1213 if (mask & IFCAP_TSO4) {
1214 ifp->if_capenable ^= IFCAP_TSO4;
1215 reinit = 1;
1216 }
1217 if (mask & IFCAP_VLAN_HWTAGGING) {
1218 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1219 reinit = 1;
1220 }
1221 if (mask & IFCAP_VLAN_HWFILTER) {
1222 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1223 reinit = 1;
1224 }
1225 if (mask & IFCAP_VLAN_HWTSO) {
1226 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1227 reinit = 1;
1228 }
1229 if ((mask & IFCAP_WOL) &&
1230 (ifp->if_capabilities & IFCAP_WOL) != 0) {
1231 if (mask & IFCAP_WOL_MCAST)
1232 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1233 if (mask & IFCAP_WOL_MAGIC)
1234 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1235 }
1236 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1237 em_init(adapter);
1238 VLAN_CAPABILITIES(ifp);
1239 break;
1240 }
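	/*
	 * These capability bits are normally toggled from userland with
	 * ifconfig(8); illustrative examples:
	 *     ifconfig em0 -txcsum    (clear checksum offload)
	 *     ifconfig em0 tso        (enable TSO, forcing a reinit)
	 */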
1241
1242 default:
1243 error = ether_ioctl(ifp, command, data);
1244 break;
1245 }
1246
1247 return (error);
1248}
1249
1250
1251/*********************************************************************
1252 * Init entry point
1253 *
1254 * This routine is used in two ways. It is used by the stack as
 1255 * the init entry point in the network interface structure. It is also used
1256 * by the driver as a hw/sw initialization routine to get to a
1257 * consistent state.
1258 *
1259 * return 0 on success, positive on failure
1260 **********************************************************************/
1261
1262static void
1263em_init_locked(struct adapter *adapter)
1264{
1265 struct ifnet *ifp = adapter->ifp;
1266 device_t dev = adapter->dev;
1267
1268 INIT_DEBUGOUT("em_init: begin");
1269
1270 EM_CORE_LOCK_ASSERT(adapter);
1271
1272 em_disable_intr(adapter);
1273 callout_stop(&adapter->timer);
1274
1275 /* Get the latest mac address, User can use a LAA */
1276 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1277 ETHER_ADDR_LEN);
1278
1279 /* Put the address into the Receive Address Array */
1280 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1281
1282 /*
 1283 * With the 82571 adapter, RAR[0] may be overwritten
 1284 * when the other port is reset. We make a duplicate
 1285 * in RAR[14] for that eventuality; this ensures
 1286 * the interface continues to function.
1287 */
1288 if (adapter->hw.mac.type == e1000_82571) {
1289 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1290 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1291 E1000_RAR_ENTRIES - 1);
1292 }
1293
1294 /* Initialize the hardware */
1295 em_reset(adapter);
1296 em_update_link_status(adapter);
1297
1298 /* Setup VLAN support, basic and offload if available */
1299 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1300
1301 /* Set hardware offload abilities */
1302 ifp->if_hwassist = 0;
1303 if (ifp->if_capenable & IFCAP_TXCSUM)
1304 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1305 if (ifp->if_capenable & IFCAP_TSO4)
1306 ifp->if_hwassist |= CSUM_TSO;
1307
1308 /* Configure for OS presence */
1309 em_init_manageability(adapter);
1310
1311 /* Prepare transmit descriptors and buffers */
1312 em_setup_transmit_structures(adapter);
1313 em_initialize_transmit_unit(adapter);
1314
1315 /* Setup Multicast table */
1316 em_set_multi(adapter);
1317
1318 /*
1319 ** Figure out the desired mbuf
1320 ** pool for doing jumbos
1321 */
1322 if (adapter->max_frame_size <= 2048)
1323 adapter->rx_mbuf_sz = MCLBYTES;
1324 else if (adapter->max_frame_size <= 4096)
1325 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1326 else
1327 adapter->rx_mbuf_sz = MJUM9BYTES;
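	/*
	 * In the selection above, MCLBYTES is the standard 2 KB cluster,
	 * MJUMPAGESIZE a PAGE_SIZE (typically 4 KB) cluster, and
	 * MJUM9BYTES a 9 KB cluster, so a single cluster always holds a
	 * full received frame.
	 */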
1328
1329 /* Prepare receive descriptors and buffers */
1330 if (em_setup_receive_structures(adapter)) {
1331 device_printf(dev, "Could not setup receive structures\n");
1332 em_stop(adapter);
1333 return;
1334 }
1335 em_initialize_receive_unit(adapter);
1336
1337 /* Use real VLAN Filter support? */
1338 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1339 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1340 /* Use real VLAN Filter support */
1341 em_setup_vlan_hw_support(adapter);
1342 else {
1343 u32 ctrl;
1344 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1345 ctrl |= E1000_CTRL_VME;
1346 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1347 }
1348 }
1349
1350 /* Don't lose promiscuous settings */
1351 em_set_promisc(adapter);
1352
1353 /* Set the interface as ACTIVE */
1354 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1355 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1356
1357 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1358 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1359
1360 /* MSI/X configuration for 82574 */
1361 if (adapter->hw.mac.type == e1000_82574) {
1362 int tmp;
1363 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1364 tmp |= E1000_CTRL_EXT_PBA_CLR;
1365 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1366 /* Set the IVAR - interrupt vector routing. */
1367 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1368 }
1369
1370#ifdef DEVICE_POLLING
1371 /*
 1372 * Only enable interrupts if we are not polling; make sure
 1373 * they are off otherwise.
1374 */
1375 if (ifp->if_capenable & IFCAP_POLLING)
1376 em_disable_intr(adapter);
1377 else
1378#endif /* DEVICE_POLLING */
1379 em_enable_intr(adapter);
1380
1381 /* AMT based hardware can now take control from firmware */
1382 if (adapter->has_manage && adapter->has_amt)
1383 em_get_hw_control(adapter);
1384}
1385
1386static void
1387em_init(void *arg)
1388{
1389 struct adapter *adapter = arg;
1390
1391 EM_CORE_LOCK(adapter);
1392 em_init_locked(adapter);
1393 EM_CORE_UNLOCK(adapter);
1394}
1395
1396
1397#ifdef DEVICE_POLLING
1398/*********************************************************************
1399 *
 1400 * Legacy polling routine: note that this only works with a single queue
1401 *
1402 *********************************************************************/
1403static int
1404em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1405{
1406 struct adapter *adapter = ifp->if_softc;
1407 struct tx_ring *txr = adapter->tx_rings;
1408 struct rx_ring *rxr = adapter->rx_rings;
1409 u32 reg_icr;
1410 int rx_done;
1411
1412 EM_CORE_LOCK(adapter);
1413 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1414 EM_CORE_UNLOCK(adapter);
1415 return (0);
1416 }
1417
1418 if (cmd == POLL_AND_CHECK_STATUS) {
1419 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1420 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1421 callout_stop(&adapter->timer);
1422 adapter->hw.mac.get_link_status = 1;
1423 em_update_link_status(adapter);
1424 callout_reset(&adapter->timer, hz,
1425 em_local_timer, adapter);
1426 }
1427 }
1428 EM_CORE_UNLOCK(adapter);
1429
1430 em_rxeof(rxr, count, &rx_done);
1431
1432 EM_TX_LOCK(txr);
1433 em_txeof(txr);
1434#ifdef EM_MULTIQUEUE
1435 if (!drbr_empty(ifp, txr->br))
1436 em_mq_start_locked(ifp, txr, NULL);
1437#else
1438 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1439 em_start_locked(ifp, txr);
1440#endif
1441 EM_TX_UNLOCK(txr);
1442
1443 return (rx_done);
1444}
1445#endif /* DEVICE_POLLING */
1446
1447
1448/*********************************************************************
1449 *
1450 * Fast Legacy/MSI Combined Interrupt Service routine
1451 *
1452 *********************************************************************/
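/*
 * Note: this runs as an interrupt filter (hence the FILTER_* returns).
 * It executes in primary interrupt context, so it only reads and acks
 * the ICR and defers the actual RX/TX work to the que taskqueue;
 * FILTER_STRAY tells the OS the interrupt was not ours, FILTER_HANDLED
 * that it was serviced.
 */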
1453static int
1454em_irq_fast(void *arg)
1455{
1456 struct adapter *adapter = arg;
1457 struct ifnet *ifp;
1458 u32 reg_icr;
1459
1460 ifp = adapter->ifp;
1461
1462 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1463
1464 /* Hot eject? */
1465 if (reg_icr == 0xffffffff)
1466 return FILTER_STRAY;
1467
1468 /* Definitely not our interrupt. */
1469 if (reg_icr == 0x0)
1470 return FILTER_STRAY;
1471
1472 /*
1473 * Starting with the 82571 chip, bit 31 should be used to
1474 * determine whether the interrupt belongs to us.
1475 */
1476 if (adapter->hw.mac.type >= e1000_82571 &&
1477 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1478 return FILTER_STRAY;
1479
1480 em_disable_intr(adapter);
1481 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1482
1483 /* Link status change */
1484 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1485 adapter->hw.mac.get_link_status = 1;
1486 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1487 }
1488
1489 if (reg_icr & E1000_ICR_RXO)
1490 adapter->rx_overruns++;
1491 return FILTER_HANDLED;
1492}
1493
1494/* Combined RX/TX handler, used by Legacy and MSI */
1495static void
1496em_handle_que(void *context, int pending)
1497{
1498 struct adapter *adapter = context;
1499 struct ifnet *ifp = adapter->ifp;
1500 struct tx_ring *txr = adapter->tx_rings;
1501 struct rx_ring *rxr = adapter->rx_rings;
1502
1503
1504 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1505 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1506 EM_TX_LOCK(txr);
1507 em_txeof(txr);
1508#ifdef EM_MULTIQUEUE
1509 if (!drbr_empty(ifp, txr->br))
1510 em_mq_start_locked(ifp, txr, NULL);
1511#else
1512 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1513 em_start_locked(ifp, txr);
1514#endif
1515 EM_TX_UNLOCK(txr);
1516 if (more) {
1517 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1518 return;
1519 }
1520 }
1521
1522 em_enable_intr(adapter);
1523 return;
1524}
1525
1526
1527/*********************************************************************
1528 *
1529 * MSIX Interrupt Service Routines
1530 *
1531 **********************************************************************/
1532static void
1533em_msix_tx(void *arg)
1534{
1535 struct tx_ring *txr = arg;
1536 struct adapter *adapter = txr->adapter;
1537 struct ifnet *ifp = adapter->ifp;
1538
1539 ++txr->tx_irq;
1540 EM_TX_LOCK(txr);
1541 em_txeof(txr);
1542#ifdef EM_MULTIQUEUE
1543 if (!drbr_empty(ifp, txr->br))
1544 em_mq_start_locked(ifp, txr, NULL);
1545#else
1546 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1547 em_start_locked(ifp, txr);
1548#endif
1549 /* Reenable this interrupt */
1550 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1551 EM_TX_UNLOCK(txr);
1552 return;
1553}
1554
1555/*********************************************************************
1556 *
1557 * MSIX RX Interrupt Service routine
1558 *
1559 **********************************************************************/
1560
1561static void
1562em_msix_rx(void *arg)
1563{
1564 struct rx_ring *rxr = arg;
1565 struct adapter *adapter = rxr->adapter;
1566 bool more;
1567
1568 ++rxr->rx_irq;
1569 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1570 if (more)
1571 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1572 else
1573 /* Reenable this interrupt */
1574 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1575 return;
1576}
1577
1578/*********************************************************************
1579 *
1580 * MSIX Link Fast Interrupt Service routine
1581 *
1582 **********************************************************************/
1583static void
1584em_msix_link(void *arg)
1585{
1586 struct adapter *adapter = arg;
1587 u32 reg_icr;
1588
1589 ++adapter->link_irq;
1590 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1591
1592 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1593 adapter->hw.mac.get_link_status = 1;
1594 em_handle_link(adapter, 0);
1595 } else
1596 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1597 EM_MSIX_LINK | E1000_IMS_LSC);
1598 return;
1599}
1600
1601static void
1602em_handle_rx(void *context, int pending)
1603{
1604 struct rx_ring *rxr = context;
1605 struct adapter *adapter = rxr->adapter;
1606 bool more;
1607
1608 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1609 if (more)
1610 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1611 else
1612 /* Reenable this interrupt */
1613 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1614}
1615
1616static void
1617em_handle_tx(void *context, int pending)
1618{
1619 struct tx_ring *txr = context;
1620 struct adapter *adapter = txr->adapter;
1621 struct ifnet *ifp = adapter->ifp;
1622
1623 EM_TX_LOCK(txr);
1624 em_txeof(txr);
1625#ifdef EM_MULTIQUEUE
1626 if (!drbr_empty(ifp, txr->br))
1627 em_mq_start_locked(ifp, txr, NULL);
1628#else
1629 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1630 em_start_locked(ifp, txr);
1631#endif
1632 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1633 EM_TX_UNLOCK(txr);
1634}
1635
1636static void
1637em_handle_link(void *context, int pending)
1638{
1639 struct adapter *adapter = context;
1640 struct tx_ring *txr = adapter->tx_rings;
1641 struct ifnet *ifp = adapter->ifp;
1642
1643 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1644 return;
1645
1646 EM_CORE_LOCK(adapter);
1647 callout_stop(&adapter->timer);
1648 em_update_link_status(adapter);
1649 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1650 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1651 EM_MSIX_LINK | E1000_IMS_LSC);
1652 if (adapter->link_active) {
1653 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1654 EM_TX_LOCK(txr);
1655#ifdef EM_MULTIQUEUE
1656 if (!drbr_empty(ifp, txr->br))
1657 em_mq_start_locked(ifp, txr, NULL);
1658#else
1659 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1660 em_start_locked(ifp, txr);
1661#endif
1662 EM_TX_UNLOCK(txr);
1663 }
1664 }
1665 EM_CORE_UNLOCK(adapter);
1666}
1667
1668
1669/*********************************************************************
1670 *
1671 * Media Ioctl callback
1672 *
1673 * This routine is called whenever the user queries the status of
1674 * the interface using ifconfig.
1675 *
1676 **********************************************************************/
1677static void
1678em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679{
1680 struct adapter *adapter = ifp->if_softc;
1681 u_char fiber_type = IFM_1000_SX;
1682
1683 INIT_DEBUGOUT("em_media_status: begin");
1684
1685 EM_CORE_LOCK(adapter);
1686 em_update_link_status(adapter);
1687
1688 ifmr->ifm_status = IFM_AVALID;
1689 ifmr->ifm_active = IFM_ETHER;
1690
1691 if (!adapter->link_active) {
1692 EM_CORE_UNLOCK(adapter);
1693 return;
1694 }
1695
1696 ifmr->ifm_status |= IFM_ACTIVE;
1697
1698 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1699 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1700 ifmr->ifm_active |= fiber_type | IFM_FDX;
1701 } else {
1702 switch (adapter->link_speed) {
1703 case 10:
1704 ifmr->ifm_active |= IFM_10_T;
1705 break;
1706 case 100:
1707 ifmr->ifm_active |= IFM_100_TX;
1708 break;
1709 case 1000:
1710 ifmr->ifm_active |= IFM_1000_T;
1711 break;
1712 }
1713 if (adapter->link_duplex == FULL_DUPLEX)
1714 ifmr->ifm_active |= IFM_FDX;
1715 else
1716 ifmr->ifm_active |= IFM_HDX;
1717 }
1718 EM_CORE_UNLOCK(adapter);
1719}
1720
1721/*********************************************************************
1722 *
1723 * Media Ioctl callback
1724 *
 1725 * This routine is called when the user changes speed/duplex using
 1726 * the media/mediaopt options with ifconfig.
1727 *
1728 **********************************************************************/
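/*
 * A typical userland invocation (illustrative):
 *     ifconfig em0 media 100baseTX mediaopt full-duplex
 *     ifconfig em0 media autoselect
 * which reaches this routine via SIOCSIFMEDIA and ifmedia_ioctl().
 */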
1729static int
1730em_media_change(struct ifnet *ifp)
1731{
1732 struct adapter *adapter = ifp->if_softc;
1733 struct ifmedia *ifm = &adapter->media;
1734
1735 INIT_DEBUGOUT("em_media_change: begin");
1736
1737 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1738 return (EINVAL);
1739
1740 EM_CORE_LOCK(adapter);
1741 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1742 case IFM_AUTO:
1743 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1744 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1745 break;
1746 case IFM_1000_LX:
1747 case IFM_1000_SX:
1748 case IFM_1000_T:
1749 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1750 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1751 break;
1752 case IFM_100_TX:
1753 adapter->hw.mac.autoneg = FALSE;
1754 adapter->hw.phy.autoneg_advertised = 0;
1755 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1756 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1757 else
1758 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1759 break;
1760 case IFM_10_T:
1761 adapter->hw.mac.autoneg = FALSE;
1762 adapter->hw.phy.autoneg_advertised = 0;
1763 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1764 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1765 else
1766 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1767 break;
1768 default:
1769 device_printf(adapter->dev, "Unsupported media type\n");
1770 }
1771
1772 em_init_locked(adapter);
1773 EM_CORE_UNLOCK(adapter);
1774
1775 return (0);
1776}
1777
1778/*********************************************************************
1779 *
1780 * This routine maps the mbufs to tx descriptors.
1781 *
1782 * return 0 on success, positive on failure
1783 **********************************************************************/
1784
1785static int
1786em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1787{
1788 struct adapter *adapter = txr->adapter;
1789 bus_dma_segment_t segs[EM_MAX_SCATTER];
1790 bus_dmamap_t map;
1791 struct em_buffer *tx_buffer, *tx_buffer_mapped;
1792 struct e1000_tx_desc *ctxd = NULL;
1793 struct mbuf *m_head;
1794 struct ether_header *eh;
1795 struct ip *ip = NULL;
1796 struct tcphdr *tp = NULL;
1797 u32 txd_upper, txd_lower, txd_used, txd_saved;
1798 int ip_off, poff;
1799 int nsegs, i, j, first, last = 0;
1800 int error, do_tso, tso_desc = 0, remap = 1;
1801
1802retry:
1803 m_head = *m_headp;
1804 txd_upper = txd_lower = txd_used = txd_saved = 0;
1805 do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1806 ip_off = poff = 0;
1807
1808 /*
1809 * Intel recommends entire IP/TCP header length reside in a single
1810 * buffer. If multiple descriptors are used to describe the IP and
1811 * TCP header, each descriptor should describe one or more
1812 * complete headers; descriptors referencing only parts of headers
1813 * are not supported. If all layer headers are not coalesced into
1814 * a single buffer, each buffer should not cross a 4KB boundary,
1815 * or be larger than the maximum read request size.
1816	 * The controller also requires the IP/TCP header to be modified
1817	 * to make TSO work, so we first get a writable mbuf chain, then
1818	 * coalesce the ethernet/IP/TCP headers into a single buffer to
1819	 * meet the controller's requirement. This also simplifies
1820	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1821 */
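	/*
	 * Illustrative layout (not driver code): once the m_pullup()
	 * calls below succeed, the first mbuf holds all the headers
	 * contiguously:
	 *
	 *   [ethernet (+ optional 802.1Q)][IP header][TCP/UDP header]
	 *   ^offset 0                     ^ip_off    ^poff
	 *
	 * so ip_off and poff can be used as plain byte offsets from
	 * mtod(m_head, char *).
	 */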
1822 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1823 if (do_tso || (m_head->m_next != NULL &&
1824 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1825 if (M_WRITABLE(*m_headp) == 0) {
1826 m_head = m_dup(*m_headp, M_DONTWAIT);
1827 m_freem(*m_headp);
1828 if (m_head == NULL) {
1829 *m_headp = NULL;
1830 return (ENOBUFS);
1831 }
1832 *m_headp = m_head;
1833 }
1834 }
1835 /*
1836 * XXX
1837 * Assume IPv4, we don't have TSO/checksum offload support
1838 * for IPv6 yet.
1839 */
1840 ip_off = sizeof(struct ether_header);
1841 m_head = m_pullup(m_head, ip_off);
1842 if (m_head == NULL) {
1843 *m_headp = NULL;
1844 return (ENOBUFS);
1845 }
1846 eh = mtod(m_head, struct ether_header *);
1847 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1848 ip_off = sizeof(struct ether_vlan_header);
1849 m_head = m_pullup(m_head, ip_off);
1850 if (m_head == NULL) {
1851 *m_headp = NULL;
1852 return (ENOBUFS);
1853 }
1854 }
1855 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1856 if (m_head == NULL) {
1857 *m_headp = NULL;
1858 return (ENOBUFS);
1859 }
1860 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1861 poff = ip_off + (ip->ip_hl << 2);
1862 if (do_tso) {
1863 m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1864 if (m_head == NULL) {
1865 *m_headp = NULL;
1866 return (ENOBUFS);
1867 }
1868 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1869 /*
1870			 * TSO workaround: pull 4 more bytes of payload data
1871			 * into the header mbuf; they feed the sentinel split below.
1872 */
1873 m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1874 if (m_head == NULL) {
1875 *m_headp = NULL;
1876 return (ENOBUFS);
1877 }
1878 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1879 ip->ip_len = 0;
1880 ip->ip_sum = 0;
1881 /*
1882			 * The pseudo TCP checksum does not include the TCP
1883			 * payload length, so the driver must recompute the
1884			 * checksum here to match what the hardware expects to
1885			 * see. This is in adherence to Microsoft's Large Send
1886			 * specification.
1886 */
1887 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1888 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1889 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
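			/*
			 * Sketch of the computation above: in_pseudo()
			 * returns the 16-bit one's-complement sum of the
			 * IPv4 source/destination addresses and the protocol
			 * word, i.e. a pseudo-header checksum with a zero
			 * length field; the hardware then folds each TSO
			 * segment's real payload length into this seed.
			 */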
1890 } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1891 m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1892 if (m_head == NULL) {
1893 *m_headp = NULL;
1894 return (ENOBUFS);
1895 }
1896 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1897 m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1898 if (m_head == NULL) {
1899 *m_headp = NULL;
1900 return (ENOBUFS);
1901 }
1902 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1903 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1905 m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1906 if (m_head == NULL) {
1907 *m_headp = NULL;
1908 return (ENOBUFS);
1909 }
1910 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1911 }
1912 *m_headp = m_head;
1913 }
1914
1915 /*
1916 * Map the packet for DMA
1917 *
1918	 * Capture the first descriptor index;
1919	 * this descriptor will later hold the index
1920	 * of the EOP descriptor, which is the only
1921	 * one that gets a DONE bit writeback.
1922 */
1923 first = txr->next_avail_desc;
1924 tx_buffer = &txr->tx_buffers[first];
1925 tx_buffer_mapped = tx_buffer;
1926 map = tx_buffer->map;
1927
1928 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1929 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1930
1931 /*
1932 * There are two types of errors we can (try) to handle:
1933 * - EFBIG means the mbuf chain was too long and bus_dma ran
1934 * out of segments. Defragment the mbuf chain and try again.
1935 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1936 * at this point in time. Defer sending and try again later.
1937 * All other errors, in particular EINVAL, are fatal and prevent the
1938 * mbuf chain from ever going through. Drop it and report error.
1939 */
1940 if (error == EFBIG && remap) {
1941 struct mbuf *m;
1942
1943 m = m_defrag(*m_headp, M_DONTWAIT);
1944 if (m == NULL) {
1945 adapter->mbuf_alloc_failed++;
1946 m_freem(*m_headp);
1947 *m_headp = NULL;
1948 return (ENOBUFS);
1949 }
1950 *m_headp = m;
1951
1952 /* Try it again, but only once */
1953 remap = 0;
1954 goto retry;
1955 } else if (error == ENOMEM) {
1956 adapter->no_tx_dma_setup++;
1957 return (error);
1958 } else if (error != 0) {
1959 adapter->no_tx_dma_setup++;
1960 m_freem(*m_headp);
1961 *m_headp = NULL;
1962 return (error);
1963 }
1964
1965 /*
1966 * TSO Hardware workaround, if this packet is not
1967 * TSO, and is only a single descriptor long, and
1968 * it follows a TSO burst, then we need to add a
1969 * sentinel descriptor to prevent premature writeback.
1970 */
1971 if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
1972 if (nsegs == 1)
1973 tso_desc = TRUE;
1974 txr->tx_tso = FALSE;
1975 }
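	/*
	 * Example (hypothetical frame, for illustration): a 64-byte
	 * single-segment packet that follows a TSO burst is split in
	 * the descriptor loop below into a 60-byte descriptor plus a
	 * 4-byte sentinel, so the hardware never writes the DONE bit
	 * back prematurely.
	 */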
1976
1977 if (nsegs > (txr->tx_avail - 2)) {
1978 txr->no_desc_avail++;
1979 bus_dmamap_unload(txr->txtag, map);
1980 return (ENOBUFS);
1981 }
1982 m_head = *m_headp;
1983
1984 /* Do hardware assists */
1985 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1986 em_tso_setup(txr, m_head, ip_off, ip, tp,
1987 &txd_upper, &txd_lower);
1988 /* we need to make a final sentinel transmit desc */
1989 tso_desc = TRUE;
1990 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1991 em_transmit_checksum_setup(txr, m_head,
1992 ip_off, ip, &txd_upper, &txd_lower);
1993
1994 if (m_head->m_flags & M_VLANTAG) {
1995 /* Set the vlan id. */
1996 txd_upper |=
1997 (htole16(m_head->m_pkthdr.ether_vtag) << 16);
1998 /* Tell hardware to add tag */
1999 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2000 }
2001
2002 i = txr->next_avail_desc;
2003
2004 /* Set up our transmit descriptors */
2005 for (j = 0; j < nsegs; j++) {
2006 bus_size_t seg_len;
2007 bus_addr_t seg_addr;
2008
2009 tx_buffer = &txr->tx_buffers[i];
2010 ctxd = &txr->tx_base[i];
2011 seg_addr = segs[j].ds_addr;
2012 seg_len = segs[j].ds_len;
2013 /*
2014 ** TSO Workaround:
2015 ** If this is the last descriptor, we want to
2016 ** split it so we have a small final sentinel
2017 */
2018 if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2019 seg_len -= 4;
2020 ctxd->buffer_addr = htole64(seg_addr);
2021 ctxd->lower.data = htole32(
2022 adapter->txd_cmd | txd_lower | seg_len);
2023 ctxd->upper.data =
2024 htole32(txd_upper);
2025 if (++i == adapter->num_tx_desc)
2026 i = 0;
2027 /* Now make the sentinel */
2028 ++txd_used; /* using an extra txd */
2029 ctxd = &txr->tx_base[i];
2030 tx_buffer = &txr->tx_buffers[i];
2031 ctxd->buffer_addr =
2032 htole64(seg_addr + seg_len);
2033 ctxd->lower.data = htole32(
2034 adapter->txd_cmd | txd_lower | 4);
2035 ctxd->upper.data =
2036 htole32(txd_upper);
2037 last = i;
2038 if (++i == adapter->num_tx_desc)
2039 i = 0;
2040 } else {
2041 ctxd->buffer_addr = htole64(seg_addr);
2042 ctxd->lower.data = htole32(
2043 adapter->txd_cmd | txd_lower | seg_len);
2044 ctxd->upper.data =
2045 htole32(txd_upper);
2046 last = i;
2047 if (++i == adapter->num_tx_desc)
2048 i = 0;
2049 }
2050 tx_buffer->m_head = NULL;
2051 tx_buffer->next_eop = -1;
2052 }
2053
2054 txr->next_avail_desc = i;
2055 txr->tx_avail -= nsegs;
2056 if (tso_desc) /* TSO used an extra for sentinel */
2057 txr->tx_avail -= txd_used;
2058
2059 tx_buffer->m_head = m_head;
2060 /*
2061 ** Here we swap the map so the last descriptor,
2062	** which gets the completion interrupt, has the
2063 ** real map, and the first descriptor gets the
2064 ** unused map from this descriptor.
2065 */
2066 tx_buffer_mapped->map = tx_buffer->map;
2067 tx_buffer->map = map;
2068 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2069
2070 /*
2071 * Last Descriptor of Packet
2072 * needs End Of Packet (EOP)
2073 * and Report Status (RS)
2074 */
2075 ctxd->lower.data |=
2076 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2077 /*
2078 * Keep track in the first buffer which
2079 * descriptor will be written back
2080 */
2081 tx_buffer = &txr->tx_buffers[first];
2082 tx_buffer->next_eop = last;
2083 /* Update the watchdog time early and often */
2084 txr->watchdog_time = ticks;
2085
2086 /*
2087 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2088 * that this frame is available to transmit.
2089 */
2090 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2091 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2092 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2093
2094 return (0);
2095}
2096
2097static void
2098em_set_promisc(struct adapter *adapter)
2099{
2100 struct ifnet *ifp = adapter->ifp;
2101 u32 reg_rctl;
2102
2103 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2104
2105 if (ifp->if_flags & IFF_PROMISC) {
2106 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2107 /* Turn this on if you want to see bad packets */
2108 if (em_debug_sbp)
2109 reg_rctl |= E1000_RCTL_SBP;
2110 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2111 } else if (ifp->if_flags & IFF_ALLMULTI) {
2112 reg_rctl |= E1000_RCTL_MPE;
2113 reg_rctl &= ~E1000_RCTL_UPE;
2114 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2115 }
2116}
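/*
 * For reference, the standard e1000 RCTL bits used above and below:
 *	E1000_RCTL_UPE - unicast promiscuous enable
 *	E1000_RCTL_MPE - multicast promiscuous enable
 *	E1000_RCTL_SBP - store bad packets (see em_debug_sbp above)
 */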
2117
2118static void
2119em_disable_promisc(struct adapter *adapter)
2120{
2121 u32 reg_rctl;
2122
2123 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2124
2125 reg_rctl &= (~E1000_RCTL_UPE);
2126 reg_rctl &= (~E1000_RCTL_MPE);
2127 reg_rctl &= (~E1000_RCTL_SBP);
2128 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2129}
2130
2131
2132/*********************************************************************
2133 * Multicast Update
2134 *
2135 * This routine is called whenever multicast address list is updated.
2136 *
2137 **********************************************************************/
2138
2139static void
2140em_set_multi(struct adapter *adapter)
2141{
2142 struct ifnet *ifp = adapter->ifp;
2143 struct ifmultiaddr *ifma;
2144 u32 reg_rctl = 0;
2145 u8 *mta; /* Multicast array memory */
2146 int mcnt = 0;
2147
2148 IOCTL_DEBUGOUT("em_set_multi: begin");
2149
2150 mta = adapter->mta;
2151 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2152
2153 if (adapter->hw.mac.type == e1000_82542 &&
2154 adapter->hw.revision_id == E1000_REVISION_2) {
2155 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2156 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2157 e1000_pci_clear_mwi(&adapter->hw);
2158 reg_rctl |= E1000_RCTL_RST;
2159 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2160 msec_delay(5);
2161 }
2162
2163#if __FreeBSD_version < 800000
2164 IF_ADDR_LOCK(ifp);
2165#else
2166 if_maddr_rlock(ifp);
2167#endif
2168 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2169 if (ifma->ifma_addr->sa_family != AF_LINK)
2170 continue;
2171
2172 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2173 break;
2174
2175 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2176 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2177 mcnt++;
2178 }
2179#if __FreeBSD_version < 800000
2180 IF_ADDR_UNLOCK(ifp);
2181#else
2182 if_maddr_runlock(ifp);
2183#endif
2184 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2185 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2186 reg_rctl |= E1000_RCTL_MPE;
2187 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2188 } else
2189 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2190
2191 if (adapter->hw.mac.type == e1000_82542 &&
2192 adapter->hw.revision_id == E1000_REVISION_2) {
2193 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2194 reg_rctl &= ~E1000_RCTL_RST;
2195 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2196 msec_delay(5);
2197 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2198 e1000_pci_set_mwi(&adapter->hw);
2199 }
2200}
2201
2202
2203/*********************************************************************
2204 * Timer routine
2205 *
2206 * This routine checks for link status and updates statistics.
2207 *
2208 **********************************************************************/
2209
2210static void
2211em_local_timer(void *arg)
2212{
2213 struct adapter *adapter = arg;
2214 struct ifnet *ifp = adapter->ifp;
2215 struct tx_ring *txr = adapter->tx_rings;
2216 struct rx_ring *rxr = adapter->rx_rings;
2217 u32 trigger;
2218
2219 EM_CORE_LOCK_ASSERT(adapter);
2220
2221 em_update_link_status(adapter);
2222 em_update_stats_counters(adapter);
2223
2224 /* Reset LAA into RAR[0] on 82571 */
2225 if ((adapter->hw.mac.type == e1000_82571) &&
2226 e1000_get_laa_state_82571(&adapter->hw))
2227 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2228
2229 /* Mask to use in the irq trigger */
2230 if (adapter->msix_mem)
2231 trigger = rxr->ims; /* RX for 82574 */
2232 else
2233 trigger = E1000_ICS_RXDMT0;
2234
2235 /*
2236	** Check on the state of the TX queue(s); this
2237	** can be done without the lock because it is read-only
2238 ** and the HUNG state will be static if set.
2239 */
2240 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2241 if ((txr->queue_status == EM_QUEUE_HUNG) &&
2242 (adapter->pause_frames == 0))
2243 goto hung;
2244 /* Schedule a TX tasklet if needed */
2245 if (txr->tx_avail <= EM_MAX_SCATTER)
2246 taskqueue_enqueue(txr->tq, &txr->tx_task);
2247 }
2248
2249 adapter->pause_frames = 0;
2250 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2251#ifndef DEVICE_POLLING
2252 /* Trigger an RX interrupt to guarantee mbuf refresh */
2253 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2254#endif
2255 return;
2256hung:
2257 /* Looks like we're hung */
2258 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2259 device_printf(adapter->dev,
2260 "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2261 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2262 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2263	device_printf(adapter->dev, "TX(%d) desc avail = %d, "
2264	    "Next TX to Clean = %d\n",
2265 txr->me, txr->tx_avail, txr->next_to_clean);
2266 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2267 adapter->watchdog_events++;
2268 adapter->pause_frames = 0;
2269 em_init_locked(adapter);
2270}
2271
2272
2273static void
2274em_update_link_status(struct adapter *adapter)
2275{
2276 struct e1000_hw *hw = &adapter->hw;
2277 struct ifnet *ifp = adapter->ifp;
2278 device_t dev = adapter->dev;
2279 struct tx_ring *txr = adapter->tx_rings;
2280 u32 link_check = 0;
2281
2282 /* Get the cached link value or read phy for real */
2283 switch (hw->phy.media_type) {
2284 case e1000_media_type_copper:
2285 if (hw->mac.get_link_status) {
2286 /* Do the work to read phy */
2287 e1000_check_for_link(hw);
2288 link_check = !hw->mac.get_link_status;
2289 if (link_check) /* ESB2 fix */
2290 e1000_cfg_on_link_up(hw);
2291 } else
2292 link_check = TRUE;
2293 break;
2294 case e1000_media_type_fiber:
2295 e1000_check_for_link(hw);
2296 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2297 E1000_STATUS_LU);
2298 break;
2299 case e1000_media_type_internal_serdes:
2300 e1000_check_for_link(hw);
2301 link_check = adapter->hw.mac.serdes_has_link;
2302 break;
2303 default:
2304 case e1000_media_type_unknown:
2305 break;
2306 }
2307
2308 /* Now check for a transition */
2309 if (link_check && (adapter->link_active == 0)) {
2310 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2311 &adapter->link_duplex);
2312 /* Check if we must disable SPEED_MODE bit on PCI-E */
2313 if ((adapter->link_speed != SPEED_1000) &&
2314 ((hw->mac.type == e1000_82571) ||
2315 (hw->mac.type == e1000_82572))) {
2316 int tarc0;
2317 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2318 tarc0 &= ~SPEED_MODE_BIT;
2319 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2320 }
2321 if (bootverbose)
2322 device_printf(dev, "Link is up %d Mbps %s\n",
2323 adapter->link_speed,
2324 ((adapter->link_duplex == FULL_DUPLEX) ?
2325 "Full Duplex" : "Half Duplex"));
2326 adapter->link_active = 1;
2327 adapter->smartspeed = 0;
2328 ifp->if_baudrate = adapter->link_speed * 1000000;
2329 if_link_state_change(ifp, LINK_STATE_UP);
2330 } else if (!link_check && (adapter->link_active == 1)) {
2331 ifp->if_baudrate = adapter->link_speed = 0;
2332 adapter->link_duplex = 0;
2333 if (bootverbose)
2334 device_printf(dev, "Link is Down\n");
2335 adapter->link_active = 0;
2336 /* Link down, disable watchdog */
2337 for (int i = 0; i < adapter->num_queues; i++, txr++)
2338 txr->queue_status = EM_QUEUE_IDLE;
2339 if_link_state_change(ifp, LINK_STATE_DOWN);
2340 }
2341}
2342
2343/*********************************************************************
2344 *
2345 * This routine disables all traffic on the adapter by issuing a
2346 * global reset on the MAC and deallocates TX/RX buffers.
2347 *
2348 * This routine should always be called with BOTH the CORE
2349 * and TX locks.
2350 **********************************************************************/
2351
2352static void
2353em_stop(void *arg)
2354{
2355 struct adapter *adapter = arg;
2356 struct ifnet *ifp = adapter->ifp;
2357 struct tx_ring *txr = adapter->tx_rings;
2358
2359 EM_CORE_LOCK_ASSERT(adapter);
2360
2361 INIT_DEBUGOUT("em_stop: begin");
2362
2363 em_disable_intr(adapter);
2364 callout_stop(&adapter->timer);
2365
2366 /* Tell the stack that the interface is no longer active */
2367 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2368 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2369
2370 /* Unarm watchdog timer. */
2371 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2372 EM_TX_LOCK(txr);
2373 txr->queue_status = EM_QUEUE_IDLE;
2374 EM_TX_UNLOCK(txr);
2375 }
2376
2377 e1000_reset_hw(&adapter->hw);
2378 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2379
2380 e1000_led_off(&adapter->hw);
2381 e1000_cleanup_led(&adapter->hw);
2382}
2383
2384
2385/*********************************************************************
2386 *
2387 * Determine hardware revision.
2388 *
2389 **********************************************************************/
2390static void
2391em_identify_hardware(struct adapter *adapter)
2392{
2393 device_t dev = adapter->dev;
2394
2395 /* Make sure our PCI config space has the necessary stuff set */
2396 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2397 if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2398 (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2399 device_printf(dev, "Memory Access and/or Bus Master bits "
2400 "were not set!\n");
2401 adapter->hw.bus.pci_cmd_word |=
2402 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2403 pci_write_config(dev, PCIR_COMMAND,
2404 adapter->hw.bus.pci_cmd_word, 2);
2405 }
2406
2407 /* Save off the information about this board */
2408 adapter->hw.vendor_id = pci_get_vendor(dev);
2409 adapter->hw.device_id = pci_get_device(dev);
2410 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2411 adapter->hw.subsystem_vendor_id =
2412 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2413 adapter->hw.subsystem_device_id =
2414 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2415
2416 /* Do Shared Code Init and Setup */
2417 if (e1000_set_mac_type(&adapter->hw)) {
2418 device_printf(dev, "Setup init failure\n");
2419 return;
2420 }
2421}
2422
2423static int
2424em_allocate_pci_resources(struct adapter *adapter)
2425{
2426 device_t dev = adapter->dev;
2427 int rid;
2428
2429 rid = PCIR_BAR(0);
2430 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2431 &rid, RF_ACTIVE);
2432 if (adapter->memory == NULL) {
2433 device_printf(dev, "Unable to allocate bus resource: memory\n");
2434 return (ENXIO);
2435 }
2436 adapter->osdep.mem_bus_space_tag =
2437 rman_get_bustag(adapter->memory);
2438 adapter->osdep.mem_bus_space_handle =
2439 rman_get_bushandle(adapter->memory);
2440 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2441
2442 /* Default to a single queue */
2443 adapter->num_queues = 1;
2444
2445 /*
2446 * Setup MSI/X or MSI if PCI Express
2447 */
2448 adapter->msix = em_setup_msix(adapter);
2449
2450 adapter->hw.back = &adapter->osdep;
2451
2452 return (0);
2453}
2454
2455/*********************************************************************
2456 *
2457 * Setup the Legacy or MSI Interrupt handler
2458 *
2459 **********************************************************************/
2460int
2461em_allocate_legacy(struct adapter *adapter)
2462{
2463 device_t dev = adapter->dev;
2464 struct tx_ring *txr = adapter->tx_rings;
2465 int error, rid = 0;
2466
2467 /* Manually turn off all interrupts */
2468 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2469
2470 if (adapter->msix == 1) /* using MSI */
2471 rid = 1;
2472 /* We allocate a single interrupt resource */
2473 adapter->res = bus_alloc_resource_any(dev,
2474 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2475 if (adapter->res == NULL) {
2476 device_printf(dev, "Unable to allocate bus resource: "
2477 "interrupt\n");
2478 return (ENXIO);
2479 }
2480
2481 /*
2482 * Allocate a fast interrupt and the associated
2483 * deferred processing contexts.
2484 */
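	/*
	 * Note: em_irq_fast is registered below as an interrupt filter
	 * (the ithread handler argument to bus_setup_intr() is NULL),
	 * so it runs in primary interrupt context and only schedules
	 * que_task/link_task; the taskqueues created here perform the
	 * deferred work.
	 */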
2485 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2486 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2487 taskqueue_thread_enqueue, &adapter->tq);
2488 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2489 device_get_nameunit(adapter->dev));
2490 /* Use a TX only tasklet for local timer */
2491 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2492 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2493 taskqueue_thread_enqueue, &txr->tq);
2494 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2495 device_get_nameunit(adapter->dev));
2496 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2497 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2498 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2499 device_printf(dev, "Failed to register fast interrupt "
2500 "handler: %d\n", error);
2501 taskqueue_free(adapter->tq);
2502 adapter->tq = NULL;
2503 return (error);
2504 }
2505
2506 return (0);
2507}
2508
2509/*********************************************************************
2510 *
2511 * Setup the MSIX Interrupt handlers
2512 * This is not really multiqueue, rather
2513 * it is just separate interrupt vectors
2514 * for TX, RX, and Link.
2515 *
2516 **********************************************************************/
2517int
2518em_allocate_msix(struct adapter *adapter)
2519{
2520 device_t dev = adapter->dev;
2521 struct tx_ring *txr = adapter->tx_rings;
2522 struct rx_ring *rxr = adapter->rx_rings;
2523 int error, rid, vector = 0;
2524
2525
2526 /* Make sure all interrupts are disabled */
2527 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2528
2529 /* First set up ring resources */
2530 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2531
2532 /* RX ring */
2533 rid = vector + 1;
2534
2535 rxr->res = bus_alloc_resource_any(dev,
2536 SYS_RES_IRQ, &rid, RF_ACTIVE);
2537 if (rxr->res == NULL) {
2538 device_printf(dev,
2539 "Unable to allocate bus resource: "
2540 "RX MSIX Interrupt %d\n", i);
2541 return (ENXIO);
2542 }
2543 if ((error = bus_setup_intr(dev, rxr->res,
2544 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2545 rxr, &rxr->tag)) != 0) {
2546 device_printf(dev, "Failed to register RX handler");
2547 return (error);
2548 }
2549#if __FreeBSD_version >= 800504
2550 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2551#endif
2552 rxr->msix = vector++; /* NOTE increment vector for TX */
2553 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2554 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2555 taskqueue_thread_enqueue, &rxr->tq);
2556 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2557 device_get_nameunit(adapter->dev));
2558 /*
2559 ** Set the bit to enable interrupt
2560 ** in E1000_IMS -- bits 20 and 21
2561 ** are for RX0 and RX1, note this has
2562 ** NOTHING to do with the MSIX vector
2563 */
2564 rxr->ims = 1 << (20 + i);
2565 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2566
2567 /* TX ring */
2568 rid = vector + 1;
2569 txr->res = bus_alloc_resource_any(dev,
2570 SYS_RES_IRQ, &rid, RF_ACTIVE);
2571 if (txr->res == NULL) {
2572 device_printf(dev,
2573 "Unable to allocate bus resource: "
2574 "TX MSIX Interrupt %d\n", i);
2575 return (ENXIO);
2576 }
2577 if ((error = bus_setup_intr(dev, txr->res,
2578 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2579 txr, &txr->tag)) != 0) {
2580 device_printf(dev, "Failed to register TX handler");
2581 return (error);
2582 }
2583#if __FreeBSD_version >= 800504
2584 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2585#endif
2586 txr->msix = vector++; /* Increment vector for next pass */
2587 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2588 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2589 taskqueue_thread_enqueue, &txr->tq);
2590 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2591 device_get_nameunit(adapter->dev));
2592 /*
2593 ** Set the bit to enable interrupt
2594 ** in E1000_IMS -- bits 22 and 23
2595 ** are for TX0 and TX1, note this has
2596 ** NOTHING to do with the MSIX vector
2597 */
2598 txr->ims = 1 << (22 + i);
2599 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2600 }
2601
2602 /* Link interrupt */
2603 ++rid;
2604 adapter->res = bus_alloc_resource_any(dev,
2605 SYS_RES_IRQ, &rid, RF_ACTIVE);
2606 if (!adapter->res) {
2607 device_printf(dev,"Unable to allocate "
2608 "bus resource: Link interrupt [%d]\n", rid);
2609 return (ENXIO);
2610 }
2611 /* Set the link handler function */
2612 error = bus_setup_intr(dev, adapter->res,
2613 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2614 em_msix_link, adapter, &adapter->tag);
2615 if (error) {
2616 adapter->res = NULL;
2617 device_printf(dev, "Failed to register LINK handler");
2618 return (error);
2619 }
2620#if __FreeBSD_version >= 800504
2621 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2622#endif
2623 adapter->linkvec = vector;
2624 adapter->ivars |= (8 | vector) << 16;
2625 adapter->ivars |= 0x80000000;
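	/*
	 * Sketch of the ivars encoding built above (the 82574 datasheet
	 * is authoritative): each interrupt cause gets a 4-bit field,
	 * the low 3 bits holding the MSIX vector and bit 3 (the "8")
	 * marking the field valid:
	 *	bits 3:0 RxQ0, 7:4 RxQ1, 11:8 TxQ0, 15:12 TxQ1, 19:16 link.
	 * The accumulated value is written to the E1000_IVAR register
	 * later, when the adapter is initialized.
	 */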
2626
2627 return (0);
2628}
2629
2630
2631static void
2632em_free_pci_resources(struct adapter *adapter)
2633{
2634 device_t dev = adapter->dev;
2635 struct tx_ring *txr;
2636 struct rx_ring *rxr;
2637 int rid;
2638
2639
2640 /*
2641 ** Release all the queue interrupt resources:
2642 */
2643 for (int i = 0; i < adapter->num_queues; i++) {
2644 txr = &adapter->tx_rings[i];
2645 rxr = &adapter->rx_rings[i];
2646 /* an early abort? */
2647 if ((txr == NULL) || (rxr == NULL))
2648 break;
2649 rid = txr->msix +1;
2650 if (txr->tag != NULL) {
2651 bus_teardown_intr(dev, txr->res, txr->tag);
2652 txr->tag = NULL;
2653 }
2654 if (txr->res != NULL)
2655 bus_release_resource(dev, SYS_RES_IRQ,
2656 rid, txr->res);
2657 rid = rxr->msix +1;
2658 if (rxr->tag != NULL) {
2659 bus_teardown_intr(dev, rxr->res, rxr->tag);
2660 rxr->tag = NULL;
2661 }
2662 if (rxr->res != NULL)
2663 bus_release_resource(dev, SYS_RES_IRQ,
2664 rid, rxr->res);
2665 }
2666
2667 if (adapter->linkvec) /* we are doing MSIX */
2668 rid = adapter->linkvec + 1;
2669 else
2670		rid = (adapter->msix != 0) ? 1 : 0;
2671
2672 if (adapter->tag != NULL) {
2673 bus_teardown_intr(dev, adapter->res, adapter->tag);
2674 adapter->tag = NULL;
2675 }
2676
2677 if (adapter->res != NULL)
2678 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2679
2680
2681 if (adapter->msix)
2682 pci_release_msi(dev);
2683
2684 if (adapter->msix_mem != NULL)
2685 bus_release_resource(dev, SYS_RES_MEMORY,
2686 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2687
2688 if (adapter->memory != NULL)
2689 bus_release_resource(dev, SYS_RES_MEMORY,
2690 PCIR_BAR(0), adapter->memory);
2691
2692 if (adapter->flash != NULL)
2693 bus_release_resource(dev, SYS_RES_MEMORY,
2694 EM_FLASH, adapter->flash);
2695}
2696
2697/*
2698 * Setup MSI or MSI/X
2699 */
2700static int
2701em_setup_msix(struct adapter *adapter)
2702{
2703 device_t dev = adapter->dev;
2704 int val = 0;
2705
2706 /*
2707 ** Setup MSI/X for Hartwell: tests have shown
2708 ** use of two queues to be unstable, and to
2709 ** provide no great gain anyway, so we simply
2710	** separate the interrupts and use a single queue.
2711 */
2712 if ((adapter->hw.mac.type == e1000_82574) &&
2713 (em_enable_msix == TRUE)) {
2714 /* Map the MSIX BAR */
2715 int rid = PCIR_BAR(EM_MSIX_BAR);
2716 adapter->msix_mem = bus_alloc_resource_any(dev,
2717 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2718 if (!adapter->msix_mem) {
2719 /* May not be enabled */
2720 device_printf(adapter->dev,
2721			    "Unable to map MSIX table\n");
2722 goto msi;
2723 }
2724 val = pci_msix_count(dev);
2725 /* We only need 3 vectors */
2726 if (val > 3)
2727 val = 3;
2728		if (val != 3) {	/* val was clamped to 3 above */
2729 bus_release_resource(dev, SYS_RES_MEMORY,
2730 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2731 adapter->msix_mem = NULL;
2732 device_printf(adapter->dev,
2733 "MSIX: incorrect vectors, using MSI\n");
2734 goto msi;
2735 }
2736
2737 if (pci_alloc_msix(dev, &val) == 0) {
2738 device_printf(adapter->dev,
2739 "Using MSIX interrupts "
2740 "with %d vectors\n", val);
2741 }
2742
2743 return (val);
2744 }
2745msi:
2746 val = pci_msi_count(dev);
2747 if (val == 1 && pci_alloc_msi(dev, &val) == 0) {
2748 adapter->msix = 1;
2749 device_printf(adapter->dev,"Using an MSI interrupt\n");
2750 return (val);
2751 }
2752 /* Should only happen due to manual configuration */
2753	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2754 return (0);
2755}
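/*
 * With the single queue used above, em_allocate_msix() assigns
 * vector 0 to RX, vector 1 to TX and vector 2 to the link interrupt,
 * which is exactly the three vectors requested here.
 */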
2756
2757
2758/*********************************************************************
2759 *
2760 * Initialize the hardware to a configuration
2761 * as specified by the adapter structure.
2762 *
2763 **********************************************************************/
2764static void
2765em_reset(struct adapter *adapter)
2766{
2767 device_t dev = adapter->dev;
2768 struct ifnet *ifp = adapter->ifp;
2769 struct e1000_hw *hw = &adapter->hw;
2770 u16 rx_buffer_size;
2771 u32 pba;
2772
2773 INIT_DEBUGOUT("em_reset: begin");
2774
2775 /* Set up smart power down as default off on newer adapters. */
2776 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2777 hw->mac.type == e1000_82572)) {
2778 u16 phy_tmp = 0;
2779
2780 /* Speed up time to link by disabling smart power down. */
2781 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2782 phy_tmp &= ~IGP02E1000_PM_SPD;
2783 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2784 }
2785
2786 /*
2787 * Packet Buffer Allocation (PBA)
2788	 * Writing PBA sets the receive portion of the buffer;
2789 * the remainder is used for the transmit buffer.
2790 */
2791 switch (hw->mac.type) {
2792 /* Total Packet Buffer on these is 48K */
2793 case e1000_82571:
2794 case e1000_82572:
2795 case e1000_80003es2lan:
2796 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2797 break;
2798 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2799 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2800 break;
2801 case e1000_82574:
2802 case e1000_82583:
2803 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2804 break;
2805 case e1000_ich8lan:
2806 pba = E1000_PBA_8K;
2807 break;
2808 case e1000_ich9lan:
2809 case e1000_ich10lan:
2810 /* Boost Receive side for jumbo frames */
2811 if (adapter->max_frame_size > 4096)
2812 pba = E1000_PBA_14K;
2813 else
2814 pba = E1000_PBA_10K;
2815 break;
2816 case e1000_pchlan:
2817 case e1000_pch2lan:
2818 pba = E1000_PBA_26K;
2819 break;
2820 default:
2821 if (adapter->max_frame_size > 8192)
2822 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2823 else
2824 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2825 }
2826 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2827
2828 /*
2829 * These parameters control the automatic generation (Tx) and
2830 * response (Rx) to Ethernet PAUSE frames.
2831 * - High water mark should allow for at least two frames to be
2832 * received after sending an XOFF.
2833 * - Low water mark works best when it is very near the high water mark.
2834 * This allows the receiver to restart by sending XON when it has
2835	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2836 * restart after one full frame is pulled from the buffer. There
2837 * could be several smaller frames in the buffer and if so they will
2838 * not trigger the XON until their total number reduces the buffer
2839 * by 1500.
2840 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2841 */
2842 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2843 hw->fc.high_water = rx_buffer_size -
2844 roundup2(adapter->max_frame_size, 1024);
2845 hw->fc.low_water = hw->fc.high_water - 1500;
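	/*
	 * Worked example (illustrative numbers only): with E1000_PBA_32K
	 * the RX share of the packet buffer is 32KB, so rx_buffer_size =
	 * 0x20 << 10 = 32768 bytes.  For a standard 1518-byte max frame,
	 * roundup2(1518, 1024) = 2048, giving high_water = 32768 - 2048 =
	 * 30720 and low_water = 30720 - 1500 = 29220.
	 */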
2846
2847 if (adapter->fc) /* locally set flow control value? */
2848 hw->fc.requested_mode = adapter->fc;
2849 else
2850 hw->fc.requested_mode = e1000_fc_full;
2851
2852 if (hw->mac.type == e1000_80003es2lan)
2853 hw->fc.pause_time = 0xFFFF;
2854 else
2855 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2856
2857 hw->fc.send_xon = TRUE;
2858
2859 /* Device specific overrides/settings */
2860 switch (hw->mac.type) {
2861 case e1000_pchlan:
2862 /* Workaround: no TX flow ctrl for PCH */
2863 hw->fc.requested_mode = e1000_fc_rx_pause;
2864 hw->fc.pause_time = 0xFFFF; /* override */
2865 if (ifp->if_mtu > ETHERMTU) {
2866 hw->fc.high_water = 0x3500;
2867 hw->fc.low_water = 0x1500;
2868 } else {
2869 hw->fc.high_water = 0x5000;
2870 hw->fc.low_water = 0x3000;
2871 }
2872 hw->fc.refresh_time = 0x1000;
2873 break;
2874 case e1000_pch2lan:
2875 hw->fc.high_water = 0x5C20;
2876 hw->fc.low_water = 0x5048;
2877 hw->fc.pause_time = 0x0650;
2878 hw->fc.refresh_time = 0x0400;
2879 /* Jumbos need adjusted PBA */
2880 if (ifp->if_mtu > ETHERMTU)
2881 E1000_WRITE_REG(hw, E1000_PBA, 12);
2882 else
2883 E1000_WRITE_REG(hw, E1000_PBA, 26);
2884 break;
2885 case e1000_ich9lan:
2886 case e1000_ich10lan:
2887 if (ifp->if_mtu > ETHERMTU) {
2888 hw->fc.high_water = 0x2800;
2889 hw->fc.low_water = hw->fc.high_water - 8;
2890 break;
2891 }
2892 /* else fall thru */
2893 default:
2894 if (hw->mac.type == e1000_80003es2lan)
2895 hw->fc.pause_time = 0xFFFF;
2896 break;
2897 }
2898
2899 /* Issue a global reset */
2900 e1000_reset_hw(hw);
2901 E1000_WRITE_REG(hw, E1000_WUC, 0);
2902 em_disable_aspm(adapter);
2903 /* and a re-init */
2904 if (e1000_init_hw(hw) < 0) {
2905 device_printf(dev, "Hardware Initialization Failed\n");
2906 return;
2907 }
2908
2909 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2910 e1000_get_phy_info(hw);
2911 e1000_check_for_link(hw);
2912 return;
2913}
2914
2915/*********************************************************************
2916 *
2917 * Setup networking device structure and register an interface.
2918 *
2919 **********************************************************************/
2920static int
2921em_setup_interface(device_t dev, struct adapter *adapter)
2922{
2923 struct ifnet *ifp;
2924
2925 INIT_DEBUGOUT("em_setup_interface: begin");
2926
2927 ifp = adapter->ifp = if_alloc(IFT_ETHER);
2928 if (ifp == NULL) {
2929		device_printf(dev, "cannot allocate ifnet structure\n");
2930 return (-1);
2931 }
2932 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2933 ifp->if_init = em_init;
2934 ifp->if_softc = adapter;
2935 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2936 ifp->if_ioctl = em_ioctl;
2937#ifdef EM_MULTIQUEUE
2938 /* Multiqueue stack interface */
2939 ifp->if_transmit = em_mq_start;
2940 ifp->if_qflush = em_qflush;
2941#else
2942 ifp->if_start = em_start;
2943 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2944 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2945 IFQ_SET_READY(&ifp->if_snd);
2946#endif
2947
2948 ether_ifattach(ifp, adapter->hw.mac.addr);
2949
2950 ifp->if_capabilities = ifp->if_capenable = 0;
2951
2952
2953 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2954 ifp->if_capabilities |= IFCAP_TSO4;
2955 /*
2956 * Tell the upper layer(s) we
2957 * support full VLAN capability
2958 */
2959 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2960 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2961 | IFCAP_VLAN_HWTSO
2962 | IFCAP_VLAN_MTU;
2963 ifp->if_capenable = ifp->if_capabilities;
2964
2965 /*
2966	** Don't turn this on by default: if vlans are
2967	** created on another pseudo device (e.g. lagg),
2968	** vlan events are not passed through, breaking
2969	** operation, whereas with HW FILTER off it works.
2970	** If using vlans directly on the em driver you can
2971	** enable this and get full hardware tag filtering.
2972 */
2973 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2974
2975#ifdef DEVICE_POLLING
2976 ifp->if_capabilities |= IFCAP_POLLING;
2977#endif
2978
2979 /* Enable only WOL MAGIC by default */
2980 if (adapter->wol) {
2981 ifp->if_capabilities |= IFCAP_WOL;
2982 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2983 }
2984
2985 /*
2986 * Specify the media types supported by this adapter and register
2987 * callbacks to update media and link information
2988 */
2989 ifmedia_init(&adapter->media, IFM_IMASK,
2990 em_media_change, em_media_status);
2991 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2992 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2993 u_char fiber_type = IFM_1000_SX; /* default type */
2994
2995 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
2996 0, NULL);
2997 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2998 } else {
2999 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3000 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3001 0, NULL);
3002 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3003 0, NULL);
3004 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3005 0, NULL);
3006 if (adapter->hw.phy.type != e1000_phy_ife) {
3007 ifmedia_add(&adapter->media,
3008 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3009 ifmedia_add(&adapter->media,
3010 IFM_ETHER | IFM_1000_T, 0, NULL);
3011 }
3012 }
3013 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3014 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3015 return (0);
3016}
3017
3018
3019/*
3020 * Manage DMA'able memory.
3021 */
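/*
 * em_dmamap_cb() is the bus_dmamap_load() callback: the tag created
 * in em_dma_malloc() below uses nsegments = 1, so on success nseg is
 * always 1 and the single segment's bus address is handed back
 * through arg.
 */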
3022static void
3023em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3024{
3025 if (error)
3026 return;
3027 *(bus_addr_t *) arg = segs[0].ds_addr;
3028}
3029
3030static int
3031em_dma_malloc(struct adapter *adapter, bus_size_t size,
3032 struct em_dma_alloc *dma, int mapflags)
3033{
3034 int error;
3035
3036 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3037 EM_DBA_ALIGN, 0, /* alignment, bounds */
3038 BUS_SPACE_MAXADDR, /* lowaddr */
3039 BUS_SPACE_MAXADDR, /* highaddr */
3040 NULL, NULL, /* filter, filterarg */
3041 size, /* maxsize */
3042 1, /* nsegments */
3043 size, /* maxsegsize */
3044 0, /* flags */
3045 NULL, /* lockfunc */
3046 NULL, /* lockarg */
3047 &dma->dma_tag);
3048 if (error) {
3049 device_printf(adapter->dev,
3050 "%s: bus_dma_tag_create failed: %d\n",
3051 __func__, error);
3052 goto fail_0;
3053 }
3054
3055 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3056 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3057 if (error) {
3058 device_printf(adapter->dev,
3059 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3060 __func__, (uintmax_t)size, error);
3061		goto fail_1;
3062 }
3063
3064 dma->dma_paddr = 0;
3065 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3066 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3067 if (error || dma->dma_paddr == 0) {
3068 device_printf(adapter->dev,
3069 "%s: bus_dmamap_load failed: %d\n",
3070 __func__, error);
3071		if (error == 0)	/* the dma_paddr check tripped */
			error = ENOMEM;
		goto fail_3;
3072 }
3073
3074 return (0);
3075
3076fail_3:
3077 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3078fail_2:
3079	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
3080	bus_dma_tag_destroy(dma->dma_tag);
3081fail_0:
3082 dma->dma_map = NULL;
3083 dma->dma_tag = NULL;
3084
3085 return (error);
3086}
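/*
 * Typical usage (as in em_allocate_queues() below):
 *
 *	if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 *
 * with a matching em_dma_free() call on teardown.
 */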
3087
3088static void
3089em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3090{
3091 if (dma->dma_tag == NULL)
3092 return;
3093 if (dma->dma_map != NULL) {
3094 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3095 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3096 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3097 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3098 dma->dma_map = NULL;
3099 }
3100 bus_dma_tag_destroy(dma->dma_tag);
3101 dma->dma_tag = NULL;
3102}
3103
3104
3105/*********************************************************************
3106 *
3107 * Allocate memory for the transmit and receive rings, and then
3108 * the descriptors associated with each, called only once at attach.
3109 *
3110 **********************************************************************/
3111static int
3112em_allocate_queues(struct adapter *adapter)
3113{
3114 device_t dev = adapter->dev;
3115 struct tx_ring *txr = NULL;
3116 struct rx_ring *rxr = NULL;
3117 int rsize, tsize, error = E1000_SUCCESS;
3118 int txconf = 0, rxconf = 0;
3119
3120
3121 /* Allocate the TX ring struct memory */
3122 if (!(adapter->tx_rings =
3123 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3124 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3125 device_printf(dev, "Unable to allocate TX ring memory\n");
3126 error = ENOMEM;
3127 goto fail;
3128 }
3129
3130 /* Now allocate the RX */
3131 if (!(adapter->rx_rings =
3132 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3133 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3134 device_printf(dev, "Unable to allocate RX ring memory\n");
3135 error = ENOMEM;
3136 goto rx_fail;
3137 }
3138
3139 tsize = roundup2(adapter->num_tx_desc *
3140 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
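	/*
	 * (A sketch of the arithmetic: sizeof(struct e1000_tx_desc) is
	 * 16 bytes, so with the common default of 1024 descriptors this
	 * is 1024 * 16 = 16384 bytes, already a multiple of EM_DBA_ALIGN.)
	 */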
3141 /*
3142 * Now set up the TX queues, txconf is needed to handle the
3143 * possibility that things fail midcourse and we need to
3144 * undo memory gracefully
3145 */
3146 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3147 /* Set up some basics */
3148 txr = &adapter->tx_rings[i];
3149 txr->adapter = adapter;
3150 txr->me = i;
3151
3152 /* Initialize the TX lock */
3153 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3154 device_get_nameunit(dev), txr->me);
3155 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3156
3157 if (em_dma_malloc(adapter, tsize,
3158 &txr->txdma, BUS_DMA_NOWAIT)) {
3159 device_printf(dev,
3160 "Unable to allocate TX Descriptor memory\n");
3161 error = ENOMEM;
3162 goto err_tx_desc;
3163 }
3164 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3165 bzero((void *)txr->tx_base, tsize);
3166
3167 if (em_allocate_transmit_buffers(txr)) {
3168 device_printf(dev,
3169 "Critical Failure setting up transmit buffers\n");
3170 error = ENOMEM;
3171 goto err_tx_desc;
3172 }
3173#if __FreeBSD_version >= 800000
3174 /* Allocate a buf ring */
3175 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3176 M_WAITOK, &txr->tx_mtx);
3177#endif
3178 }
3179
3180 /*
3181 * Next the RX queues...
3182 */
3183 rsize = roundup2(adapter->num_rx_desc *
3184 sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3185 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3186 rxr = &adapter->rx_rings[i];
3187 rxr->adapter = adapter;
3188 rxr->me = i;
3189
3190 /* Initialize the RX lock */
3191 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3192		    device_get_nameunit(dev), rxr->me);
3193 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3194
3195 if (em_dma_malloc(adapter, rsize,
3196 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3197 device_printf(dev,
3198			    "Unable to allocate RX Descriptor memory\n");
3199 error = ENOMEM;
3200 goto err_rx_desc;
3201 }
3202 rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3203 bzero((void *)rxr->rx_base, rsize);
3204
3205 /* Allocate receive buffers for the ring*/
3206 if (em_allocate_receive_buffers(rxr)) {
3207 device_printf(dev,
3208 "Critical Failure setting up receive buffers\n");
3209 error = ENOMEM;
3210 goto err_rx_desc;
3211 }
3212 }
3213
3214 return (0);
3215
3216err_rx_desc:
3217 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3218 em_dma_free(adapter, &rxr->rxdma);
3219err_tx_desc:
3220 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3221 em_dma_free(adapter, &txr->txdma);
3222 free(adapter->rx_rings, M_DEVBUF);
3223rx_fail:
3224#if __FreeBSD_version >= 800000
3225	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
3226#endif
3227 free(adapter->tx_rings, M_DEVBUF);
3228fail:
3229 return (error);
3230}
3231
3232
3233/*********************************************************************
3234 *
3235 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3236 * the information needed to transmit a packet on the wire. This is
3237 * called only once at attach; setup is done on every reset.
3238 *
3239 **********************************************************************/
3240static int
3241em_allocate_transmit_buffers(struct tx_ring *txr)
3242{
3243 struct adapter *adapter = txr->adapter;
3244 device_t dev = adapter->dev;
3245 struct em_buffer *txbuf;
3246 int error, i;
3247
3248 /*
3249 * Setup DMA descriptor areas.
3250 */
3251 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3252 1, 0, /* alignment, bounds */
3253 BUS_SPACE_MAXADDR, /* lowaddr */
3254 BUS_SPACE_MAXADDR, /* highaddr */
3255 NULL, NULL, /* filter, filterarg */
3256 EM_TSO_SIZE, /* maxsize */
3257 EM_MAX_SCATTER, /* nsegments */
3258 PAGE_SIZE, /* maxsegsize */
3259 0, /* flags */
3260 NULL, /* lockfunc */
3261 NULL, /* lockfuncarg */
3262 &txr->txtag))) {
3263 device_printf(dev,"Unable to allocate TX DMA tag\n");
3264 goto fail;
3265 }
3266
3267 if (!(txr->tx_buffers =
3268 (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3269 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3270 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3271 error = ENOMEM;
3272 goto fail;
3273 }
3274
3275 /* Create the descriptor buffer dma maps */
3276 txbuf = txr->tx_buffers;
3277 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3278 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3279 if (error != 0) {
3280 device_printf(dev, "Unable to create TX DMA map\n");
3281 goto fail;
3282 }
3283 }
3284
3285	return (0);
3286fail:
3287	/* Free everything; this handles the case where we failed midway */
3288 em_free_transmit_structures(adapter);
3289 return (error);
3290}
3291
3292/*********************************************************************
3293 *
3294 * Initialize a transmit ring.
3295 *
3296 **********************************************************************/
3297static void
3298em_setup_transmit_ring(struct tx_ring *txr)
3299{
3300 struct adapter *adapter = txr->adapter;
3301 struct em_buffer *txbuf;
3302 int i;
3303#ifdef DEV_NETMAP
3304 struct netmap_adapter *na = NA(adapter->ifp);
3305 struct netmap_slot *slot;
3306#endif /* DEV_NETMAP */
3307
3308 /* Clear the old descriptor contents */
3309 EM_TX_LOCK(txr);
3310#ifdef DEV_NETMAP
3311 slot = netmap_reset(na, NR_TX, txr->me, 0);
3312#endif /* DEV_NETMAP */
3313
3314 bzero((void *)txr->tx_base,
3315 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3316 /* Reset indices */
3317 txr->next_avail_desc = 0;
3318 txr->next_to_clean = 0;
3319
3320 /* Free any existing tx buffers. */
3321 txbuf = txr->tx_buffers;
3322 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3323 if (txbuf->m_head != NULL) {
3324 bus_dmamap_sync(txr->txtag, txbuf->map,
3325 BUS_DMASYNC_POSTWRITE);
3326 bus_dmamap_unload(txr->txtag, txbuf->map);
3327 m_freem(txbuf->m_head);
3328 txbuf->m_head = NULL;
3329 }
3330#ifdef DEV_NETMAP
3331 if (slot) {
3332 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3333 uint64_t paddr;
3334 void *addr;
3335
3336 addr = PNMB(slot + si, &paddr);
3337 txr->tx_base[i].buffer_addr = htole64(paddr);
3338 /* reload the map for netmap mode */
3339 netmap_load_map(txr->txtag, txbuf->map, addr);
3340 }
3341#endif /* DEV_NETMAP */
3342
3343 /* clear the watch index */
3344 txbuf->next_eop = -1;
3345 }
3346
3347 /* Set number of descriptors available */
3348 txr->tx_avail = adapter->num_tx_desc;
3349 txr->queue_status = EM_QUEUE_IDLE;
3350
3351 /* Clear checksum offload context. */
3352 txr->last_hw_offload = 0;
3353 txr->last_hw_ipcss = 0;
3354 txr->last_hw_ipcso = 0;
3355 txr->last_hw_tucss = 0;
3356 txr->last_hw_tucso = 0;
3357
3358 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3359 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3360 EM_TX_UNLOCK(txr);
3361}
3362
3363/*********************************************************************
3364 *
3365 * Initialize all transmit rings.
3366 *
3367 **********************************************************************/
3368static void
3369em_setup_transmit_structures(struct adapter *adapter)
3370{
3371 struct tx_ring *txr = adapter->tx_rings;
3372
3373 for (int i = 0; i < adapter->num_queues; i++, txr++)
3374 em_setup_transmit_ring(txr);
3375
3376 return;
3377}
3378
3379/*********************************************************************
3380 *
3381 * Enable transmit unit.
3382 *
3383 **********************************************************************/
3384static void
3385em_initialize_transmit_unit(struct adapter *adapter)
3386{
3387 struct tx_ring *txr = adapter->tx_rings;
3388 struct e1000_hw *hw = &adapter->hw;
3389 u32 tctl, tarc, tipg = 0;
3390
3391 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3392
3393 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3394 u64 bus_addr = txr->txdma.dma_paddr;
3395 /* Base and Len of TX Ring */
3396 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3397 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3398 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3399 (u32)(bus_addr >> 32));
3400 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3401 (u32)bus_addr);
3402 /* Init the HEAD/TAIL indices */
3403 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3404 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3405
3406 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3407 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3408 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3409
3410 txr->queue_status = EM_QUEUE_IDLE;
3411 }
3412
3413 /* Set the default values for the Tx Inter Packet Gap timer */
3414 switch (adapter->hw.mac.type) {
3415 case e1000_80003es2lan:
3416 tipg = DEFAULT_82543_TIPG_IPGR1;
3417 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3418 E1000_TIPG_IPGR2_SHIFT;
3419 break;
3420 default:
3421 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3422 (adapter->hw.phy.media_type ==
3423 e1000_media_type_internal_serdes))
3424 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3425 else
3426 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3427 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3428 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3429 }
3430
3431 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3432 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3433
3434	if (adapter->hw.mac.type >= e1000_82540)
3435 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3436 adapter->tx_abs_int_delay.value);
3437
3438 if ((adapter->hw.mac.type == e1000_82571) ||
3439 (adapter->hw.mac.type == e1000_82572)) {
3440 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3441 tarc |= SPEED_MODE_BIT;
3442 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3443 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3444 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3445 tarc |= 1;
3446 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3447 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3448 tarc |= 1;
3449 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3450 }
3451
3452 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3453 if (adapter->tx_int_delay.value > 0)
3454 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3455
3456 /* Program the Transmit Control Register */
3457 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3458 tctl &= ~E1000_TCTL_CT;
3459 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3460 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3461
3462 if (adapter->hw.mac.type >= e1000_82571)
3463 tctl |= E1000_TCTL_MULR;
3464
3465 /* This write will effectively turn on the transmit unit. */
3466 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3467
3468}
3469
3470
3471/*********************************************************************
3472 *
3473 * Free all transmit rings.
3474 *
3475 **********************************************************************/
3476static void
3477em_free_transmit_structures(struct adapter *adapter)
3478{
3479 struct tx_ring *txr = adapter->tx_rings;
3480
3481 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3482 EM_TX_LOCK(txr);
3483 em_free_transmit_buffers(txr);
3484 em_dma_free(adapter, &txr->txdma);
3485 EM_TX_UNLOCK(txr);
3486 EM_TX_LOCK_DESTROY(txr);
3487 }
3488
3489 free(adapter->tx_rings, M_DEVBUF);
3490}
3491
3492/*********************************************************************
3493 *
3494 * Free transmit ring related data structures.
3495 *
3496 **********************************************************************/
3497static void
3498em_free_transmit_buffers(struct tx_ring *txr)
3499{
3500 struct adapter *adapter = txr->adapter;
3501 struct em_buffer *txbuf;
3502
3503 INIT_DEBUGOUT("free_transmit_ring: begin");
3504
3505 if (txr->tx_buffers == NULL)
3506 return;
3507
3508 for (int i = 0; i < adapter->num_tx_desc; i++) {
3509 txbuf = &txr->tx_buffers[i];
3510 if (txbuf->m_head != NULL) {
3511 bus_dmamap_sync(txr->txtag, txbuf->map,
3512 BUS_DMASYNC_POSTWRITE);
3513 bus_dmamap_unload(txr->txtag,
3514 txbuf->map);
3515 m_freem(txbuf->m_head);
3516 txbuf->m_head = NULL;
3517 if (txbuf->map != NULL) {
3518 bus_dmamap_destroy(txr->txtag,
3519 txbuf->map);
3520 txbuf->map = NULL;
3521 }
3522 } else if (txbuf->map != NULL) {
3523 bus_dmamap_unload(txr->txtag,
3524 txbuf->map);
3525 bus_dmamap_destroy(txr->txtag,
3526 txbuf->map);
3527 txbuf->map = NULL;
3528 }
3529 }
3530#if __FreeBSD_version >= 800000
3531 if (txr->br != NULL)
3532 buf_ring_free(txr->br, M_DEVBUF);
3533#endif
3534 if (txr->tx_buffers != NULL) {
3535 free(txr->tx_buffers, M_DEVBUF);
3536 txr->tx_buffers = NULL;
3537 }
3538 if (txr->txtag != NULL) {
3539 bus_dma_tag_destroy(txr->txtag);
3540 txr->txtag = NULL;
3541 }
3542 return;
3543}
3544
3545
3546/*********************************************************************
3547 * The offload context is protocol specific (TCP/UDP) and thus
3548 * only needs to be set when the protocol changes. A context
3549 * change can be a performance detriment, however, and it
3550 * might be better just disabled. The reason arises in the way
3551 * in which the controller supports pipelined requests from the
3552 * Tx data DMA. Up to four requests can be pipelined, and they may
3553 * belong to the same packet or to multiple packets. However, all
3554 * requests for one packet are issued before a request is issued
3555 * for a subsequent packet, and if a request for the next packet
3556 * requires a context change, that request will be stalled
3557 * until the previous request completes. This means setting up
3558 * a new context effectively disables pipelined Tx data DMA, which
3559 * in turn greatly slows down performance when sending small
3560 * frames.
3561 **********************************************************************/
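/*
 * As an illustration: a stream of back-to-back TCP segments with the
 * same header geometry matches the last_hw_* cache checks in the
 * function below, so only the first segment emits a context
 * descriptor; the rest reuse it and keep the Tx data DMA pipeline
 * full.
 */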
3562static void
3563em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3564 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3565{
3566 struct adapter *adapter = txr->adapter;
3567 struct e1000_context_desc *TXD = NULL;
3568 struct em_buffer *tx_buffer;
3569 int cur, hdr_len;
3570 u32 cmd = 0;
3571 u16 offload = 0;
3572 u8 ipcso, ipcss, tucso, tucss;
3573
3574 ipcss = ipcso = tucss = tucso = 0;
3575 hdr_len = ip_off + (ip->ip_hl << 2);
3576 cur = txr->next_avail_desc;
3577
3578 /* Setup of IP header checksum. */
3579 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3580 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3581 offload |= CSUM_IP;
3582 ipcss = ip_off;
3583 ipcso = ip_off + offsetof(struct ip, ip_sum);
3584 /*
3585 * Start offset for header checksum calculation.
3586 * End offset for header checksum calculation.
3587 * Offset of place to put the checksum.
3588 */
3589 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3590 TXD->lower_setup.ip_fields.ipcss = ipcss;
3591 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3592 TXD->lower_setup.ip_fields.ipcso = ipcso;
3593 cmd |= E1000_TXD_CMD_IP;
3594 }
3595
3596 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3597 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3598 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3599 offload |= CSUM_TCP;
3600 tucss = hdr_len;
3601 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3602 /*
3603		 * Setting up a new checksum offload context for every frame
3604		 * takes a lot of processing time on the hardware. It also
3605		 * badly hurts performance for small-sized frames, so avoid
3606		 * it if the driver can reuse the previously configured
3607		 * checksum offload context.
3608 */
3609 if (txr->last_hw_offload == offload) {
3610 if (offload & CSUM_IP) {
3611 if (txr->last_hw_ipcss == ipcss &&
3612 txr->last_hw_ipcso == ipcso &&
3613 txr->last_hw_tucss == tucss &&
3614 txr->last_hw_tucso == tucso)
3615 return;
3616 } else {
3617 if (txr->last_hw_tucss == tucss &&
3618 txr->last_hw_tucso == tucso)
3619 return;
3620 }
3621 }
3622 txr->last_hw_offload = offload;
3623 txr->last_hw_tucss = tucss;
3624 txr->last_hw_tucso = tucso;
3625 /*
3626 * Start offset for payload checksum calculation.
3627 * End offset for payload checksum calculation.
3628 * Offset of place to put the checksum.
3629 */
3630 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3631 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3632 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3633 TXD->upper_setup.tcp_fields.tucso = tucso;
3634 cmd |= E1000_TXD_CMD_TCP;
3635 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3636 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3637		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		offload |= CSUM_UDP;	/* key the context-reuse check below, as in the TCP path */
3638		tucss = hdr_len;
3639 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3640 /*
3641		 * Setting up a new checksum offload context for every frame
3642		 * takes a lot of processing time on the hardware. It also
3643		 * badly hurts performance for small-sized frames, so avoid
3644		 * it if the driver can reuse the previously configured
3645		 * checksum offload context.
3646 */
3647 if (txr->last_hw_offload == offload) {
3648 if (offload & CSUM_IP) {
3649 if (txr->last_hw_ipcss == ipcss &&
3650 txr->last_hw_ipcso == ipcso &&
3651 txr->last_hw_tucss == tucss &&
3652 txr->last_hw_tucso == tucso)
3653 return;
3654 } else {
3655 if (txr->last_hw_tucss == tucss &&
3656 txr->last_hw_tucso == tucso)
3657 return;
3658 }
3659 }
3660 txr->last_hw_offload = offload;
3661 txr->last_hw_tucss = tucss;
3662 txr->last_hw_tucso = tucso;
3663 /*
3664 * Start offset for header checksum calculation.
3665 * End offset for header checksum calculation.
3666 * Offset of place to put the checksum.
3667 */
3668 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3669 TXD->upper_setup.tcp_fields.tucss = tucss;
3670 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3671 TXD->upper_setup.tcp_fields.tucso = tucso;
3672 }
3673
3674 if (offload & CSUM_IP) {
3675 txr->last_hw_ipcss = ipcss;
3676 txr->last_hw_ipcso = ipcso;
3677 }
3678
3679 TXD->tcp_seg_setup.data = htole32(0);
3680 TXD->cmd_and_length =
3681 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3682 tx_buffer = &txr->tx_buffers[cur];
3683 tx_buffer->m_head = NULL;
3684 tx_buffer->next_eop = -1;
3685
3686 if (++cur == adapter->num_tx_desc)
3687 cur = 0;
3688
3689 txr->tx_avail--;
3690 txr->next_avail_desc = cur;
3691}
3692
3693
3694/**********************************************************************
3695 *
3696 * Setup work for hardware segmentation offload (TSO)
3697 *
3698 **********************************************************************/
3699static void
3700em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3701 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3702{
3703 struct adapter *adapter = txr->adapter;
3704 struct e1000_context_desc *TXD;
3705 struct em_buffer *tx_buffer;
3706 int cur, hdr_len;
3707
3708 /*
3709	 * In theory we could reuse the same TSO context if and only if
3710	 * the frame is the same type (IP/TCP) and has the same MSS.
3711	 * However, checking whether a frame has the same IP/TCP
3712	 * structure is a hard thing to do, so just ignore that and
3713	 * always establish a new TSO context.
3714 */
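	/* Total header length: link-layer offset plus the IP and TCP
	 * headers; ip_hl and th_off both count 32-bit words, hence
	 * the << 2 shifts. */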
3715 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3716 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
3717 E1000_TXD_DTYP_D | /* Data descr type */
3718 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
3719
3720 /* IP and/or TCP header checksum calculation and insertion. */
3721 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3722
3723 cur = txr->next_avail_desc;
3724 tx_buffer = &txr->tx_buffers[cur];
3725 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3726
3727 /*
3728 * Start offset for header checksum calculation.
3729 * End offset for header checksum calculation.
3730	 * Offset of place to put the checksum.
3731 */
3732 TXD->lower_setup.ip_fields.ipcss = ip_off;
3733 TXD->lower_setup.ip_fields.ipcse =
3734 htole16(ip_off + (ip->ip_hl << 2) - 1);
3735 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3736 /*
3737 * Start offset for payload checksum calculation.
3738 * End offset for payload checksum calculation.
3739 * Offset of place to put the checksum.
3740 */
3741 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3742 TXD->upper_setup.tcp_fields.tucse = 0;
3743 TXD->upper_setup.tcp_fields.tucso =
3744 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3745 /*
3746 * Payload size per packet w/o any headers.
3747 * Length of all headers up to payload.
3748 */
3749 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3750 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3751
3752 TXD->cmd_and_length = htole32(adapter->txd_cmd |
3753 E1000_TXD_CMD_DEXT | /* Extended descr */
3754 E1000_TXD_CMD_TSE | /* TSE context */
3755 E1000_TXD_CMD_IP | /* Do IP csum */
3756 E1000_TXD_CMD_TCP | /* Do TCP checksum */
3757 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3758
3759 tx_buffer->m_head = NULL;
3760 tx_buffer->next_eop = -1;
3761
3762 if (++cur == adapter->num_tx_desc)
3763 cur = 0;
3764
3765 txr->tx_avail--;
3766 txr->next_avail_desc = cur;
3767 txr->tx_tso = TRUE;
3768}
3769
3770
3771/**********************************************************************
3772 *
3773 * Examine each tx_buffer in the used queue. If the hardware is done
3774 * processing the packet then free associated resources. The
3775 * tx_buffer is put back on the free queue.
3776 *
3777 **********************************************************************/
3778static void
3779em_txeof(struct tx_ring *txr)
3780{
3781 struct adapter *adapter = txr->adapter;
3782 int first, last, done, processed;
3783 struct em_buffer *tx_buffer;
3784 struct e1000_tx_desc *tx_desc, *eop_desc;
3785 struct ifnet *ifp = adapter->ifp;
3786
3787 EM_TX_LOCK_ASSERT(txr);
3788#ifdef DEV_NETMAP
3789 if (ifp->if_capenable & IFCAP_NETMAP) {
3790 struct netmap_adapter *na = NA(ifp);
3791
3792 selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3793 EM_TX_UNLOCK(txr);
3794 EM_CORE_LOCK(adapter);
3795 selwakeuppri(&na->tx_si, PI_NET);
3796 EM_CORE_UNLOCK(adapter);
3797 EM_TX_LOCK(txr);
3798 return;
3799 }
3800#endif /* DEV_NETMAP */
3801
3802 /* No work, make sure watchdog is off */
3803 if (txr->tx_avail == adapter->num_tx_desc) {
3804 txr->queue_status = EM_QUEUE_IDLE;
3805 return;
3806 }
3807
3808 processed = 0;
3809 first = txr->next_to_clean;
3810 tx_desc = &txr->tx_base[first];
3811 tx_buffer = &txr->tx_buffers[first];
3812 last = tx_buffer->next_eop;
3813 eop_desc = &txr->tx_base[last];
3814
3815 /*
3816	 * Get the index of the first descriptor
3817	 * AFTER the EOP of the first packet, so
3818	 * that we can do the simple comparison in
3819	 * the inner while loop.
3820 */
3821 if (++last == adapter->num_tx_desc)
3822 last = 0;
3823 done = last;
3824
3825 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3826 BUS_DMASYNC_POSTREAD);
3827
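	/* The hardware sets the DD (descriptor done) bit in the EOP
	 * descriptor's status once the whole packet has been sent;
	 * only then can all of its descriptors be reclaimed. */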
3828 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3829 /* We clean the range of the packet */
3830 while (first != done) {
3831 tx_desc->upper.data = 0;
3832 tx_desc->lower.data = 0;
3833 tx_desc->buffer_addr = 0;
3834 ++txr->tx_avail;
3835 ++processed;
3836
3837 if (tx_buffer->m_head) {
3838 bus_dmamap_sync(txr->txtag,
3839 tx_buffer->map,
3840 BUS_DMASYNC_POSTWRITE);
3841 bus_dmamap_unload(txr->txtag,
3842 tx_buffer->map);
3843 m_freem(tx_buffer->m_head);
3844 tx_buffer->m_head = NULL;
3845 }
3846 tx_buffer->next_eop = -1;
3847 txr->watchdog_time = ticks;
3848
3849 if (++first == adapter->num_tx_desc)
3850 first = 0;
3851
3852 tx_buffer = &txr->tx_buffers[first];
3853 tx_desc = &txr->tx_base[first];
3854 }
3855 ++ifp->if_opackets;
3856 /* See if we can continue to the next packet */
3857 last = tx_buffer->next_eop;
3858 if (last != -1) {
3859 eop_desc = &txr->tx_base[last];
3860 /* Get new done point */
3861 if (++last == adapter->num_tx_desc) last = 0;
3862 done = last;
3863 } else
3864 break;
3865 }
3866 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3867 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3868
3869 txr->next_to_clean = first;
3870
3871 /*
3872	** Watchdog calculation: we know there's
3873	** work outstanding or the first return
3874	** would have been taken, so nothing processed
3875	** for too long indicates a hang. The local timer
3876	** will examine this and do a reset if needed.
3877 */
3878 if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3879 txr->queue_status = EM_QUEUE_HUNG;
3880
3881 /*
3882 * If we have a minimum free, clear IFF_DRV_OACTIVE
3883 * to tell the stack that it is OK to send packets.
3884 * Notice that all writes of OACTIVE happen under the
3885 * TX lock which, with a single queue, guarantees
3886 * sanity.
3887 */
3888 if (txr->tx_avail >= EM_MAX_SCATTER)
3889 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3890
3891 /* Disable watchdog if all clean */
3892 if (txr->tx_avail == adapter->num_tx_desc) {
3893 txr->queue_status = EM_QUEUE_IDLE;
3894 }
3895}
3896
3897
3898/*********************************************************************
3899 *
3900 * Refresh RX descriptor mbufs from system mbuf buffer pool.
3901 *
3902 **********************************************************************/
3903static void
3904em_refresh_mbufs(struct rx_ring *rxr, int limit)
3905{
3906 struct adapter *adapter = rxr->adapter;
3907 struct mbuf *m;
3908 bus_dma_segment_t segs[1];
3909 struct em_buffer *rxbuf;
3910 int i, j, error, nsegs;
3911 bool cleaned = FALSE;
3912
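	/* 'i' walks the slots being refreshed while 'j' runs one
	 * descriptor ahead, so the loop stops before reaching 'limit',
	 * the first slot we must not touch. */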
3913 i = j = rxr->next_to_refresh;
3914 /*
3915 ** Get one descriptor beyond
3916 ** our work mark to control
3917 ** the loop.
3918 */
3919 if (++j == adapter->num_rx_desc)
3920 j = 0;
3921
3922 while (j != limit) {
3923 rxbuf = &rxr->rx_buffers[i];
3924 if (rxbuf->m_head == NULL) {
3925 m = m_getjcl(M_DONTWAIT, MT_DATA,
3926 M_PKTHDR, adapter->rx_mbuf_sz);
3927 /*
3928			** If we have a temporary resource shortage
3929			** that causes a failure, just abort the refresh
3930			** for now; we will return to this point when
3931			** reinvoked from em_rxeof.
3932 */
3933 if (m == NULL)
3934 goto update;
3935 } else
3936 m = rxbuf->m_head;
3937
3938 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3939 m->m_flags |= M_PKTHDR;
3940 m->m_data = m->m_ext.ext_buf;
3941
3942 /* Use bus_dma machinery to setup the memory mapping */
3943 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3944 m, segs, &nsegs, BUS_DMA_NOWAIT);
3945 if (error != 0) {
3946 printf("Refresh mbufs: hdr dmamap load"
3947 " failure - %d\n", error);
3948 m_free(m);
3949 rxbuf->m_head = NULL;
3950 goto update;
3951 }
3952 rxbuf->m_head = m;
3953 bus_dmamap_sync(rxr->rxtag,
3954 rxbuf->map, BUS_DMASYNC_PREREAD);
3955 rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
3956 cleaned = TRUE;
3957
3958		i = j; /* Next is precalculated for us */
3959 rxr->next_to_refresh = i;
3960 /* Calculate next controlling index */
3961 if (++j == adapter->num_rx_desc)
3962 j = 0;
3963 }
3964update:
3965 /*
3966	** Update the tail pointer only if, and
3967	** only as far as, we have refreshed.
3968 */
3969 if (cleaned)
3970 E1000_WRITE_REG(&adapter->hw,
3971 E1000_RDT(rxr->me), rxr->next_to_refresh);
3972
3973 return;
3974}
3975
3976
3977/*********************************************************************
3978 *
3979 * Allocate memory for rx_buffer structures. Since we use one
3980 * rx_buffer per received packet, the maximum number of rx_buffer's
3981 * that we'll need is equal to the number of receive descriptors
3982 * that we've allocated.
3983 *
3984 **********************************************************************/
3985static int
3986em_allocate_receive_buffers(struct rx_ring *rxr)
3987{
3988 struct adapter *adapter = rxr->adapter;
3989 device_t dev = adapter->dev;
3990 struct em_buffer *rxbuf;
3991 int error;
3992
3993 rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
3994 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3995 if (rxr->rx_buffers == NULL) {
3996 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3997 return (ENOMEM);
3998 }
3999
4000 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4001 1, 0, /* alignment, bounds */
4002 BUS_SPACE_MAXADDR, /* lowaddr */
4003 BUS_SPACE_MAXADDR, /* highaddr */
4004 NULL, NULL, /* filter, filterarg */
4005 MJUM9BYTES, /* maxsize */
4006 1, /* nsegments */
4007 MJUM9BYTES, /* maxsegsize */
4008 0, /* flags */
4009 NULL, /* lockfunc */
4010 NULL, /* lockarg */
4011 &rxr->rxtag);
4012 if (error) {
4013 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4014 __func__, error);
4015 goto fail;
4016 }
4017
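	/* Create one DMA map per descriptor slot; the tag above allows
	 * a single contiguous segment of at most MJUM9BYTES (9KB),
	 * the largest receive mbuf this driver allocates. */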
4018	rxbuf = rxr->rx_buffers;
4019	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4021 error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
4022 &rxbuf->map);
4023 if (error) {
4024 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4025 __func__, error);
4026 goto fail;
4027 }
4028 }
4029
4030 return (0);
4031
4032fail:
4033 em_free_receive_structures(adapter);
4034 return (error);
4035}
4036
4037
4038/*********************************************************************
4039 *
4040 * Initialize a receive ring and its buffers.
4041 *
4042 **********************************************************************/
4043static int
4044em_setup_receive_ring(struct rx_ring *rxr)
4045{
4046 struct adapter *adapter = rxr->adapter;
4047 struct em_buffer *rxbuf;
4048 bus_dma_segment_t seg[1];
4049 int rsize, nsegs, error = 0;
4050#ifdef DEV_NETMAP
4051 struct netmap_adapter *na = NA(adapter->ifp);
4052 struct netmap_slot *slot;
4053#endif
4054
4055
4056 /* Clear the ring contents */
4057 EM_RX_LOCK(rxr);
4058 rsize = roundup2(adapter->num_rx_desc *
4059 sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4060 bzero((void *)rxr->rx_base, rsize);
4061#ifdef DEV_NETMAP
4062 slot = netmap_reset(na, NR_RX, 0, 0);
4063#endif
4064
4065 /*
4066 ** Free current RX buffer structs and their mbufs
4067 */
4068 for (int i = 0; i < adapter->num_rx_desc; i++) {
4069 rxbuf = &rxr->rx_buffers[i];
4070 if (rxbuf->m_head != NULL) {
4071 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4072 BUS_DMASYNC_POSTREAD);
4073 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4074 m_freem(rxbuf->m_head);
4075 rxbuf->m_head = NULL; /* mark as freed */
4076 }
4077 }
4078
4079 /* Now replenish the mbufs */
4080 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4081 rxbuf = &rxr->rx_buffers[j];
4082#ifdef DEV_NETMAP
4083 if (slot) {
4084 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4085 uint64_t paddr;
4086 void *addr;
4087
4088 addr = PNMB(slot + si, &paddr);
4089 netmap_load_map(rxr->rxtag, rxbuf->map, addr);
4090 /* Update descriptor */
4091 rxr->rx_base[j].buffer_addr = htole64(paddr);
4092 continue;
4093 }
4094#endif /* DEV_NETMAP */
4095 rxbuf->m_head = m_getjcl(M_DONTWAIT, MT_DATA,
4096 M_PKTHDR, adapter->rx_mbuf_sz);
4097 if (rxbuf->m_head == NULL) {
4098 error = ENOBUFS;
4099 goto fail;
4100 }
4101 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4102 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4103 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4104
4105 /* Get the memory mapping */
4106 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4107 rxbuf->map, rxbuf->m_head, seg,
4108 &nsegs, BUS_DMA_NOWAIT);
4109 if (error != 0) {
4110 m_freem(rxbuf->m_head);
4111 rxbuf->m_head = NULL;
4112 goto fail;
4113 }
4114 bus_dmamap_sync(rxr->rxtag,
4115 rxbuf->map, BUS_DMASYNC_PREREAD);
4116
4117 /* Update descriptor */
4118 rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4119 }
4120 rxr->next_to_check = 0;
4121 rxr->next_to_refresh = 0;
4122 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4123 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4124
4125fail:
4126 EM_RX_UNLOCK(rxr);
4127 return (error);
4128}
4129
4130/*********************************************************************
4131 *
4132 * Initialize all receive rings.
4133 *
4134 **********************************************************************/
4135static int
4136em_setup_receive_structures(struct adapter *adapter)
4137{
4138 struct rx_ring *rxr = adapter->rx_rings;
4139 int q;
4140
4141 for (q = 0; q < adapter->num_queues; q++, rxr++)
4142 if (em_setup_receive_ring(rxr))
4143 goto fail;
4144
4145 return (0);
4146fail:
4147 /*
4148	 * Free the RX buffers allocated so far; we will only handle
4149	 * the rings that completed, since the failing case will have
4150	 * cleaned up for itself. 'q' failed, so it's the terminus.
4151 */
4152 for (int i = 0; i < q; ++i) {
4153 rxr = &adapter->rx_rings[i];
4154 for (int n = 0; n < adapter->num_rx_desc; n++) {
4155 struct em_buffer *rxbuf;
4156 rxbuf = &rxr->rx_buffers[n];
4157 if (rxbuf->m_head != NULL) {
4158 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4159 BUS_DMASYNC_POSTREAD);
4160 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4161 m_freem(rxbuf->m_head);
4162 rxbuf->m_head = NULL;
4163 }
4164 }
4165 rxr->next_to_check = 0;
4166 rxr->next_to_refresh = 0;
4167 }
4168
4169 return (ENOBUFS);
4170}
4171
4172/*********************************************************************
4173 *
4174 * Free all receive rings.
4175 *
4176 **********************************************************************/
4177static void
4178em_free_receive_structures(struct adapter *adapter)
4179{
4180 struct rx_ring *rxr = adapter->rx_rings;
4181
4182 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4183 em_free_receive_buffers(rxr);
4184 /* Free the ring memory as well */
4185 em_dma_free(adapter, &rxr->rxdma);
4186 EM_RX_LOCK_DESTROY(rxr);
4187 }
4188
4189 free(adapter->rx_rings, M_DEVBUF);
4190}
4191
4192
4193/*********************************************************************
4194 *
4195 * Free receive ring data structures
4196 *
4197 **********************************************************************/
4198static void
4199em_free_receive_buffers(struct rx_ring *rxr)
4200{
4201 struct adapter *adapter = rxr->adapter;
4202 struct em_buffer *rxbuf = NULL;
4203
4204 INIT_DEBUGOUT("free_receive_buffers: begin");
4205
4206 if (rxr->rx_buffers != NULL) {
4207 for (int i = 0; i < adapter->num_rx_desc; i++) {
4208 rxbuf = &rxr->rx_buffers[i];
4209 if (rxbuf->map != NULL) {
4210 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4211 BUS_DMASYNC_POSTREAD);
4212 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4213 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4214 }
4215 if (rxbuf->m_head != NULL) {
4216 m_freem(rxbuf->m_head);
4217 rxbuf->m_head = NULL;
4218 }
4219 }
4220 free(rxr->rx_buffers, M_DEVBUF);
4221 rxr->rx_buffers = NULL;
4222 rxr->next_to_check = 0;
4223 rxr->next_to_refresh = 0;
4224 }
4225
4226 if (rxr->rxtag != NULL) {
4227 bus_dma_tag_destroy(rxr->rxtag);
4228 rxr->rxtag = NULL;
4229 }
4230
4231 return;
4232}
4233
4234
4235/*********************************************************************
4236 *
4237 * Enable receive unit.
4238 *
4239 **********************************************************************/
4240#define MAX_INTS_PER_SEC 8000
4241 #define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))
4242
4243static void
4244em_initialize_receive_unit(struct adapter *adapter)
4245{
4246 struct rx_ring *rxr = adapter->rx_rings;
4247 struct ifnet *ifp = adapter->ifp;
4248 struct e1000_hw *hw = &adapter->hw;
4249 u64 bus_addr;
4250 u32 rctl, rxcsum;
4251
4252 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4253
4254 /*
4255 * Make sure receives are disabled while setting
4256 * up the descriptor ring
4257 */
4258 rctl = E1000_READ_REG(hw, E1000_RCTL);
4259 /* Do not disable if ever enabled on this hardware */
4260 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4261 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4262
4263 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4264 adapter->rx_abs_int_delay.value);
4265 /*
4266	 * Set the interrupt throttling rate. The ITR register counts
4267	 * in 256ns units, so DEFAULT_ITR = 1s/(MAX_INTS_PER_SEC * 256ns)
	 * = 1000000000/(8000 * 256) = 488, capping the device at
	 * roughly MAX_INTS_PER_SEC interrupts per second.
4268 */
4269 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4270
4271 /*
4272 ** When using MSIX interrupts we need to throttle
4273 ** using the EITR register (82574 only)
4274 */
4275 if (hw->mac.type == e1000_82574) {
4276 for (int i = 0; i < 4; i++)
4277 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4278 DEFAULT_ITR);
4279 /* Disable accelerated acknowledge */
4280 E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4281 }
4282
4283 if (ifp->if_capenable & IFCAP_RXCSUM) {
4284 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4285 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
4286 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4287 }
4288
4289 /*
4290 ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4291 ** long latencies are observed, like Lenovo X60. This
4292 ** change eliminates the problem, but since having positive
4293 ** values in RDTR is a known source of problems on other
4294 ** platforms another solution is being sought.
4295 */
4296 if (hw->mac.type == e1000_82573)
4297 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4298
4299 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4300 /* Setup the Base and Length of the Rx Descriptor Ring */
4301 bus_addr = rxr->rxdma.dma_paddr;
4302 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4303 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4304 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4305 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4306 /* Setup the Head and Tail Descriptor Pointers */
4307 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4308#ifdef DEV_NETMAP
4309 /*
4310 * an init() while a netmap client is active must
4311 * preserve the rx buffers passed to userspace.
4312 * In this driver it means we adjust RDT to
4313 * something different from na->num_rx_desc - 1.
4314 */
4315 if (ifp->if_capenable & IFCAP_NETMAP) {
4316 struct netmap_adapter *na = NA(adapter->ifp);
4317 struct netmap_kring *kring = &na->rx_rings[i];
4318 int t = na->num_rx_desc - 1 - kring->nr_hwavail;
4319
4320 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4321 } else
4322#endif /* DEV_NETMAP */
4323 E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1);
4324 }
4325
4326 /* Set PTHRESH for improved jumbo performance */
4327 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4328 (adapter->hw.mac.type == e1000_pch2lan) ||
4329 (adapter->hw.mac.type == e1000_ich10lan)) &&
4330 (ifp->if_mtu > ETHERMTU)) {
4331 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4332 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4333 }
4334
4335 if (adapter->hw.mac.type == e1000_pch2lan) {
4336 if (ifp->if_mtu > ETHERMTU)
4337 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4338 else
4339 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4340 }
4341
4342 /* Setup the Receive Control Register */
4343 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4344 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4345 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4346 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4347
4348 /* Strip the CRC */
4349 rctl |= E1000_RCTL_SECRC;
4350
4351 /* Make sure VLAN Filters are off */
4352 rctl &= ~E1000_RCTL_VFE;
4353 rctl &= ~E1000_RCTL_SBP;
4354
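	/* Translate the mbuf cluster size into the hardware buffer
	 * size encoding; the 4096 and 8192 byte sizes are in the
	 * extended range and so also need the BSEX bit. */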
4355 if (adapter->rx_mbuf_sz == MCLBYTES)
4356 rctl |= E1000_RCTL_SZ_2048;
4357 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4358 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4359 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4360 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4361
4362 if (ifp->if_mtu > ETHERMTU)
4363 rctl |= E1000_RCTL_LPE;
4364 else
4365 rctl &= ~E1000_RCTL_LPE;
4366
4367 /* Write out the settings */
4368 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4369
4370 return;
4371}
4372
4373
4374/*********************************************************************
4375 *
4376 * This routine executes in interrupt context. It replenishes
4377 * the mbufs in the descriptor and sends data which has been
4378 * dma'ed into host memory to upper layer.
4379 *
4380 * We loop at most count times if count is > 0, or until done if
4381 * count < 0.
4382 *
4383 * For polling we also now return the number of cleaned packets
4384 *********************************************************************/
4385static bool
4386em_rxeof(struct rx_ring *rxr, int count, int *done)
4387{
4388 struct adapter *adapter = rxr->adapter;
4389 struct ifnet *ifp = adapter->ifp;
4390 struct mbuf *mp, *sendmp;
4391 u8 status = 0;
4392 u16 len;
4393 int i, processed, rxdone = 0;
4394 bool eop;
4395 struct e1000_rx_desc *cur;
4396
4397 EM_RX_LOCK(rxr);
4398
4399#ifdef DEV_NETMAP
4400 if (ifp->if_capenable & IFCAP_NETMAP) {
4401 struct netmap_adapter *na = NA(ifp);
4402
4403 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4404 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4405 EM_RX_UNLOCK(rxr);
4406 EM_CORE_LOCK(adapter);
4407 selwakeuppri(&na->rx_si, PI_NET);
4408 EM_CORE_UNLOCK(adapter);
4409 return (0);
4410 }
4411#endif /* DEV_NETMAP */
4412
4413 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4414
4415 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4416 break;
4417
4418 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4419 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4420
4421 cur = &rxr->rx_base[i];
4422 status = cur->status;
4423 mp = sendmp = NULL;
4424
4425 if ((status & E1000_RXD_STAT_DD) == 0)
4426 break;
4427
4428 len = le16toh(cur->length);
4429 eop = (status & E1000_RXD_STAT_EOP) != 0;
4430
4431 if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4432 (rxr->discard == TRUE)) {
4433 ifp->if_ierrors++;
4434 ++rxr->rx_discarded;
4435 if (!eop) /* Catch subsequent segs */
4436 rxr->discard = TRUE;
4437 else
4438 rxr->discard = FALSE;
4439 em_rx_discard(rxr, i);
4440 goto next_desc;
4441 }
4442
4443 /* Assign correct length to the current fragment */
4444 mp = rxr->rx_buffers[i].m_head;
4445 mp->m_len = len;
4446
4447 /* Trigger for refresh */
4448 rxr->rx_buffers[i].m_head = NULL;
4449
4450 /* First segment? */
4451 if (rxr->fmp == NULL) {
4452 mp->m_pkthdr.len = len;
4453 rxr->fmp = rxr->lmp = mp;
4454 } else {
4455 /* Chain mbuf's together */
4456 mp->m_flags &= ~M_PKTHDR;
4457 rxr->lmp->m_next = mp;
4458 rxr->lmp = mp;
4459 rxr->fmp->m_pkthdr.len += len;
4460 }
4461
4462 if (eop) {
4463 --count;
4464 sendmp = rxr->fmp;
4465 sendmp->m_pkthdr.rcvif = ifp;
4466 ifp->if_ipackets++;
4467 em_receive_checksum(cur, sendmp);
4468#ifndef __NO_STRICT_ALIGNMENT
4469 if (adapter->max_frame_size >
4470 (MCLBYTES - ETHER_ALIGN) &&
4471 em_fixup_rx(rxr) != 0)
4472 goto skip;
4473#endif
4474 if (status & E1000_RXD_STAT_VP) {
4475 sendmp->m_pkthdr.ether_vtag =
4476 le16toh(cur->special);
4477 sendmp->m_flags |= M_VLANTAG;
4478 }
4479#ifndef __NO_STRICT_ALIGNMENT
4480skip:
4481#endif
4482 rxr->fmp = rxr->lmp = NULL;
4483 }
4484next_desc:
4485 /* Zero out the receive descriptors status. */
4486 cur->status = 0;
4487 ++rxdone; /* cumulative for POLL */
4488 ++processed;
4489
4490 /* Advance our pointers to the next descriptor. */
4491 if (++i == adapter->num_rx_desc)
4492 i = 0;
4493
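		/* Drop the RX lock around if_input() so the stack can
		 * run without it; next_to_check is saved first and
		 * reloaded afterwards in case it moved while unlocked. */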
4494 /* Send to the stack */
4495 if (sendmp != NULL) {
4496 rxr->next_to_check = i;
4497 EM_RX_UNLOCK(rxr);
4498 (*ifp->if_input)(ifp, sendmp);
4499 EM_RX_LOCK(rxr);
4500 i = rxr->next_to_check;
4501 }
4502
4503 /* Only refresh mbufs every 8 descriptors */
4504 if (processed == 8) {
4505 em_refresh_mbufs(rxr, i);
4506 processed = 0;
4507 }
4508 }
4509
4510 /* Catch any remaining refresh work */
4511 if (e1000_rx_unrefreshed(rxr))
4512 em_refresh_mbufs(rxr, i);
4513
4514 rxr->next_to_check = i;
4515 if (done != NULL)
4516 *done = rxdone;
4517 EM_RX_UNLOCK(rxr);
4518
4519 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4520}
4521
4522static __inline void
4523em_rx_discard(struct rx_ring *rxr, int i)
4524{
4525 struct em_buffer *rbuf;
4526
4527 rbuf = &rxr->rx_buffers[i];
4528 /* Free any previous pieces */
4529 if (rxr->fmp != NULL) {
4530 rxr->fmp->m_flags |= M_PKTHDR;
4531 m_freem(rxr->fmp);
4532 rxr->fmp = NULL;
4533 rxr->lmp = NULL;
4534 }
4535 /*
4536 ** Free buffer and allow em_refresh_mbufs()
4537 ** to clean up and recharge buffer.
4538 */
4539 if (rbuf->m_head) {
4540 m_free(rbuf->m_head);
4541 rbuf->m_head = NULL;
4542 }
4543 return;
4544}
4545
4546#ifndef __NO_STRICT_ALIGNMENT
4547/*
4548  * When jumbo frames are enabled we should realign the entire payload on
4549  * architectures with strict alignment. This is a serious design mistake of the
4550  * 8254x, as it nullifies the benefit of DMA. The 8254x only allows the RX
4551  * buffer size to be 2048/4096/8192/16384; what we really want is
4552  * 2048 - ETHER_ALIGN, so the payload would be aligned. On architectures
4553  * without strict alignment restrictions the 8254x still performs unaligned
4554  * memory accesses, which reduces performance as well. To avoid copying an
4555  * entire frame just to realign it, we allocate a new mbuf, copy only the
4556  * ethernet header into it, and prepend the new mbuf to the existing chain.
4557  *
4558  * Be aware that best performance of the 8254x is achieved only when jumbo
4559  * frames are not used at all on architectures with strict alignment.
4560 */
4561static int
4562em_fixup_rx(struct rx_ring *rxr)
4563{
4564 struct adapter *adapter = rxr->adapter;
4565 struct mbuf *m, *n;
4566 int error;
4567
4568 error = 0;
4569 m = rxr->fmp;
4570 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4571 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4572 m->m_data += ETHER_HDR_LEN;
4573 } else {
4574 MGETHDR(n, M_DONTWAIT, MT_DATA);
4575 if (n != NULL) {
4576 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4577 m->m_data += ETHER_HDR_LEN;
4578 m->m_len -= ETHER_HDR_LEN;
4579 n->m_len = ETHER_HDR_LEN;
4580 M_MOVE_PKTHDR(n, m);
4581 n->m_next = m;
4582 rxr->fmp = n;
4583 } else {
4584 adapter->dropped_pkts++;
4585 m_freem(rxr->fmp);
4586 rxr->fmp = NULL;
4587 error = ENOMEM;
4588 }
4589 }
4590
4591 return (error);
4592}
4593#endif
4594
4595/*********************************************************************
4596 *
4597 * Verify that the hardware indicated that the checksum is valid.
4598 * Inform the stack about the status of checksum so that stack
4599 * doesn't spend time verifying the checksum.
4600 *
4601 *********************************************************************/
4602static void
4603em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4604{
4605 /* Ignore Checksum bit is set */
4606 if (rx_desc->status & E1000_RXD_STAT_IXSM) {
4607 mp->m_pkthdr.csum_flags = 0;
4608 return;
4609 }
4610
4611 if (rx_desc->status & E1000_RXD_STAT_IPCS) {
4612 /* Did it pass? */
4613 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
4614 /* IP Checksum Good */
4615 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4616 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4617
4618 } else {
4619 mp->m_pkthdr.csum_flags = 0;
4620 }
4621 }
4622
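	/* TCPCS means the hardware checked the TCP/UDP checksum.
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data = 0xffff
	 * tells the stack the full checksum, pseudo-header included,
	 * has been verified, so it can skip its own check. */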
4623 if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
4624 /* Did it pass? */
4625 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
4626 mp->m_pkthdr.csum_flags |=
4627 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4628 mp->m_pkthdr.csum_data = htons(0xffff);
4629 }
4630 }
4631}
4632
4633/*
4634  * This routine is run via a vlan
4635 * config EVENT
4636 */
4637static void
4638em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4639{
4640 struct adapter *adapter = ifp->if_softc;
4641 u32 index, bit;
4642
4643 if (ifp->if_softc != arg) /* Not our event */
4644 return;
4645
4646 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
4647 return;
4648
4649 EM_CORE_LOCK(adapter);
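	/* The VFTA is 128 32-bit words covering VLAN IDs 0-4095:
	 * word index = vtag / 32, bit within the word = vtag % 32. */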
4650 index = (vtag >> 5) & 0x7F;
4651 bit = vtag & 0x1F;
4652 adapter->shadow_vfta[index] |= (1 << bit);
4653 ++adapter->num_vlans;
4654 /* Re-init to load the changes */
4655 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4656 em_init_locked(adapter);
4657 EM_CORE_UNLOCK(adapter);
4658}
4659
4660/*
4661  * This routine is run via a vlan
4662 * unconfig EVENT
4663 */
4664static void
4665em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4666{
4667 struct adapter *adapter = ifp->if_softc;
4668 u32 index, bit;
4669
4670 if (ifp->if_softc != arg)
4671 return;
4672
4673 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4674 return;
4675
4676 EM_CORE_LOCK(adapter);
4677 index = (vtag >> 5) & 0x7F;
4678 bit = vtag & 0x1F;
4679 adapter->shadow_vfta[index] &= ~(1 << bit);
4680 --adapter->num_vlans;
4681 /* Re-init to load the changes */
4682 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4683 em_init_locked(adapter);
4684 EM_CORE_UNLOCK(adapter);
4685}
4686
4687static void
4688em_setup_vlan_hw_support(struct adapter *adapter)
4689{
4690 struct e1000_hw *hw = &adapter->hw;
4691 u32 reg;
4692
4693 /*
4694	** We get here thru init_locked, meaning
4695	** a soft reset; this has already cleared
4696	** the VFTA and other state, so if no
4697	** vlans have been registered, do nothing.
4698 */
4699 if (adapter->num_vlans == 0)
4700 return;
4701
4702 /*
4703	** A soft reset zeroes out the VFTA, so
4704 ** we need to repopulate it now.
4705 */
4706 for (int i = 0; i < EM_VFTA_SIZE; i++)
4707 if (adapter->shadow_vfta[i] != 0)
4708 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4709 i, adapter->shadow_vfta[i]);
4710
4711 reg = E1000_READ_REG(hw, E1000_CTRL);
4712 reg |= E1000_CTRL_VME;
4713 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4714
4715 /* Enable the Filter Table */
4716 reg = E1000_READ_REG(hw, E1000_RCTL);
4717 reg &= ~E1000_RCTL_CFIEN;
4718 reg |= E1000_RCTL_VFE;
4719 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4720}
4721
4722static void
4723em_enable_intr(struct adapter *adapter)
4724{
4725 struct e1000_hw *hw = &adapter->hw;
4726 u32 ims_mask = IMS_ENABLE_MASK;
4727
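	/* On the 82574 also arm the MSI-X vectors, and let their
	 * interrupt causes auto-clear through the EIAC register. */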
4728 if (hw->mac.type == e1000_82574) {
4729 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4730 ims_mask |= EM_MSIX_MASK;
4731 }
4732 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4733}
4734
4735static void
4736em_disable_intr(struct adapter *adapter)
4737{
4738 struct e1000_hw *hw = &adapter->hw;
4739
4740 if (hw->mac.type == e1000_82574)
4741 E1000_WRITE_REG(hw, EM_EIAC, 0);
4742 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4743}
4744
4745/*
4746  * Bit of a misnomer: what this really means is
4747  * to enable OS management of the system, i.e.
4748  * to disable the special hardware management features.
4749 */
4750static void
4751em_init_manageability(struct adapter *adapter)
4752{
4753 /* A shared code workaround */
4754#define E1000_82542_MANC2H E1000_MANC2H
4755 if (adapter->has_manage) {
4756 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4757 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4758
4759 /* disable hardware interception of ARP */
4760 manc &= ~(E1000_MANC_ARP_EN);
4761
4762 /* enable receiving management packets to the host */
4763 manc |= E1000_MANC_EN_MNG2HOST;
4764#define E1000_MNG2HOST_PORT_623 (1 << 5)
4765#define E1000_MNG2HOST_PORT_664 (1 << 6)
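		/* UDP ports 623 (RMCP/IPMI) and 664 (secure RMCP) carry
		 * management traffic that must reach the host. */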
4766 manc2h |= E1000_MNG2HOST_PORT_623;
4767 manc2h |= E1000_MNG2HOST_PORT_664;
4768 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4769 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4770 }
4771}
4772
4773/*
4774 * Give control back to hardware management
4775 * controller if there is one.
4776 */
4777static void
4778em_release_manageability(struct adapter *adapter)
4779{
4780 if (adapter->has_manage) {
4781 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4782
4783 /* re-enable hardware interception of ARP */
4784 manc |= E1000_MANC_ARP_EN;
4785 manc &= ~E1000_MANC_EN_MNG2HOST;
4786
4787 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4788 }
4789}
4790
4791/*
4792 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4793 * For ASF and Pass Through versions of f/w this means
4794 * that the driver is loaded. For AMT version type f/w
4795 * this means that the network i/f is open.
4796 */
4797static void
4798em_get_hw_control(struct adapter *adapter)
4799{
4800 u32 ctrl_ext, swsm;
4801
4802 if (adapter->hw.mac.type == e1000_82573) {
4803 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4804 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4805 swsm | E1000_SWSM_DRV_LOAD);
4806 return;
4807 }
4808 /* else */
4809 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4810 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4811 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4812 return;
4813}
4814
4815/*
4816 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4817 * For ASF and Pass Through versions of f/w this means that
4818 * the driver is no longer loaded. For AMT versions of the
4819 * f/w this means that the network i/f is closed.
4820 */
4821static void
4822em_release_hw_control(struct adapter *adapter)
4823{
4824 u32 ctrl_ext, swsm;
4825
4826 if (!adapter->has_manage)
4827 return;
4828
4829 if (adapter->hw.mac.type == e1000_82573) {
4830 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4831 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4832 swsm & ~E1000_SWSM_DRV_LOAD);
4833 return;
4834 }
4835 /* else */
4836 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4837 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4838 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4839 return;
4840}
4841
4842static int
4843em_is_valid_ether_addr(u8 *addr)
4844{
4845 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4846
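	/* addr[0] & 1 tests the multicast/group bit; a valid station
	 * address must be unicast and must not be all zeros. */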
4847 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4848 return (FALSE);
4849 }
4850
4851 return (TRUE);
4852}
4853
4854/*
4855** Parse the interface capabilities with regard
4856** to both system management and wake-on-lan for
4857** later use.
4858*/
4859static void
4860em_get_wakeup(device_t dev)
4861{
4862 struct adapter *adapter = device_get_softc(dev);
4863 u16 eeprom_data = 0, device_id, apme_mask;
4864
4865 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4866 apme_mask = EM_EEPROM_APME;
4867
4868 switch (adapter->hw.mac.type) {
4869 case e1000_82573:
4870 case e1000_82583:
4871 adapter->has_amt = TRUE;
4872 /* Falls thru */
4873 case e1000_82571:
4874 case e1000_82572:
4875 case e1000_80003es2lan:
4876 if (adapter->hw.bus.func == 1) {
4877 e1000_read_nvm(&adapter->hw,
4878 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4879 break;
4880 } else
4881 e1000_read_nvm(&adapter->hw,
4882 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4883 break;
4884 case e1000_ich8lan:
4885 case e1000_ich9lan:
4886 case e1000_ich10lan:
4887 case e1000_pchlan:
4888 case e1000_pch2lan:
4889 apme_mask = E1000_WUC_APME;
4890 adapter->has_amt = TRUE;
4891 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4892 break;
4893 default:
4894 e1000_read_nvm(&adapter->hw,
4895 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4896 break;
4897 }
4898 if (eeprom_data & apme_mask)
4899 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4900 /*
4901	 * We have the eeprom settings; now apply the special cases
4902	 * where the eeprom may be wrong or the board simply won't
4903	 * support wake on lan on a particular port.
4904 */
4905 device_id = pci_get_device(dev);
4906 switch (device_id) {
4907 case E1000_DEV_ID_82571EB_FIBER:
4908 /* Wake events only supported on port A for dual fiber
4909 * regardless of eeprom setting */
4910 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4911 E1000_STATUS_FUNC_1)
4912 adapter->wol = 0;
4913 break;
4914 case E1000_DEV_ID_82571EB_QUAD_COPPER:
4915 case E1000_DEV_ID_82571EB_QUAD_FIBER:
4916 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4917 /* if quad port adapter, disable WoL on all but port A */
4918 if (global_quad_port_a != 0)
4919 adapter->wol = 0;
4920 /* Reset for multiple quad port adapters */
4921 if (++global_quad_port_a == 4)
4922 global_quad_port_a = 0;
4923 break;
4924 }
4925 return;
4926}
4927
4928
4929/*
4930 * Enable PCI Wake On Lan capability
4931 */
4932static void
4933em_enable_wakeup(device_t dev)
4934{
4935 struct adapter *adapter = device_get_softc(dev);
4936 struct ifnet *ifp = adapter->ifp;
4937 u32 pmc, ctrl, ctrl_ext, rctl;
4938 u16 status;
4939
4940 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4941 return;
4942
4943 /* Advertise the wakeup capability */
4944 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4945 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4946 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4947 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4948
4949 if ((adapter->hw.mac.type == e1000_ich8lan) ||
4950 (adapter->hw.mac.type == e1000_pchlan) ||
4951 (adapter->hw.mac.type == e1000_ich9lan) ||
4952 (adapter->hw.mac.type == e1000_ich10lan))
4953 e1000_suspend_workarounds_ich8lan(&adapter->hw);
4954
4955 /* Keep the laser running on Fiber adapters */
4956 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4957 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4958 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4959 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4960 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4961 }
4962
4963 /*
4964 ** Determine type of Wakeup: note that wol
4965 ** is set with all bits on by default.
4966 */
4967 if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4968 adapter->wol &= ~E1000_WUFC_MAG;
4969
4970 if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4971 adapter->wol &= ~E1000_WUFC_MC;
4972 else {
4973 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4974 rctl |= E1000_RCTL_MPE;
4975 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4976 }
4977
4978 if ((adapter->hw.mac.type == e1000_pchlan) ||
4979 (adapter->hw.mac.type == e1000_pch2lan)) {
4980 if (em_enable_phy_wakeup(adapter))
4981 return;
4982 } else {
4983 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4984 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4985 }
4986
4987 if (adapter->hw.phy.type == e1000_phy_igp_3)
4988 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
4989
4990 /* Request PME */
4991 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4992 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4993 if (ifp->if_capenable & IFCAP_WOL)
4994 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4995 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4996
4997 return;
4998}
4999
5000/*
5001 ** WOL in the newer chipset interfaces (pchlan)
5002 ** requires things to be copied into the PHY
5003*/
5004static int
5005em_enable_phy_wakeup(struct adapter *adapter)
5006{
5007 struct e1000_hw *hw = &adapter->hw;
5008 u32 mreg, ret = 0;
5009 u16 preg;
5010
5011 /* copy MAC RARs to PHY RARs */
5012 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5013
5014 /* copy MAC MTA to PHY MTA */
5015 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5016 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5017 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5018 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5019 (u16)((mreg >> 16) & 0xFFFF));
5020 }
5021
5022 /* configure PHY Rx Control register */
5023 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5024 mreg = E1000_READ_REG(hw, E1000_RCTL);
5025 if (mreg & E1000_RCTL_UPE)
5026 preg |= BM_RCTL_UPE;
5027 if (mreg & E1000_RCTL_MPE)
5028 preg |= BM_RCTL_MPE;
5029 preg &= ~(BM_RCTL_MO_MASK);
5030 if (mreg & E1000_RCTL_MO_3)
5031 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5032 << BM_RCTL_MO_SHIFT);
5033 if (mreg & E1000_RCTL_BAM)
5034 preg |= BM_RCTL_BAM;
5035 if (mreg & E1000_RCTL_PMCF)
5036 preg |= BM_RCTL_PMCF;
5037 mreg = E1000_READ_REG(hw, E1000_CTRL);
5038 if (mreg & E1000_CTRL_RFCE)
5039 preg |= BM_RCTL_RFCE;
5040 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5041
5042 /* enable PHY wakeup in MAC register */
5043 E1000_WRITE_REG(hw, E1000_WUC,
5044 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5045 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5046
5047 /* configure and enable PHY wakeup in PHY registers */
5048 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5049 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5050
5051 /* activate PHY wakeup */
5052 ret = hw->phy.ops.acquire(hw);
5053 if (ret) {
5054 printf("Could not acquire PHY\n");
5055 return ret;
5056 }
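	/* The wakeup enable bits live on PHY page 769 (the BM WUC
	 * enable page); select that page before touching the enable
	 * register below. */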
5057 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5058 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5059 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5060 if (ret) {
5061 printf("Could not read PHY page 769\n");
5062 goto out;
5063 }
5064 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5065 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5066 if (ret)
5067 printf("Could not set PHY Host Wakeup bit\n");
5068out:
5069 hw->phy.ops.release(hw);
5070
5071 return ret;
5072}
5073
5074static void
5075em_led_func(void *arg, int onoff)
5076{
5077 struct adapter *adapter = arg;
5078
5079 EM_CORE_LOCK(adapter);
5080 if (onoff) {
5081 e1000_setup_led(&adapter->hw);
5082 e1000_led_on(&adapter->hw);
5083 } else {
5084 e1000_led_off(&adapter->hw);
5085 e1000_cleanup_led(&adapter->hw);
5086 }
5087 EM_CORE_UNLOCK(adapter);
5088}
5089
5090/*
5091 ** Disable the ASPM L0s and L1 link states
5092*/
5093static void
5094em_disable_aspm(struct adapter *adapter)
5095{
5096 int base, reg;
5097	u16 link_cap, link_ctrl;
5098 device_t dev = adapter->dev;
5099
5100 switch (adapter->hw.mac.type) {
5101 case e1000_82573:
5102 case e1000_82574:
5103 case e1000_82583:
5104 break;
5105 default:
5106 return;
5107 }
5108 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5109 return;
5110 reg = base + PCIR_EXPRESS_LINK_CAP;
5111 link_cap = pci_read_config(dev, reg, 2);
5112 if ((link_cap & PCIM_LINK_CAP_ASPM) == 0)
5113 return;
5114 reg = base + PCIR_EXPRESS_LINK_CTL;
5115 link_ctrl = pci_read_config(dev, reg, 2);
5116	link_ctrl &= 0xFFFC; /* clear the ASPM L0s/L1 enables (bits 0 and 1) */
5117 pci_write_config(dev, reg, link_ctrl, 2);
5118 return;
5119}
5120
5121/**********************************************************************
5122 *
5123 * Update the board statistics counters.
5124 *
5125 **********************************************************************/
5126static void
5127em_update_stats_counters(struct adapter *adapter)
5128{
5129 struct ifnet *ifp;
5130
5131	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5132 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5133 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5134 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5135 }
5136 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5137 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5138 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5139 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5140
5141 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5142 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5143 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5144 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5145 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5146 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5147 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5148 /*
5149 ** For watchdog management we need to know if we have been
5150 ** paused during the last interval, so capture that here.
5151 */
5152 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5153 adapter->stats.xoffrxc += adapter->pause_frames;
5154 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5155 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5156 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5157 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5158 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5159 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5160 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5161 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5162 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5163 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5164 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5165 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5166
5167 /* For the 64-bit byte counters the low dword must be read first. */
5168 /* Both registers clear on the read of the high dword */
5169
5170 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5171 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5172 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5173 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5174
5175 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5176 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5177 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5178 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5179 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5180
5181 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5182 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5183
5184 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5185 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5186 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5187 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5188 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5189 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5190 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5191 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5192 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5193 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5194
5195 /* Interrupt Counts */
5196
5197 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5198 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5199 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5200 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5201 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5202 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5203 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5204 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5205 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5206
5207 if (adapter->hw.mac.type >= e1000_82543) {
5208 adapter->stats.algnerrc +=
5209 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5210 adapter->stats.rxerrc +=
5211 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5212 adapter->stats.tncrs +=
5213 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5214 adapter->stats.cexterr +=
5215 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5216 adapter->stats.tsctc +=
5217 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5218 adapter->stats.tsctfc +=
5219 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5220 }
5221 ifp = adapter->ifp;
5222
5223 ifp->if_collisions = adapter->stats.colc;
5224
5225 /* Rx Errors */
5226 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5227 adapter->stats.crcerrs + adapter->stats.algnerrc +
5228 adapter->stats.ruc + adapter->stats.roc +
5229 adapter->stats.mpc + adapter->stats.cexterr;
5230
5231 /* Tx Errors */
5232 ifp->if_oerrors = adapter->stats.ecol +
5233 adapter->stats.latecol + adapter->watchdog_events;
5234}
5235
5236/* Export a single 32-bit register via a read-only sysctl. */
5237static int
5238em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5239{
5240 struct adapter *adapter;
5241 u_int val;
5242
5243 adapter = oidp->oid_arg1;
5244 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5245 return (sysctl_handle_int(oidp, &val, 0, req));
5246}
5247
5248/*
5249 * Add sysctl variables, one per statistic, to the system.
5250 */
5251static void
5252em_add_hw_stats(struct adapter *adapter)
5253{
5254 device_t dev = adapter->dev;
5255
5256 struct tx_ring *txr = adapter->tx_rings;
5257 struct rx_ring *rxr = adapter->rx_rings;
5258
5259 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5260 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5261 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5262 struct e1000_hw_stats *stats = &adapter->stats;
5263
5264 struct sysctl_oid *stat_node, *queue_node, *int_node;
5265 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5266
5267#define QUEUE_NAME_LEN 32
5268 char namebuf[QUEUE_NAME_LEN];
5269
5270 /* Driver Statistics */
5271 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5272 CTLFLAG_RD, &adapter->link_irq,
5273 "Link MSIX IRQ Handled");
5274 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5275 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5276 "Std mbuf failed");
5277 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5278 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5279 "Std mbuf cluster failed");
5280 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5281 CTLFLAG_RD, &adapter->dropped_pkts,
5282 "Driver dropped packets");
5283 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5284 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5285 "Driver tx dma failure in xmit");
5286 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5287 CTLFLAG_RD, &adapter->rx_overruns,
5288 "RX overruns");
5289 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5290 CTLFLAG_RD, &adapter->watchdog_events,
5291 "Watchdog timeouts");
5292
5293 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5294 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5295 em_sysctl_reg_handler, "IU",
5296 "Device Control Register");
5297 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5298 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5299 em_sysctl_reg_handler, "IU",
5300 "Receiver Control Register");
5301 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5302 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5303 "Flow Control High Watermark");
5304 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5305 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5306 "Flow Control Low Watermark");
5307
5308 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5309 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5310 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5311 CTLFLAG_RD, NULL, "Queue Name");
5312 queue_list = SYSCTL_CHILDREN(queue_node);
5313
5314 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5315 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5316 E1000_TDH(txr->me),
5317 em_sysctl_reg_handler, "IU",
5318 "Transmit Descriptor Head");
5319 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5320 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5321 E1000_TDT(txr->me),
5322 em_sysctl_reg_handler, "IU",
5323 "Transmit Descriptor Tail");
5324 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5325 CTLFLAG_RD, &txr->tx_irq,
5326 "Queue MSI-X Transmit Interrupts");
5327 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5328 CTLFLAG_RD, &txr->no_desc_avail,
5329 "Queue No Descriptor Available");
5330
5331 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5332 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5333 E1000_RDH(rxr->me),
5334 em_sysctl_reg_handler, "IU",
5335 "Receive Descriptor Head");
5336 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5337 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5338 E1000_RDT(rxr->me),
5339 em_sysctl_reg_handler, "IU",
5340 "Receive Descriptor Tail");
5341 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5342 CTLFLAG_RD, &rxr->rx_irq,
5343 "Queue MSI-X Receive Interrupts");
5344 }

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
			CTLFLAG_RD, NULL, "Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &stats->symerrs,
			"Symbol Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &stats->sec,
			"Sequence Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &stats->dc,
			"Defer Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &stats->mpc,
			"Missed Packets");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &stats->rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &stats->ruc,
			"Receive Undersize");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &stats->roc,
			"Oversized Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &stats->rxerrc,
			"Receive Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &stats->crcerrs,
			"CRC errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &stats->algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &stats->cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &stats->xonrxc,
			"XON Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &stats->xontxc,
			"XON Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &stats->xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &stats->xofftxc,
			"XOFF Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &stats->gprc,
			"Good Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &stats->bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &stats->mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &stats->prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &stats->prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &stats->prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &stats->prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &stats->tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &stats->gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &stats->bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &stats->mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &stats->ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &stats->ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &stats->ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &stats->ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &stats->ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &stats->tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &stats->tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
			CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &stats->iac,
			"Interrupt Assertion Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &stats->icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &stats->icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &stats->ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &stats->ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &stats->ictxqec,
			"Interrupt Cause Tx Queue Empty Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &stats->ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &stats->icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &stats->icrxoc,
			"Interrupt Cause Receiver Overrun Count");
}
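
/*
 * Everything registered above hangs off the device's sysctl tree; with
 * a hypothetical unit number of 0 the nodes read as, for example:
 *
 *	# sysctl dev.em.0.mac_stats.crc_errs
 *	# sysctl dev.em.0.interrupts.asserts
 */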

/**********************************************************************
 *
 * This routine provides a way to dump out the adapter eeprom,
 * often a useful debug/service tool. This only dumps the first
 * 32 words; anything of interest lives in that range.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1)
		em_print_nvm_info(adapter);

	return (error);
}
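
/*
 * Writing 1 to the node triggers the dump; assuming it is registered
 * as "nvm" under the device tree (unit number hypothetical):
 *
 *	# sysctl dev.em.0.nvm=1
 */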

static void
em_print_nvm_info(struct adapter *adapter)
{
	u16 eeprom_data;
	int i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000 ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0 ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}
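
/*
 * The output produced above is eight 16-bit words per labeled row,
 * shaped like this (word values illustrative):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000 001b 2134 56ab 10d3 ffff ffff 0423 f746
 *	0x0010 ...
 */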

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
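
/*
 * A rough worked example of the conversion above, assuming the 1.024
 * usec tick granularity behind EM_USECS_TO_TICKS() in if_em.h: a
 * request of 100 usecs rounds to about 98 hardware ticks, and only
 * that 16-bit tick count is merged into the low word of the delay
 * register.
 */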

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}
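
/*
 * A sketch of an attach-time call, modeled on how this driver wires up
 * its receive-delay tunable (exact argument names may differ elsewhere
 * in the file):
 *
 *	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *	    E1000_REGISTER(&adapter->hw, E1000_RDTR),
 *	    em_rx_int_delay_dflt);
 */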

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
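
/*
 * A sketch of how a simple integer limit gets published through the
 * helper above; the receive processing limit is the usual example in
 * this driver (names here are illustrative and may differ from the
 * actual attach code):
 *
 *	em_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, em_rx_process_limit);
 */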

/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int error;
	/* Static: a read reports the last value set on any unit */
	static int input = 3; /* default is full */
	struct adapter *adapter = (struct adapter *)arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
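
/*
 * Assuming this handler is registered as "fc" under the device node
 * (unit number hypothetical), full flow control is requested with:
 *
 *	# sysctl dev.em.0.fc=3
 */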

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}
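
/*
 * As with the NVM dump, writing 1 produces the report; assuming the
 * node is registered as "debug" (unit number hypothetical):
 *
 *	# sysctl dev.em.0.debug=1
 */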

/*
** This routine is meant to be fluid, add whatever is
** needed for debugging a problem.  -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	/* Keep both flag reports on one line */
	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");

	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
	device_printf(dev, "TX descriptors avail = %d\n",
	    txr->tx_avail);
	device_printf(dev, "Tx Descriptors avail failure = %lu\n",
	    txr->no_desc_avail);
	device_printf(dev, "RX discarded packets = %lu\n",
	    rxr->rx_discarded);
	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
}