1/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 295133 2016-02-01 23:51:30Z marius $*/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 295323 2016-02-05 17:14:37Z erj $*/
34
35#include "opt_em.h"
36#include "opt_ddb.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifdef DDB
47#include <sys/types.h>
48#include <ddb/ddb.h>
49#endif
50#if __FreeBSD_version >= 800000
51#include <sys/buf_ring.h>
52#endif
53#include <sys/bus.h>
54#include <sys/endian.h>
55#include <sys/kernel.h>
56#include <sys/kthread.h>
57#include <sys/malloc.h>
58#include <sys/mbuf.h>
59#include <sys/module.h>
60#include <sys/rman.h>
61#include <sys/smp.h>
62#include <sys/socket.h>
63#include <sys/sockio.h>
64#include <sys/sysctl.h>
65#include <sys/taskqueue.h>
66#include <sys/eventhandler.h>
67#include <machine/bus.h>
68#include <machine/resource.h>
69
70#include <net/bpf.h>
71#include <net/ethernet.h>
72#include <net/if.h>
73#include <net/if_var.h>
74#include <net/if_arp.h>
75#include <net/if_dl.h>
76#include <net/if_media.h>
77
78#include <net/if_types.h>
79#include <net/if_vlan_var.h>
80
81#include <netinet/in_systm.h>
82#include <netinet/in.h>
83#include <netinet/if_ether.h>
84#include <netinet/ip.h>
85#include <netinet/ip6.h>
86#include <netinet/tcp.h>
87#include <netinet/udp.h>
88
89#include <machine/in_cksum.h>
90#include <dev/led/led.h>
91#include <dev/pci/pcivar.h>
92#include <dev/pci/pcireg.h>
93
94#include "e1000_api.h"
95#include "e1000_82571.h"
96#include "if_em.h"
97
98/*********************************************************************
99 * Set this to one to display debug statistics
100 *********************************************************************/
101int em_display_debug_stats = 0;
102
103/*********************************************************************
 104 * Driver version:
 105 *********************************************************************/
 106char em_driver_version[] = "7.4.2";
 106char em_driver_version[] = "7.6.1-k";
102
103/*********************************************************************
104 * PCI Device ID Table
105 *
106 * Used by probe to select devices to load on
107 * Last field stores an index into e1000_strings
108 * Last entry must be all 0s
109 *
110 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
111 *********************************************************************/
112
113static em_vendor_info_t em_vendor_info_array[] =
114{
115 /* Intel(R) PRO/1000 Network Connection */
116 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
119 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
120 PCI_ANY_ID, PCI_ANY_ID, 0},
121 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
122 PCI_ANY_ID, PCI_ANY_ID, 0},
123 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
124 PCI_ANY_ID, PCI_ANY_ID, 0},
125 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
126 PCI_ANY_ID, PCI_ANY_ID, 0},
127 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
128 PCI_ANY_ID, PCI_ANY_ID, 0},
129 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
130 PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135
136 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
140 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
141 PCI_ANY_ID, PCI_ANY_ID, 0},
142 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
143 PCI_ANY_ID, PCI_ANY_ID, 0},
144 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
145 PCI_ANY_ID, PCI_ANY_ID, 0},
146 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
147 PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
181 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
182 PCI_ANY_ID, PCI_ANY_ID, 0},
183 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
184 PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
191 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
192 PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
194 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
195 PCI_ANY_ID, PCI_ANY_ID, 0},
196 /* required last entry */
197 { 0, 0, 0, 0, 0}
198};
199
200/*********************************************************************
201 * Table of branding strings for all supported NICs.
202 *********************************************************************/
203
204static char *em_strings[] = {
205 "Intel(R) PRO/1000 Network Connection"
206};
207
208/*********************************************************************
209 * Function prototypes
210 *********************************************************************/
211static int em_probe(device_t);
212static int em_attach(device_t);
213static int em_detach(device_t);
214static int em_shutdown(device_t);
215static int em_suspend(device_t);
216static int em_resume(device_t);
217#ifdef EM_MULTIQUEUE
218static int em_mq_start(if_t, struct mbuf *);
219static int em_mq_start_locked(if_t,
220 struct tx_ring *);
221static void em_qflush(if_t);
222#else
223static void em_start(if_t);
224static void em_start_locked(if_t, struct tx_ring *);
225#endif
226static int em_ioctl(if_t, u_long, caddr_t);
227static uint64_t em_get_counter(if_t, ift_counter);
228static void em_init(void *);
229static void em_init_locked(struct adapter *);
230static void em_stop(void *);
231static void em_media_status(if_t, struct ifmediareq *);
232static int em_media_change(if_t);
233static void em_identify_hardware(struct adapter *);
234static int em_allocate_pci_resources(struct adapter *);
235static int em_allocate_legacy(struct adapter *);
236static int em_allocate_msix(struct adapter *);
237static int em_allocate_queues(struct adapter *);
238static int em_setup_msix(struct adapter *);
239static void em_free_pci_resources(struct adapter *);
240static void em_local_timer(void *);
241static void em_reset(struct adapter *);
242static int em_setup_interface(device_t, struct adapter *);
243static void em_flush_desc_rings(struct adapter *);
244
245static void em_setup_transmit_structures(struct adapter *);
246static void em_initialize_transmit_unit(struct adapter *);
247static int em_allocate_transmit_buffers(struct tx_ring *);
248static void em_free_transmit_structures(struct adapter *);
249static void em_free_transmit_buffers(struct tx_ring *);
250
251static int em_setup_receive_structures(struct adapter *);
252static int em_allocate_receive_buffers(struct rx_ring *);
253static void em_initialize_receive_unit(struct adapter *);
254static void em_free_receive_structures(struct adapter *);
255static void em_free_receive_buffers(struct rx_ring *);
256
257static void em_enable_intr(struct adapter *);
258static void em_disable_intr(struct adapter *);
259static void em_update_stats_counters(struct adapter *);
260static void em_add_hw_stats(struct adapter *adapter);
261static void em_txeof(struct tx_ring *);
262static bool em_rxeof(struct rx_ring *, int, int *);
263#ifndef __NO_STRICT_ALIGNMENT
264static int em_fixup_rx(struct rx_ring *);
265#endif
266static void em_setup_rxdesc(union e1000_rx_desc_extended *,
267 const struct em_rxbuffer *rxbuf);
268static void em_receive_checksum(uint32_t status, struct mbuf *);
269static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
270 struct ip *, u32 *, u32 *);
271static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
272 struct tcphdr *, u32 *, u32 *);
273static void em_set_promisc(struct adapter *);
274static void em_disable_promisc(struct adapter *);
275static void em_set_multi(struct adapter *);
276static void em_update_link_status(struct adapter *);
277static void em_refresh_mbufs(struct rx_ring *, int);
278static void em_register_vlan(void *, if_t, u16);
279static void em_unregister_vlan(void *, if_t, u16);
280static void em_setup_vlan_hw_support(struct adapter *);
281static int em_xmit(struct tx_ring *, struct mbuf **);
282static int em_dma_malloc(struct adapter *, bus_size_t,
283 struct em_dma_alloc *, int);
284static void em_dma_free(struct adapter *, struct em_dma_alloc *);
285static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
286static void em_print_nvm_info(struct adapter *);
287static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
288static void em_print_debug_info(struct adapter *);
289static int em_is_valid_ether_addr(u8 *);
290static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
291static void em_add_int_delay_sysctl(struct adapter *, const char *,
292 const char *, struct em_int_delay_info *, int, int);
293/* Management and WOL Support */
294static void em_init_manageability(struct adapter *);
295static void em_release_manageability(struct adapter *);
296static void em_get_hw_control(struct adapter *);
297static void em_release_hw_control(struct adapter *);
298static void em_get_wakeup(device_t);
299static void em_enable_wakeup(device_t);
300static int em_enable_phy_wakeup(struct adapter *);
301static void em_led_func(void *, int);
302static void em_disable_aspm(struct adapter *);
303
304static int em_irq_fast(void *);
305
306/* MSIX handlers */
307static void em_msix_tx(void *);
308static void em_msix_rx(void *);
309static void em_msix_link(void *);
310static void em_handle_tx(void *context, int pending);
311static void em_handle_rx(void *context, int pending);
312static void em_handle_link(void *context, int pending);
313
314#ifdef EM_MULTIQUEUE
315static void em_enable_vectors_82574(struct adapter *);
316#endif
317
318static void em_set_sysctl_value(struct adapter *, const char *,
319 const char *, int *, int);
320static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
321static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
322
323static __inline void em_rx_discard(struct rx_ring *, int);
324
325#ifdef DEVICE_POLLING
326static poll_handler_t em_poll;
 327#endif /* DEVICE_POLLING */
328
329/*********************************************************************
330 * FreeBSD Device Interface Entry Points
331 *********************************************************************/
332
333static device_method_t em_methods[] = {
334 /* Device interface */
335 DEVMETHOD(device_probe, em_probe),
336 DEVMETHOD(device_attach, em_attach),
337 DEVMETHOD(device_detach, em_detach),
338 DEVMETHOD(device_shutdown, em_shutdown),
339 DEVMETHOD(device_suspend, em_suspend),
340 DEVMETHOD(device_resume, em_resume),
341 DEVMETHOD_END
342};
343
344static driver_t em_driver = {
345 "em", em_methods, sizeof(struct adapter),
346};
347
348devclass_t em_devclass;
349DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
350MODULE_DEPEND(em, pci, 1, 1, 1);
351MODULE_DEPEND(em, ether, 1, 1, 1);
352#ifdef DEV_NETMAP
353MODULE_DEPEND(em, netmap, 1, 1, 1);
354#endif /* DEV_NETMAP */
355
356/*********************************************************************
357 * Tunable default values.
358 *********************************************************************/
359
360#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
361#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
362#define M_TSO_LEN 66
363
364#define MAX_INTS_PER_SEC 8000
365#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
366
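/*
 * A minimal sketch of what the conversion macros above work out to,
 * assuming the hardware's documented units (the delay registers tick
 * in 1.024 us steps, the ITR register in 256 ns steps). Illustration
 * only; this block is not compiled into the driver.
 */
#if 0
#include <assert.h>

static void
em_delay_macro_example(void)
{
	/* 64 ticks of 1.024 us each round to 66 us */
	assert(EM_TICKS_TO_USECS(64) == 66);
	/* converting 66 us back yields the original 64 ticks */
	assert(EM_USECS_TO_TICKS(66) == 64);
	/* 10^9 / (8000 * 256) == 488, i.e. ~8000 interrupts/sec */
	assert(DEFAULT_ITR == 488);
}
#endif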
367/* Allow common code without TSO */
368#ifndef CSUM_TSO
369#define CSUM_TSO 0
370#endif
371
372#define TSO_WORKAROUND 4
373
374static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
375
376static int em_disable_crc_stripping = 0;
377SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
378 &em_disable_crc_stripping, 0, "Disable CRC Stripping");
379
380static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
381static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
382SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
383 0, "Default transmit interrupt delay in usecs");
384SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
385 0, "Default receive interrupt delay in usecs");
386
387static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
388static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
389SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
390 &em_tx_abs_int_delay_dflt, 0,
391 "Default transmit interrupt delay limit in usecs");
392SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
393 &em_rx_abs_int_delay_dflt, 0,
394 "Default receive interrupt delay limit in usecs");
395
396static int em_rxd = EM_DEFAULT_RXD;
397static int em_txd = EM_DEFAULT_TXD;
398SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
399 "Number of receive descriptors per queue");
400SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
401 "Number of transmit descriptors per queue");
402
403static int em_smart_pwr_down = FALSE;
404SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
405 0, "Set to true to leave smart power down enabled on newer adapters");
406
407/* Controls whether promiscuous also shows bad packets */
408static int em_debug_sbp = FALSE;
409SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
410 "Show bad packets in promiscuous mode");
411
412static int em_enable_msix = TRUE;
413SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
414 "Enable MSI-X interrupts");
415
416#ifdef EM_MULTIQUEUE
417static int em_num_queues = 1;
418SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
419 "82574 only: Number of queues to configure, 0 indicates autoconfigure");
420#endif
421
422/*
423** Global variable to store last used CPU when binding queues
 424** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments when a
425** queue is bound to a cpu.
426*/
427static int em_last_bind_cpu = -1;
428
429/* How many packets rxeof tries to clean at a time */
430static int em_rx_process_limit = 100;
431SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
432 &em_rx_process_limit, 0,
433 "Maximum number of received packets to process "
434 "at a time, -1 means unlimited");
435
 436/* Energy Efficient Ethernet - default to OFF (1 disables EEE) */
 437static int eee_setting = 1;
 438SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
 439 "Disable Energy Efficient Ethernet");
440
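/*
 * All of the knobs above are CTLFLAG_RDTUN, so they are read from the
 * loader environment once at boot. A sketch of /boot/loader.conf
 * entries that would set them (the values here are only examples):
 *
 *	hw.em.rxd=2048
 *	hw.em.txd=2048
 *	hw.em.rx_int_delay=32
 *	hw.em.rx_process_limit=200
 *	hw.em.eee_setting=0
 */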
441/* Global used in WOL setup with multiport cards */
442static int global_quad_port_a = 0;
443
444#ifdef DEV_NETMAP /* see ixgbe.c for details */
445#include <dev/netmap/if_em_netmap.h>
446#endif /* DEV_NETMAP */
447
448/*********************************************************************
449 * Device identification routine
450 *
 451 * em_probe determines if the driver should be loaded on an
 452 * adapter based on the PCI vendor/device ID of that adapter.
453 *
454 * return BUS_PROBE_DEFAULT on success, positive on failure
455 *********************************************************************/
456
457static int
458em_probe(device_t dev)
459{
460 char adapter_name[60];
461 uint16_t pci_vendor_id = 0;
462 uint16_t pci_device_id = 0;
463 uint16_t pci_subvendor_id = 0;
464 uint16_t pci_subdevice_id = 0;
465 em_vendor_info_t *ent;
466
467 INIT_DEBUGOUT("em_probe: begin");
468
469 pci_vendor_id = pci_get_vendor(dev);
470 if (pci_vendor_id != EM_VENDOR_ID)
471 return (ENXIO);
472
473 pci_device_id = pci_get_device(dev);
474 pci_subvendor_id = pci_get_subvendor(dev);
475 pci_subdevice_id = pci_get_subdevice(dev);
476
477 ent = em_vendor_info_array;
478 while (ent->vendor_id != 0) {
479 if ((pci_vendor_id == ent->vendor_id) &&
480 (pci_device_id == ent->device_id) &&
481
482 ((pci_subvendor_id == ent->subvendor_id) ||
483 (ent->subvendor_id == PCI_ANY_ID)) &&
484
485 ((pci_subdevice_id == ent->subdevice_id) ||
486 (ent->subdevice_id == PCI_ANY_ID))) {
487 sprintf(adapter_name, "%s %s",
488 em_strings[ent->index],
489 em_driver_version);
490 device_set_desc_copy(dev, adapter_name);
491 return (BUS_PROBE_DEFAULT);
492 }
493 ent++;
494 }
495
496 return (ENXIO);
497}
498
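/*
 * Probe walkthrough (illustration only): an I219-LM adapter reports
 * vendor 0x8086 and device E1000_DEV_ID_PCH_SPT_I219_LM; its table
 * entry uses PCI_ANY_ID for both subsystem fields, so any
 * subvendor/subdevice pair matches and em_probe above returns
 * BUS_PROBE_DEFAULT with the branding string from em_strings.
 */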
499/*********************************************************************
500 * Device initialization routine
501 *
502 * The attach entry point is called when the driver is being loaded.
503 * This routine identifies the type of hardware, allocates all resources
504 * and initializes the hardware.
505 *
506 * return 0 on success, positive on failure
507 *********************************************************************/
508
509static int
510em_attach(device_t dev)
511{
512 struct adapter *adapter;
513 struct e1000_hw *hw;
514 int error = 0;
515
516 INIT_DEBUGOUT("em_attach: begin");
517
518 if (resource_disabled("em", device_get_unit(dev))) {
519 device_printf(dev, "Disabled by device hint\n");
520 return (ENXIO);
521 }
522
523 adapter = device_get_softc(dev);
524 adapter->dev = adapter->osdep.dev = dev;
525 hw = &adapter->hw;
526 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
527
528 /* SYSCTL stuff */
529 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
530 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
531 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
532 em_sysctl_nvm_info, "I", "NVM Information");
533
534 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
535 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
536 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
537 em_sysctl_debug_info, "I", "Debug Information");
538
539 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
540 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
541 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
542 em_set_flowcntl, "I", "Flow Control");
543
544 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
545
546 /* Determine hardware and mac info */
547 em_identify_hardware(adapter);
548
549 /* Setup PCI resources */
550 if (em_allocate_pci_resources(adapter)) {
551 device_printf(dev, "Allocation of PCI resources failed\n");
552 error = ENXIO;
553 goto err_pci;
554 }
555
556 /*
557 ** For ICH8 and family we need to
558 ** map the flash memory, and this
559 ** must happen after the MAC is
560 ** identified
561 */
562 if ((hw->mac.type == e1000_ich8lan) ||
563 (hw->mac.type == e1000_ich9lan) ||
564 (hw->mac.type == e1000_ich10lan) ||
565 (hw->mac.type == e1000_pchlan) ||
566 (hw->mac.type == e1000_pch2lan) ||
567 (hw->mac.type == e1000_pch_lpt)) {
568 int rid = EM_BAR_TYPE_FLASH;
569 adapter->flash = bus_alloc_resource_any(dev,
570 SYS_RES_MEMORY, &rid, RF_ACTIVE);
571 if (adapter->flash == NULL) {
572 device_printf(dev, "Mapping of Flash failed\n");
573 error = ENXIO;
574 goto err_pci;
575 }
576 /* This is used in the shared code */
577 hw->flash_address = (u8 *)adapter->flash;
578 adapter->osdep.flash_bus_space_tag =
579 rman_get_bustag(adapter->flash);
580 adapter->osdep.flash_bus_space_handle =
581 rman_get_bushandle(adapter->flash);
582 }
583 /*
 584 ** In the new SPT device the flash is not a
 585 ** separate BAR; rather, it is also in BAR0,
586 ** so use the same tag and an offset handle for the
587 ** FLASH read/write macros in the shared code.
588 */
589 else if (hw->mac.type == e1000_pch_spt) {
590 adapter->osdep.flash_bus_space_tag =
591 adapter->osdep.mem_bus_space_tag;
592 adapter->osdep.flash_bus_space_handle =
593 adapter->osdep.mem_bus_space_handle
594 + E1000_FLASH_BASE_ADDR;
595 }
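	/*
	 * Illustration: with this aliasing in place, a shared-code
	 * flash access at offset off resolves to a plain BAR0 memory
	 * read at E1000_FLASH_BASE_ADDR + off, since the flash tag and
	 * handle now point into the already-mapped register BAR.
	 */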
596
597 /* Do Shared Code initialization */
582 if (e1000_setup_init_funcs(hw, TRUE)) {
583 device_printf(dev, "Setup of Shared code failed\n");
598 error = e1000_setup_init_funcs(hw, TRUE);
599 if (error) {
600 device_printf(dev, "Setup of Shared code failed, error %d\n",
601 error);
602 error = ENXIO;
603 goto err_pci;
604 }
605
606 /*
607 * Setup MSI/X or MSI if PCI Express
608 */
609 adapter->msix = em_setup_msix(adapter);
610
611 e1000_get_bus_info(hw);
612
613 /* Set up some sysctls for the tunable interrupt delays */
614 em_add_int_delay_sysctl(adapter, "rx_int_delay",
615 "receive interrupt delay in usecs", &adapter->rx_int_delay,
616 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
617 em_add_int_delay_sysctl(adapter, "tx_int_delay",
618 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
619 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
620 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
621 "receive interrupt delay limit in usecs",
622 &adapter->rx_abs_int_delay,
623 E1000_REGISTER(hw, E1000_RADV),
624 em_rx_abs_int_delay_dflt);
625 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
626 "transmit interrupt delay limit in usecs",
627 &adapter->tx_abs_int_delay,
628 E1000_REGISTER(hw, E1000_TADV),
629 em_tx_abs_int_delay_dflt);
630 em_add_int_delay_sysctl(adapter, "itr",
631 "interrupt delay limit in usecs/4",
632 &adapter->tx_itr,
633 E1000_REGISTER(hw, E1000_ITR),
634 DEFAULT_ITR);
635
636 /* Sysctl for limiting the amount of work done in the taskqueue */
637 em_set_sysctl_value(adapter, "rx_processing_limit",
638 "max number of rx packets to process", &adapter->rx_process_limit,
639 em_rx_process_limit);
640
641 /*
642 * Validate number of transmit and receive descriptors. It
643 * must not exceed hardware maximum, and must be multiple
644 * of E1000_DBA_ALIGN.
645 */
646 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
647 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
648 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
649 EM_DEFAULT_TXD, em_txd);
650 adapter->num_tx_desc = EM_DEFAULT_TXD;
651 } else
652 adapter->num_tx_desc = em_txd;
653
654 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
655 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
656 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
657 EM_DEFAULT_RXD, em_rxd);
658 adapter->num_rx_desc = EM_DEFAULT_RXD;
659 } else
660 adapter->num_rx_desc = em_rxd;
661
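	/*
	 * Worked example of the validation above, assuming the usual
	 * 16-byte descriptor size and a 128-byte EM_DBA_ALIGN (both
	 * assumptions; illustration only): a ring of 1024 descriptors
	 * occupies 1024 * 16 = 16384 bytes, and 16384 % 128 == 0, so
	 * it is accepted; a ring of 1012 descriptors gives 16192 bytes,
	 * and 16192 % 128 == 64, so the driver falls back to the
	 * default. In effect the count must be a multiple of 8.
	 */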
662 hw->mac.autoneg = DO_AUTO_NEG;
663 hw->phy.autoneg_wait_to_complete = FALSE;
664 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
665
666 /* Copper options */
667 if (hw->phy.media_type == e1000_media_type_copper) {
668 hw->phy.mdix = AUTO_ALL_MODES;
669 hw->phy.disable_polarity_correction = FALSE;
670 hw->phy.ms_type = EM_MASTER_SLAVE;
671 }
672
673 /*
674 * Set the frame limits assuming
675 * standard ethernet sized frames.
676 */
677 adapter->hw.mac.max_frame_size =
678 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
679
680 /*
681 * This controls when hardware reports transmit completion
682 * status.
683 */
684 hw->mac.report_tx_early = 1;
685
686 /*
687 ** Get queue/ring memory
688 */
689 if (em_allocate_queues(adapter)) {
690 error = ENOMEM;
691 goto err_pci;
692 }
693
694 /* Allocate multicast array memory. */
695 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
696 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
697 if (adapter->mta == NULL) {
698 device_printf(dev, "Can not allocate multicast setup array\n");
699 error = ENOMEM;
700 goto err_late;
701 }
702
703 /* Check SOL/IDER usage */
704 if (e1000_check_reset_block(hw))
705 device_printf(dev, "PHY reset is blocked"
706 " due to SOL/IDER session.\n");
707
708 /* Sysctl for setting Energy Efficient Ethernet */
709 hw->dev_spec.ich8lan.eee_disable = eee_setting;
710 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
711 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
712 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
713 adapter, 0, em_sysctl_eee, "I",
714 "Disable Energy Efficient Ethernet");
715
716 /*
 717 ** Start from a known state; this is
 718 ** important for reading the NVM and
 719 ** MAC address from it.
720 */
721 e1000_reset_hw(hw);
722
723
724 /* Make sure we have a good EEPROM before we read from it */
725 if (e1000_validate_nvm_checksum(hw) < 0) {
726 /*
727 ** Some PCI-E parts fail the first check due to
 728 ** the link being in a sleep state; call it again,
 729 ** and if it fails a second time it's a real issue.
730 */
731 if (e1000_validate_nvm_checksum(hw) < 0) {
732 device_printf(dev,
733 "The EEPROM Checksum Is Not Valid\n");
734 error = EIO;
735 goto err_late;
736 }
737 }
738
739 /* Copy the permanent MAC address out of the EEPROM */
740 if (e1000_read_mac_addr(hw) < 0) {
741 device_printf(dev, "EEPROM read error while reading MAC"
742 " address\n");
743 error = EIO;
744 goto err_late;
745 }
746
747 if (!em_is_valid_ether_addr(hw->mac.addr)) {
748 device_printf(dev, "Invalid MAC address\n");
749 error = EIO;
750 goto err_late;
751 }
752
753 /* Disable ULP support */
754 e1000_disable_ulp_lpt_lp(hw, TRUE);
755
756 /*
757 ** Do interrupt configuration
758 */
759 if (adapter->msix > 1) /* Do MSIX */
760 error = em_allocate_msix(adapter);
761 else /* MSI or Legacy */
762 error = em_allocate_legacy(adapter);
763 if (error)
764 goto err_late;
765
766 /*
767 * Get Wake-on-Lan and Management info for later use
768 */
769 em_get_wakeup(dev);
770
771 /* Setup OS specific network interface */
772 if (em_setup_interface(dev, adapter) != 0)
773 goto err_late;
774
775 em_reset(adapter);
776
777 /* Initialize statistics */
778 em_update_stats_counters(adapter);
779
780 hw->mac.get_link_status = 1;
781 em_update_link_status(adapter);
782
783 /* Register for VLAN events */
784 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
785 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
786 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
787 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
788
789 em_add_hw_stats(adapter);
790
791 /* Non-AMT based hardware can now take control from firmware */
792 if (adapter->has_manage && !adapter->has_amt)
793 em_get_hw_control(adapter);
794
795 /* Tell the stack that the interface is not active */
796 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
797
798 adapter->led_dev = led_create(em_led_func, adapter,
799 device_get_nameunit(dev));
800#ifdef DEV_NETMAP
801 em_netmap_attach(adapter);
802#endif /* DEV_NETMAP */
803
804 INIT_DEBUGOUT("em_attach: end");
805
806 return (0);
807
808err_late:
809 em_free_transmit_structures(adapter);
810 em_free_receive_structures(adapter);
811 em_release_hw_control(adapter);
812 if (adapter->ifp != (void *)NULL)
813 if_free(adapter->ifp);
814err_pci:
815 em_free_pci_resources(adapter);
816 free(adapter->mta, M_DEVBUF);
817 EM_CORE_LOCK_DESTROY(adapter);
818
819 return (error);
820}
821
822/*********************************************************************
823 * Device removal routine
824 *
825 * The detach entry point is called when the driver is being removed.
826 * This routine stops the adapter and deallocates all the resources
827 * that were allocated for driver operation.
828 *
829 * return 0 on success, positive on failure
830 *********************************************************************/
831
832static int
833em_detach(device_t dev)
834{
835 struct adapter *adapter = device_get_softc(dev);
836 if_t ifp = adapter->ifp;
837
838 INIT_DEBUGOUT("em_detach: begin");
839
840 /* Make sure VLANS are not using driver */
841 if (if_vlantrunkinuse(ifp)) {
842 device_printf(dev,"Vlan in use, detach first\n");
843 return (EBUSY);
844 }
845
846#ifdef DEVICE_POLLING
847 if (if_getcapenable(ifp) & IFCAP_POLLING)
848 ether_poll_deregister(ifp);
849#endif
850
851 if (adapter->led_dev != NULL)
852 led_destroy(adapter->led_dev);
853
854 EM_CORE_LOCK(adapter);
855 adapter->in_detach = 1;
856 em_stop(adapter);
857 EM_CORE_UNLOCK(adapter);
858 EM_CORE_LOCK_DESTROY(adapter);
859
860 e1000_phy_hw_reset(&adapter->hw);
861
862 em_release_manageability(adapter);
863 em_release_hw_control(adapter);
864
865 /* Unregister VLAN events */
866 if (adapter->vlan_attach != NULL)
867 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
868 if (adapter->vlan_detach != NULL)
869 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
870
871 ether_ifdetach(adapter->ifp);
872 callout_drain(&adapter->timer);
873
874#ifdef DEV_NETMAP
875 netmap_detach(ifp);
876#endif /* DEV_NETMAP */
877
878 em_free_pci_resources(adapter);
879 bus_generic_detach(dev);
880 if_free(ifp);
881
882 em_free_transmit_structures(adapter);
883 em_free_receive_structures(adapter);
884
885 em_release_hw_control(adapter);
886 free(adapter->mta, M_DEVBUF);
887
888 return (0);
889}
890
891/*********************************************************************
892 *
893 * Shutdown entry point
894 *
895 **********************************************************************/
896
897static int
898em_shutdown(device_t dev)
899{
900 return em_suspend(dev);
901}
902
903/*
904 * Suspend/resume device methods.
905 */
906static int
907em_suspend(device_t dev)
908{
909 struct adapter *adapter = device_get_softc(dev);
910
911 EM_CORE_LOCK(adapter);
912
913 em_release_manageability(adapter);
914 em_release_hw_control(adapter);
915 em_enable_wakeup(dev);
916
917 EM_CORE_UNLOCK(adapter);
918
919 return bus_generic_suspend(dev);
920}
921
922static int
923em_resume(device_t dev)
924{
925 struct adapter *adapter = device_get_softc(dev);
926 struct tx_ring *txr = adapter->tx_rings;
927 if_t ifp = adapter->ifp;
928
929 EM_CORE_LOCK(adapter);
930 if (adapter->hw.mac.type == e1000_pch2lan)
931 e1000_resume_workarounds_pchlan(&adapter->hw);
932 em_init_locked(adapter);
933 em_init_manageability(adapter);
934
935 if ((if_getflags(ifp) & IFF_UP) &&
936 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
937 for (int i = 0; i < adapter->num_queues; i++, txr++) {
938 EM_TX_LOCK(txr);
939#ifdef EM_MULTIQUEUE
940 if (!drbr_empty(ifp, txr->br))
941 em_mq_start_locked(ifp, txr);
942#else
943 if (!if_sendq_empty(ifp))
944 em_start_locked(ifp, txr);
945#endif
946 EM_TX_UNLOCK(txr);
947 }
948 }
949 EM_CORE_UNLOCK(adapter);
950
951 return bus_generic_resume(dev);
952}
953
954
955#ifndef EM_MULTIQUEUE
956static void
957em_start_locked(if_t ifp, struct tx_ring *txr)
958{
959 struct adapter *adapter = if_getsoftc(ifp);
960 struct mbuf *m_head;
961
962 EM_TX_LOCK_ASSERT(txr);
963
964 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
965 IFF_DRV_RUNNING)
966 return;
967
968 if (!adapter->link_active)
969 return;
970
971 while (!if_sendq_empty(ifp)) {
972 /* Call cleanup if number of TX descriptors low */
973 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
974 em_txeof(txr);
975 if (txr->tx_avail < EM_MAX_SCATTER) {
976 if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
977 break;
978 }
979 m_head = if_dequeue(ifp);
980 if (m_head == NULL)
981 break;
982 /*
983 * Encapsulation can modify our pointer, and or make it
984 * NULL on failure. In that event, we can't requeue.
985 */
986 if (em_xmit(txr, &m_head)) {
987 if (m_head == NULL)
988 break;
989 if_sendq_prepend(ifp, m_head);
990 break;
991 }
992
993 /* Mark the queue as having work */
994 if (txr->busy == EM_TX_IDLE)
995 txr->busy = EM_TX_BUSY;
996
997 /* Send a copy of the frame to the BPF listener */
998 ETHER_BPF_MTAP(ifp, m_head);
999
1000 }
1001
1002 return;
1003}
1004
1005static void
1006em_start(if_t ifp)
1007{
1008 struct adapter *adapter = if_getsoftc(ifp);
1009 struct tx_ring *txr = adapter->tx_rings;
1010
1011 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1012 EM_TX_LOCK(txr);
1013 em_start_locked(ifp, txr);
1014 EM_TX_UNLOCK(txr);
1015 }
1016 return;
1017}
1018#else /* EM_MULTIQUEUE */
1019/*********************************************************************
1020 * Multiqueue Transmit routines
1021 *
1022 * em_mq_start is called by the stack to initiate a transmit.
 1023 * However, if the ring is busy the driver can queue the request
 1024 * rather than do an immediate send; this queueing ability, more
 1025 * than having multiple tx queues, is the advantage of this driver.
1026 **********************************************************************/
1027/*
1028** Multiqueue capable stack interface
1029*/
1030static int
1031em_mq_start(if_t ifp, struct mbuf *m)
1032{
1033 struct adapter *adapter = if_getsoftc(ifp);
1034 struct tx_ring *txr = adapter->tx_rings;
1035 unsigned int i, error;
1036
1037 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1038 i = m->m_pkthdr.flowid % adapter->num_queues;
1039 else
1040 i = curcpu % adapter->num_queues;
1041
1042 txr = &adapter->tx_rings[i];
1043
1044 error = drbr_enqueue(ifp, txr->br, m);
1045 if (error)
1046 return (error);
1047
1048 if (EM_TX_TRYLOCK(txr)) {
1049 em_mq_start_locked(ifp, txr);
1050 EM_TX_UNLOCK(txr);
1051 } else
1052 taskqueue_enqueue(txr->tq, &txr->tx_task);
1053
1054 return (0);
1055}
1056
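/*
 * Queue selection example for em_mq_start above (illustration only):
 * with two queues configured, a packet hashed to flowid 7 lands on
 * ring 7 % 2 == 1, while an unhashed packet sent from CPU 3 also
 * lands on ring 3 % 2 == 1; keying on the flowid keeps a given flow
 * on one ring so its packets are not reordered.
 */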
1057static int
1058em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1059{
1060 struct adapter *adapter = txr->adapter;
1061 struct mbuf *next;
1062 int err = 0, enq = 0;
1063
1064 EM_TX_LOCK_ASSERT(txr);
1065
1066 if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1067 adapter->link_active == 0) {
1068 return (ENETDOWN);
1069 }
1070
1071 /* Process the queue */
1072 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1073 if ((err = em_xmit(txr, &next)) != 0) {
1074 if (next == NULL) {
1075 /* It was freed, move forward */
1076 drbr_advance(ifp, txr->br);
1077 } else {
1078 /*
1079 * Still have one left, it may not be
1080 * the same since the transmit function
1081 * may have changed it.
1082 */
1083 drbr_putback(ifp, txr->br, next);
1084 }
1085 break;
1086 }
1087 drbr_advance(ifp, txr->br);
1088 enq++;
1089 if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1090 if (next->m_flags & M_MCAST)
1091 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1092 ETHER_BPF_MTAP(ifp, next);
1093 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1094 break;
1095 }
1096
1097 /* Mark the queue as having work */
1098 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1099 txr->busy = EM_TX_BUSY;
1100
1101 if (txr->tx_avail < EM_MAX_SCATTER)
1102 em_txeof(txr);
1103 if (txr->tx_avail < EM_MAX_SCATTER) {
1104 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1105 }
1106 return (err);
1107}
1108
1109/*
1110** Flush all ring buffers
1111*/
1112static void
1113em_qflush(if_t ifp)
1114{
1115 struct adapter *adapter = if_getsoftc(ifp);
1116 struct tx_ring *txr = adapter->tx_rings;
1117 struct mbuf *m;
1118
1119 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1120 EM_TX_LOCK(txr);
1121 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1122 m_freem(m);
1123 EM_TX_UNLOCK(txr);
1124 }
1125 if_qflush(ifp);
1126}
1127#endif /* EM_MULTIQUEUE */
1128
1129/*********************************************************************
1130 * Ioctl entry point
1131 *
1132 * em_ioctl is called when the user wants to configure the
1133 * interface.
1134 *
1135 * return 0 on success, positive on failure
1136 **********************************************************************/
1137
1138static int
1139em_ioctl(if_t ifp, u_long command, caddr_t data)
1140{
1141 struct adapter *adapter = if_getsoftc(ifp);
1142 struct ifreq *ifr = (struct ifreq *)data;
1143#if defined(INET) || defined(INET6)
1144 struct ifaddr *ifa = (struct ifaddr *)data;
1145#endif
1146 bool avoid_reset = FALSE;
1147 int error = 0;
1148
1149 if (adapter->in_detach)
1150 return (error);
1151
1152 switch (command) {
1153 case SIOCSIFADDR:
1154#ifdef INET
1155 if (ifa->ifa_addr->sa_family == AF_INET)
1156 avoid_reset = TRUE;
1157#endif
1158#ifdef INET6
1159 if (ifa->ifa_addr->sa_family == AF_INET6)
1160 avoid_reset = TRUE;
1161#endif
1162 /*
1163 ** Calling init results in link renegotiation,
1164 ** so we avoid doing it when possible.
1165 */
1166 if (avoid_reset) {
1167 if_setflagbits(ifp,IFF_UP,0);
1168 if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1169 em_init(adapter);
1170#ifdef INET
1171 if (!(if_getflags(ifp) & IFF_NOARP))
1172 arp_ifinit(ifp, ifa);
1173#endif
1174 } else
1175 error = ether_ioctl(ifp, command, data);
1176 break;
1177 case SIOCSIFMTU:
1178 {
1179 int max_frame_size;
1180
1181 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1182
1183 EM_CORE_LOCK(adapter);
1184 switch (adapter->hw.mac.type) {
1185 case e1000_82571:
1186 case e1000_82572:
1187 case e1000_ich9lan:
1188 case e1000_ich10lan:
1189 case e1000_pch2lan:
1190 case e1000_pch_lpt:
1191 case e1000_pch_spt:
1192 case e1000_82574:
1193 case e1000_82583:
1194 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1195 max_frame_size = 9234;
1196 break;
1197 case e1000_pchlan:
1198 max_frame_size = 4096;
1199 break;
1200 /* Adapters that do not support jumbo frames */
1201 case e1000_ich8lan:
1202 max_frame_size = ETHER_MAX_LEN;
1203 break;
1204 default:
1205 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1206 }
1207 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1208 ETHER_CRC_LEN) {
1209 EM_CORE_UNLOCK(adapter);
1210 error = EINVAL;
1211 break;
1212 }
1213
1214 if_setmtu(ifp, ifr->ifr_mtu);
1215 adapter->hw.mac.max_frame_size =
1216 if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1217 em_init_locked(adapter);
1218 EM_CORE_UNLOCK(adapter);
1219 break;
1220 }
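	/*
	 * MTU arithmetic for the case above (illustration only): the
	 * test admits an MTU of up to max_frame_size - ETHER_HDR_LEN -
	 * ETHER_CRC_LEN, so the 9234-byte jumbo limit allows MTUs up
	 * to 9234 - 14 - 4 = 9216, while the ICH8 limit of
	 * ETHER_MAX_LEN (1518) allows the standard 1500.
	 */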
1221 case SIOCSIFFLAGS:
1222 IOCTL_DEBUGOUT("ioctl rcv'd:\
1223 SIOCSIFFLAGS (Set Interface Flags)");
1224 EM_CORE_LOCK(adapter);
1225 if (if_getflags(ifp) & IFF_UP) {
1226 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1227 if ((if_getflags(ifp) ^ adapter->if_flags) &
1228 (IFF_PROMISC | IFF_ALLMULTI)) {
1229 em_disable_promisc(adapter);
1230 em_set_promisc(adapter);
1231 }
1232 } else
1233 em_init_locked(adapter);
1234 } else
1235 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1236 em_stop(adapter);
1237 adapter->if_flags = if_getflags(ifp);
1238 EM_CORE_UNLOCK(adapter);
1239 break;
1240 case SIOCADDMULTI:
1241 case SIOCDELMULTI:
1242 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1243 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1244 EM_CORE_LOCK(adapter);
1245 em_disable_intr(adapter);
1246 em_set_multi(adapter);
1247#ifdef DEVICE_POLLING
1248 if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1249#endif
1250 em_enable_intr(adapter);
1251 EM_CORE_UNLOCK(adapter);
1252 }
1253 break;
1254 case SIOCSIFMEDIA:
1255 /* Check SOL/IDER usage */
1256 EM_CORE_LOCK(adapter);
1257 if (e1000_check_reset_block(&adapter->hw)) {
1258 EM_CORE_UNLOCK(adapter);
1259 device_printf(adapter->dev, "Media change is"
1260 " blocked due to SOL/IDER session.\n");
1261 break;
1262 }
1263 EM_CORE_UNLOCK(adapter);
1264 /* falls thru */
1265 case SIOCGIFMEDIA:
1266 IOCTL_DEBUGOUT("ioctl rcv'd: \
1267 SIOCxIFMEDIA (Get/Set Interface Media)");
1268 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1269 break;
1270 case SIOCSIFCAP:
1271 {
1272 int mask, reinit;
1273
1274 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1275 reinit = 0;
1276 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1277#ifdef DEVICE_POLLING
1278 if (mask & IFCAP_POLLING) {
1279 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1280 error = ether_poll_register(em_poll, ifp);
1281 if (error)
1282 return (error);
1283 EM_CORE_LOCK(adapter);
1284 em_disable_intr(adapter);
1285 if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1286 EM_CORE_UNLOCK(adapter);
1287 } else {
1288 error = ether_poll_deregister(ifp);
1289 /* Enable interrupt even in error case */
1290 EM_CORE_LOCK(adapter);
1291 em_enable_intr(adapter);
1292 if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1293 EM_CORE_UNLOCK(adapter);
1294 }
1295 }
1296#endif
1297 if (mask & IFCAP_HWCSUM) {
1298 if_togglecapenable(ifp,IFCAP_HWCSUM);
1299 reinit = 1;
1300 }
1301 if (mask & IFCAP_TSO4) {
1302 if_togglecapenable(ifp,IFCAP_TSO4);
1303 reinit = 1;
1304 }
1305 if (mask & IFCAP_VLAN_HWTAGGING) {
1306 if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1307 reinit = 1;
1308 }
1309 if (mask & IFCAP_VLAN_HWFILTER) {
1310 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1311 reinit = 1;
1312 }
1313 if (mask & IFCAP_VLAN_HWTSO) {
1314 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1315 reinit = 1;
1316 }
1317 if ((mask & IFCAP_WOL) &&
1318 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1319 if (mask & IFCAP_WOL_MCAST)
1320 if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1321 if (mask & IFCAP_WOL_MAGIC)
1322 if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1323 }
1324 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1325 em_init(adapter);
1326 if_vlancap(ifp);
1327 break;
1328 }
1329
1330 default:
1331 error = ether_ioctl(ifp, command, data);
1332 break;
1333 }
1334
1335 return (error);
1336}
1337
1338
1339/*********************************************************************
1340 * Init entry point
1341 *
1342 * This routine is used in two ways. It is used by the stack as
1343 * init entry point in network interface structure. It is also used
1344 * by the driver as a hw/sw initialization routine to get to a
1345 * consistent state.
1346 *
1347 * return 0 on success, positive on failure
1348 **********************************************************************/
1349
1350static void
1351em_init_locked(struct adapter *adapter)
1352{
1353 if_t ifp = adapter->ifp;
1354 device_t dev = adapter->dev;
1355
1356 INIT_DEBUGOUT("em_init: begin");
1357
1358 EM_CORE_LOCK_ASSERT(adapter);
1359
1360 em_disable_intr(adapter);
1361 callout_stop(&adapter->timer);
1362
1363 /* Get the latest mac address, User can use a LAA */
1364 bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1365 ETHER_ADDR_LEN);
1366
1367 /* Put the address into the Receive Address Array */
1368 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1369
1370 /*
1371 * With the 82571 adapter, RAR[0] may be overwritten
 1372 * when the other port is reset, so we make a duplicate
 1373 * in RAR[14] for that eventuality; this assures
 1374 * the interface continues to function.
1375 */
1376 if (adapter->hw.mac.type == e1000_82571) {
1377 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1378 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1379 E1000_RAR_ENTRIES - 1);
1380 }
1381
1382 /* Initialize the hardware */
1383 em_reset(adapter);
1384 em_update_link_status(adapter);
1385
1386 /* Setup VLAN support, basic and offload if available */
1387 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1388
1389 /* Set hardware offload abilities */
1390 if_clearhwassist(ifp);
1391 if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1392 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1393 /*
1394 ** There have proven to be problems with TSO when not
1395 ** at full gigabit speed, so disable the assist automatically
1396 ** when at lower speeds. -jfv
1397 */
1398 if (if_getcapenable(ifp) & IFCAP_TSO4) {
1399 if (adapter->link_speed == SPEED_1000)
1400 if_sethwassistbits(ifp, CSUM_TSO, 0);
1401 }
1402
1403 /* Configure for OS presence */
1404 em_init_manageability(adapter);
1405
1406 /* Prepare transmit descriptors and buffers */
1407 em_setup_transmit_structures(adapter);
1408 em_initialize_transmit_unit(adapter);
1409
1410 /* Setup Multicast table */
1411 em_set_multi(adapter);
1412
1413 /*
1414 ** Figure out the desired mbuf
1415 ** pool for doing jumbos
1416 */
1417 if (adapter->hw.mac.max_frame_size <= 2048)
1418 adapter->rx_mbuf_sz = MCLBYTES;
1419 else if (adapter->hw.mac.max_frame_size <= 4096)
1420 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1421 else
1422 adapter->rx_mbuf_sz = MJUM9BYTES;
1423
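	/*
	 * Example of the pool selection above (illustration only):
	 * an MTU of 1500 gives a 1518-byte max frame and 2k MCLBYTES
	 * clusters; 4000 gives 4018 bytes and page-sized MJUMPAGESIZE
	 * clusters; 9000 gives 9018 bytes and 9k MJUM9BYTES clusters.
	 */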
1424 /* Prepare receive descriptors and buffers */
1425 if (em_setup_receive_structures(adapter)) {
1426 device_printf(dev, "Could not setup receive structures\n");
1427 em_stop(adapter);
1428 return;
1429 }
1430 em_initialize_receive_unit(adapter);
1431
1432 /* Use real VLAN Filter support? */
1433 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1434 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1435 /* Use real VLAN Filter support */
1436 em_setup_vlan_hw_support(adapter);
1437 else {
1438 u32 ctrl;
1439 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1440 ctrl |= E1000_CTRL_VME;
1441 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1442 }
1443 }
1444
1445 /* Don't lose promiscuous settings */
1446 em_set_promisc(adapter);
1447
1448 /* Set the interface as ACTIVE */
1449 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1450
1451 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1452 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1453
1454 /* MSI/X configuration for 82574 */
1455 if (adapter->hw.mac.type == e1000_82574) {
1456 int tmp;
1457 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1458 tmp |= E1000_CTRL_EXT_PBA_CLR;
1459 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1460 /* Set the IVAR - interrupt vector routing. */
1461 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1462 }
1463
1464#ifdef DEVICE_POLLING
1465 /*
1466 * Only enable interrupts if we are not polling, make sure
1467 * they are off otherwise.
1468 */
1469 if (if_getcapenable(ifp) & IFCAP_POLLING)
1470 em_disable_intr(adapter);
1471 else
1472#endif /* DEVICE_POLLING */
1473 em_enable_intr(adapter);
1474
1475 /* AMT based hardware can now take control from firmware */
1476 if (adapter->has_manage && adapter->has_amt)
1477 em_get_hw_control(adapter);
1478}
1479
1480static void
1481em_init(void *arg)
1482{
1483 struct adapter *adapter = arg;
1484
1485 EM_CORE_LOCK(adapter);
1486 em_init_locked(adapter);
1487 EM_CORE_UNLOCK(adapter);
1488}
1489
1490
1491#ifdef DEVICE_POLLING
1492/*********************************************************************
1493 *
1494 * Legacy polling routine: note this only works with single queue
1495 *
1496 *********************************************************************/
1497static int
1498em_poll(if_t ifp, enum poll_cmd cmd, int count)
1499{
1500 struct adapter *adapter = if_getsoftc(ifp);
1501 struct tx_ring *txr = adapter->tx_rings;
1502 struct rx_ring *rxr = adapter->rx_rings;
1503 u32 reg_icr;
1504 int rx_done;
1505
1506 EM_CORE_LOCK(adapter);
1507 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1508 EM_CORE_UNLOCK(adapter);
1509 return (0);
1510 }
1511
1512 if (cmd == POLL_AND_CHECK_STATUS) {
1513 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1514 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1515 callout_stop(&adapter->timer);
1516 adapter->hw.mac.get_link_status = 1;
1517 em_update_link_status(adapter);
1518 callout_reset(&adapter->timer, hz,
1519 em_local_timer, adapter);
1520 }
1521 }
1522 EM_CORE_UNLOCK(adapter);
1523
1524 em_rxeof(rxr, count, &rx_done);
1525
1526 EM_TX_LOCK(txr);
1527 em_txeof(txr);
1528#ifdef EM_MULTIQUEUE
1529 if (!drbr_empty(ifp, txr->br))
1530 em_mq_start_locked(ifp, txr);
1531#else
1532 if (!if_sendq_empty(ifp))
1533 em_start_locked(ifp, txr);
1534#endif
1535 EM_TX_UNLOCK(txr);
1536
1537 return (rx_done);
1538}
1539#endif /* DEVICE_POLLING */
1540
1541
1542/*********************************************************************
1543 *
1544 * Fast Legacy/MSI Combined Interrupt Service routine
1545 *
1546 *********************************************************************/
1547static int
1548em_irq_fast(void *arg)
1549{
1550 struct adapter *adapter = arg;
1551 if_t ifp;
1552 u32 reg_icr;
1553
1554 ifp = adapter->ifp;
1555
1556 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1557
1558 /* Hot eject? */
1559 if (reg_icr == 0xffffffff)
1560 return FILTER_STRAY;
1561
1562 /* Definitely not our interrupt. */
1563 if (reg_icr == 0x0)
1564 return FILTER_STRAY;
1565
1566 /*
1567 * Starting with the 82571 chip, bit 31 should be used to
1568 * determine whether the interrupt belongs to us.
1569 */
1570 if (adapter->hw.mac.type >= e1000_82571 &&
1571 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1572 return FILTER_STRAY;
1573
1574 em_disable_intr(adapter);
1575 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1576
1577 /* Link status change */
1578 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1579 adapter->hw.mac.get_link_status = 1;
1580 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1581 }
1582
1583 if (reg_icr & E1000_ICR_RXO)
1584 adapter->rx_overruns++;
1585 return FILTER_HANDLED;
1586}
1587
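/*
 * A sketch of the ICR triage used by em_irq_fast above, pulled out
 * as a hypothetical helper for illustration only (not part of the
 * driver):
 */
#if 0
static int
em_icr_is_ours(u32 icr, enum e1000_mac_type mac_type)
{
	if (icr == 0xffffffff)		/* hot eject: device is gone */
		return (0);
	if (icr == 0)			/* shared line, not our device */
		return (0);
	/* 82571 and later: bit 31 confirms the interrupt is ours */
	if (mac_type >= e1000_82571)
		return ((icr & E1000_ICR_INT_ASSERTED) != 0);
	return (1);
}
#endif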
1588/* Combined RX/TX handler, used by Legacy and MSI */
1589static void
1590em_handle_que(void *context, int pending)
1591{
1592 struct adapter *adapter = context;
1593 if_t ifp = adapter->ifp;
1594 struct tx_ring *txr = adapter->tx_rings;
1595 struct rx_ring *rxr = adapter->rx_rings;
1596
1597 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1598 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1599
1600 EM_TX_LOCK(txr);
1601 em_txeof(txr);
1602#ifdef EM_MULTIQUEUE
1603 if (!drbr_empty(ifp, txr->br))
1604 em_mq_start_locked(ifp, txr);
1605#else
1606 if (!if_sendq_empty(ifp))
1607 em_start_locked(ifp, txr);
1608#endif
1609 EM_TX_UNLOCK(txr);
1610 if (more) {
1611 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1612 return;
1613 }
1614 }
1615
1616 em_enable_intr(adapter);
1617 return;
1618}
1619
1620
1621/*********************************************************************
1622 *
1623 * MSIX Interrupt Service Routines
1624 *
1625 **********************************************************************/
1626static void
1627em_msix_tx(void *arg)
1628{
1629 struct tx_ring *txr = arg;
1630 struct adapter *adapter = txr->adapter;
1631 if_t ifp = adapter->ifp;
1632
1633 ++txr->tx_irq;
1634 EM_TX_LOCK(txr);
1635 em_txeof(txr);
1636#ifdef EM_MULTIQUEUE
1637 if (!drbr_empty(ifp, txr->br))
1638 em_mq_start_locked(ifp, txr);
1639#else
1640 if (!if_sendq_empty(ifp))
1641 em_start_locked(ifp, txr);
1642#endif
1643
1644 /* Reenable this interrupt */
1645 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1646 EM_TX_UNLOCK(txr);
1647 return;
1648}
1649
1650/*********************************************************************
1651 *
1652 * MSIX RX Interrupt Service routine
1653 *
1654 **********************************************************************/
1655
1656static void
1657em_msix_rx(void *arg)
1658{
1659 struct rx_ring *rxr = arg;
1660 struct adapter *adapter = rxr->adapter;
1661 bool more;
1662
1663 ++rxr->rx_irq;
1664 if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1665 return;
1666 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1667 if (more)
1668 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1669 else {
1670 /* Reenable this interrupt */
1671 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1672 }
1673 return;
1674}
1675
1676/*********************************************************************
1677 *
1678 * MSIX Link Fast Interrupt Service routine
1679 *
1680 **********************************************************************/
1681static void
1682em_msix_link(void *arg)
1683{
1684 struct adapter *adapter = arg;
1685 u32 reg_icr;
1686
1687 ++adapter->link_irq;
1688 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1689
1690 if (reg_icr & E1000_ICR_RXO)
1691 adapter->rx_overruns++;
1692
1693 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1694 adapter->hw.mac.get_link_status = 1;
1695 em_handle_link(adapter, 0);
1696 } else
1697 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1698 EM_MSIX_LINK | E1000_IMS_LSC);
1699 /*
 1700 ** Because we must read the ICR for this interrupt,
 1701 ** it may clear other causes using autoclear; for
 1702 ** this reason we simply create a soft interrupt
 1703 ** for all these vectors.
1704 */
1705 if (reg_icr) {
1706 E1000_WRITE_REG(&adapter->hw,
1707 E1000_ICS, adapter->ims);
1708 }
1709 return;
1710}
1711
1712static void
1713em_handle_rx(void *context, int pending)
1714{
1715 struct rx_ring *rxr = context;
1716 struct adapter *adapter = rxr->adapter;
1717 bool more;
1718
1719 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1720 if (more)
1721 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1722 else {
1723 /* Reenable this interrupt */
1724 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1725 }
1726}
1727
1728static void
1729em_handle_tx(void *context, int pending)
1730{
1731 struct tx_ring *txr = context;
1732 struct adapter *adapter = txr->adapter;
1733 if_t ifp = adapter->ifp;
1734
1735 EM_TX_LOCK(txr);
1736 em_txeof(txr);
1737#ifdef EM_MULTIQUEUE
1738 if (!drbr_empty(ifp, txr->br))
1739 em_mq_start_locked(ifp, txr);
1740#else
1741 if (!if_sendq_empty(ifp))
1742 em_start_locked(ifp, txr);
1743#endif
1744 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1745 EM_TX_UNLOCK(txr);
1746}
1747
1748static void
1749em_handle_link(void *context, int pending)
1750{
1751 struct adapter *adapter = context;
1752 struct tx_ring *txr = adapter->tx_rings;
1753 if_t ifp = adapter->ifp;
1754
1755 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1756 return;
1757
1758 EM_CORE_LOCK(adapter);
1759 callout_stop(&adapter->timer);
1760 em_update_link_status(adapter);
1761 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1762 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1763 EM_MSIX_LINK | E1000_IMS_LSC);
1764 if (adapter->link_active) {
1765 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1766 EM_TX_LOCK(txr);
1767#ifdef EM_MULTIQUEUE
1768 if (!drbr_empty(ifp, txr->br))
1769 em_mq_start_locked(ifp, txr);
1770#else
1771			if (!if_sendq_empty(ifp))
1772 em_start_locked(ifp, txr);
1773#endif
1774 EM_TX_UNLOCK(txr);
1775 }
1776 }
1777 EM_CORE_UNLOCK(adapter);
1778}
1779
1780
1781/*********************************************************************
1782 *
1783 * Media Ioctl callback
1784 *
1785 * This routine is called whenever the user queries the status of
1786 * the interface using ifconfig.
1787 *
1788 **********************************************************************/
1789static void
1790em_media_status(if_t ifp, struct ifmediareq *ifmr)
1791{
1792 struct adapter *adapter = if_getsoftc(ifp);
1793 u_char fiber_type = IFM_1000_SX;
1794
1795 INIT_DEBUGOUT("em_media_status: begin");
1796
1797 EM_CORE_LOCK(adapter);
1798 em_update_link_status(adapter);
1799
1800 ifmr->ifm_status = IFM_AVALID;
1801 ifmr->ifm_active = IFM_ETHER;
1802
1803 if (!adapter->link_active) {
1804 EM_CORE_UNLOCK(adapter);
1805 return;
1806 }
1807
1808 ifmr->ifm_status |= IFM_ACTIVE;
1809
1810 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1811 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1812 ifmr->ifm_active |= fiber_type | IFM_FDX;
1813 } else {
1814 switch (adapter->link_speed) {
1815 case 10:
1816 ifmr->ifm_active |= IFM_10_T;
1817 break;
1818 case 100:
1819 ifmr->ifm_active |= IFM_100_TX;
1820 break;
1821 case 1000:
1822 ifmr->ifm_active |= IFM_1000_T;
1823 break;
1824 }
1825 if (adapter->link_duplex == FULL_DUPLEX)
1826 ifmr->ifm_active |= IFM_FDX;
1827 else
1828 ifmr->ifm_active |= IFM_HDX;
1829 }
1830 EM_CORE_UNLOCK(adapter);
1831}
1832
1833/*********************************************************************
1834 *
1835 * Media Ioctl callback
1836 *
1837 * This routine is called when the user changes speed/duplex using
1838 * the media/mediaopt options with ifconfig.
1839 *
1840 **********************************************************************/
1841static int
1842em_media_change(if_t ifp)
1843{
1844 struct adapter *adapter = if_getsoftc(ifp);
1845 struct ifmedia *ifm = &adapter->media;
1846
1847 INIT_DEBUGOUT("em_media_change: begin");
1848
1849 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1850 return (EINVAL);
1851
1852 EM_CORE_LOCK(adapter);
1853 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1854 case IFM_AUTO:
1855 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1856 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1857 break;
1858 case IFM_1000_LX:
1859 case IFM_1000_SX:
1860 case IFM_1000_T:
1861 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1862 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1863 break;
1864 case IFM_100_TX:
1865 adapter->hw.mac.autoneg = FALSE;
1866 adapter->hw.phy.autoneg_advertised = 0;
1867 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1868 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1869 else
1870 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1871 break;
1872 case IFM_10_T:
1873 adapter->hw.mac.autoneg = FALSE;
1874 adapter->hw.phy.autoneg_advertised = 0;
1875 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1876 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1877 else
1878 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1879 break;
1880 default:
1881 device_printf(adapter->dev, "Unsupported media type\n");
1882 }
1883
1884 em_init_locked(adapter);
1885 EM_CORE_UNLOCK(adapter);
1886
1887 return (0);
1888}
1889
1890/*********************************************************************
1891 *
1892 * This routine maps the mbufs to tx descriptors.
1893 *
1894 * return 0 on success, positive on failure
1895 **********************************************************************/
1896
1897static int
1898em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1899{
1900 struct adapter *adapter = txr->adapter;
1901 bus_dma_segment_t segs[EM_MAX_SCATTER];
1902 bus_dmamap_t map;
1903 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1904 struct e1000_tx_desc *ctxd = NULL;
1905 struct mbuf *m_head;
1906 struct ether_header *eh;
1907 struct ip *ip = NULL;
1908 struct tcphdr *tp = NULL;
1909 u32 txd_upper = 0, txd_lower = 0;
1910 int ip_off, poff;
1911 int nsegs, i, j, first, last = 0;
1912 int error;
1913 bool do_tso, tso_desc, remap = TRUE;
1914
1915 m_head = *m_headp;
1916 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1917 tso_desc = FALSE;
1918 ip_off = poff = 0;
1919
1920 /*
1921	 * Intel recommends that the entire IP/TCP header reside in a single
1922 * buffer. If multiple descriptors are used to describe the IP and
1923 * TCP header, each descriptor should describe one or more
1924 * complete headers; descriptors referencing only parts of headers
1925 * are not supported. If all layer headers are not coalesced into
1926 * a single buffer, each buffer should not cross a 4KB boundary,
1927 * or be larger than the maximum read request size.
1928	 * The controller also requires the IP/TCP header to be modified
1929	 * for TSO to work, so we first obtain a writable mbuf chain and
1930	 * then coalesce the ethernet/IP/TCP headers into a single buffer
1931	 * to meet the controller's requirement. This also simplifies
1932	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1933 */
1934 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1935 if (do_tso || (m_head->m_next != NULL &&
1936 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1937 if (M_WRITABLE(*m_headp) == 0) {
1938 m_head = m_dup(*m_headp, M_NOWAIT);
1939 m_freem(*m_headp);
1940 if (m_head == NULL) {
1941 *m_headp = NULL;
1942 return (ENOBUFS);
1943 }
1944 *m_headp = m_head;
1945 }
1946 }
1947 /*
1948 * XXX
1949 * Assume IPv4, we don't have TSO/checksum offload support
1950 * for IPv6 yet.
1951 */
1952 ip_off = sizeof(struct ether_header);
1953 if (m_head->m_len < ip_off) {
1954 m_head = m_pullup(m_head, ip_off);
1955 if (m_head == NULL) {
1956 *m_headp = NULL;
1957 return (ENOBUFS);
1958 }
1959 }
1960 eh = mtod(m_head, struct ether_header *);
1961 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1962 ip_off = sizeof(struct ether_vlan_header);
1963 if (m_head->m_len < ip_off) {
1964 m_head = m_pullup(m_head, ip_off);
1965 if (m_head == NULL) {
1966 *m_headp = NULL;
1967 return (ENOBUFS);
1968 }
1969 }
1970 }
1971 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1972 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1973 if (m_head == NULL) {
1974 *m_headp = NULL;
1975 return (ENOBUFS);
1976 }
1977 }
1978 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979 poff = ip_off + (ip->ip_hl << 2);
1980
1981 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1982 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1983 m_head = m_pullup(m_head, poff +
1984 sizeof(struct tcphdr));
1985 if (m_head == NULL) {
1986 *m_headp = NULL;
1987 return (ENOBUFS);
1988 }
1989 }
1990 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1991 /*
1992 * TSO workaround:
1993			 * pull 4 more bytes of data into the mbuf.
1994 */
1995 if (m_head->m_len < poff + (tp->th_off << 2)) {
1996 m_head = m_pullup(m_head, poff +
1997 (tp->th_off << 2) +
1998 TSO_WORKAROUND);
1999 if (m_head == NULL) {
2000 *m_headp = NULL;
2001 return (ENOBUFS);
2002 }
2003 }
2004 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2006 if (do_tso) {
2007 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2008 (ip->ip_hl << 2) +
2009 (tp->th_off << 2));
2010 ip->ip_sum = 0;
2011 /*
2012				 * The pseudo TCP checksum does not include the TCP
2013				 * payload length, so the driver must recompute the
2014				 * checksum here as the hardware expects to see it;
2015				 * the hardware adds the length per segment. This
2016				 * follows Microsoft's Large Send specification.
2017 */
2018 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2019 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2020 }
2021 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2022 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2023 m_head = m_pullup(m_head, poff +
2024 sizeof(struct udphdr));
2025 if (m_head == NULL) {
2026 *m_headp = NULL;
2027 return (ENOBUFS);
2028 }
2029 }
2030 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031 }
2032 *m_headp = m_head;
2033 }
2034
2035 /*
2036 * Map the packet for DMA
2037 *
2038	 * Capture the first descriptor index;
2039	 * this descriptor will have the index
2040	 * of the EOP, which is the only one that
2041	 * now gets a DONE bit writeback.
2042 */
2043 first = txr->next_avail_desc;
2044 tx_buffer = &txr->tx_buffers[first];
2045 tx_buffer_mapped = tx_buffer;
2046 map = tx_buffer->map;
2047
2048retry:
2049 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2050 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2051
2052 /*
2053 * There are two types of errors we can (try) to handle:
2054 * - EFBIG means the mbuf chain was too long and bus_dma ran
2055 * out of segments. Defragment the mbuf chain and try again.
2056 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2057 * at this point in time. Defer sending and try again later.
2058 * All other errors, in particular EINVAL, are fatal and prevent the
2059 * mbuf chain from ever going through. Drop it and report error.
2060 */
2061 if (error == EFBIG && remap) {
2062 struct mbuf *m;
2063
2064 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2065 if (m == NULL) {
2066 adapter->mbuf_defrag_failed++;
2067 m_freem(*m_headp);
2068 *m_headp = NULL;
2069 return (ENOBUFS);
2070 }
2071 *m_headp = m;
2072
2073 /* Try it again, but only once */
2074 remap = FALSE;
2075 goto retry;
2076 } else if (error != 0) {
2077 adapter->no_tx_dma_setup++;
2078 m_freem(*m_headp);
2079 *m_headp = NULL;
2080 return (error);
2081 }
2082
2083 /*
2084 * TSO Hardware workaround, if this packet is not
2085 * TSO, and is only a single descriptor long, and
2086 * it follows a TSO burst, then we need to add a
2087 * sentinel descriptor to prevent premature writeback.
2088 */
2089 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2090 if (nsegs == 1)
2091 tso_desc = TRUE;
2092 txr->tx_tso = FALSE;
2093 }
2094
2095 if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2096 txr->no_desc_avail++;
2097 bus_dmamap_unload(txr->txtag, map);
2098 return (ENOBUFS);
2099 }
2100 m_head = *m_headp;
2101
2102 /* Do hardware assists */
2103 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2104 em_tso_setup(txr, m_head, ip_off, ip, tp,
2105 &txd_upper, &txd_lower);
2106 /* we need to make a final sentinel transmit desc */
2107 tso_desc = TRUE;
2108 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2109 em_transmit_checksum_setup(txr, m_head,
2110 ip_off, ip, &txd_upper, &txd_lower);
2111
2112 if (m_head->m_flags & M_VLANTAG) {
2113 /* Set the vlan id. */
2114 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2115 /* Tell hardware to add tag */
2116 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2117 }
2118
2119 i = txr->next_avail_desc;
2120
2121 /* Set up our transmit descriptors */
2122 for (j = 0; j < nsegs; j++) {
2123 bus_size_t seg_len;
2124 bus_addr_t seg_addr;
2125
2126 tx_buffer = &txr->tx_buffers[i];
2127 ctxd = &txr->tx_base[i];
2128 seg_addr = segs[j].ds_addr;
2129 seg_len = segs[j].ds_len;
2130 /*
2131 ** TSO Workaround:
2132 ** If this is the last descriptor, we want to
2133		** split it so we have a small final sentinel descriptor.
2134 */
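		/*
		** A sketch of the split (assuming TSO_WORKAROUND is 4, per
		** the pullup above): a final 100-byte segment is emitted as
		** a 96-byte descriptor plus a 4-byte sentinel descriptor,
		** so the DONE writeback cannot happen before the whole
		** buffer has been fetched.
		*/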
2135 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2136 seg_len -= TSO_WORKAROUND;
2137 ctxd->buffer_addr = htole64(seg_addr);
2138 ctxd->lower.data = htole32(
2139 adapter->txd_cmd | txd_lower | seg_len);
2140 ctxd->upper.data = htole32(txd_upper);
2141 if (++i == adapter->num_tx_desc)
2142 i = 0;
2143
2144 /* Now make the sentinel */
2145 txr->tx_avail--;
2146 ctxd = &txr->tx_base[i];
2147 tx_buffer = &txr->tx_buffers[i];
2148 ctxd->buffer_addr =
2149 htole64(seg_addr + seg_len);
2150 ctxd->lower.data = htole32(
2151 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2152 ctxd->upper.data =
2153 htole32(txd_upper);
2154 last = i;
2155 if (++i == adapter->num_tx_desc)
2156 i = 0;
2157 } else {
2158 ctxd->buffer_addr = htole64(seg_addr);
2159 ctxd->lower.data = htole32(
2160 adapter->txd_cmd | txd_lower | seg_len);
2161 ctxd->upper.data = htole32(txd_upper);
2162 last = i;
2163 if (++i == adapter->num_tx_desc)
2164 i = 0;
2165 }
2166 tx_buffer->m_head = NULL;
2167 tx_buffer->next_eop = -1;
2168 }
2169
2170 txr->next_avail_desc = i;
2171 txr->tx_avail -= nsegs;
2172
2173 tx_buffer->m_head = m_head;
2174 /*
2175	** Here we swap the maps so the last descriptor,
2176	** which gets the completion interrupt, has the
2177	** real (loaded) map, and the first descriptor
2178	** gets the unused map from this descriptor.
2179 */
2180 tx_buffer_mapped->map = tx_buffer->map;
2181 tx_buffer->map = map;
2182 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2183
2184 /*
2185 * Last Descriptor of Packet
2186 * needs End Of Packet (EOP)
2187 * and Report Status (RS)
2188 */
2189 ctxd->lower.data |=
2190 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2191 /*
2192	 * Keep track in the first buffer of which
2193	 * descriptor will be written back.
2194 */
2195 tx_buffer = &txr->tx_buffers[first];
2196 tx_buffer->next_eop = last;
2197
2198 /*
2199 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2200 * that this frame is available to transmit.
2201 */
2202 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2203 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2204 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
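	/*
	** The hardware owns descriptors from TDH up to, but not including,
	** TDT, so this write posts descriptors [first, i) for transmission;
	** e.g. if first was 5 and i is now 8, descriptors 5..7 go out.
	*/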
2205
2206 return (0);
2207}
2208
2209static void
2210em_set_promisc(struct adapter *adapter)
2211{
2212 if_t ifp = adapter->ifp;
2213 u32 reg_rctl;
2214
2215 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2216
2217 if (if_getflags(ifp) & IFF_PROMISC) {
2218 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2219 /* Turn this on if you want to see bad packets */
2220 if (em_debug_sbp)
2221 reg_rctl |= E1000_RCTL_SBP;
2222 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2223 } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2224 reg_rctl |= E1000_RCTL_MPE;
2225 reg_rctl &= ~E1000_RCTL_UPE;
2226 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2227 }
2228}
2229
2230static void
2231em_disable_promisc(struct adapter *adapter)
2232{
2233 if_t ifp = adapter->ifp;
2234 u32 reg_rctl;
2235 int mcnt = 0;
2236
2237 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2238 reg_rctl &= (~E1000_RCTL_UPE);
2239 if (if_getflags(ifp) & IFF_ALLMULTI)
2240 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2241 else
2242 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2243 /* Don't disable if in MAX groups */
2244 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2245 reg_rctl &= (~E1000_RCTL_MPE);
2246 reg_rctl &= (~E1000_RCTL_SBP);
2247 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2248}
2249
2250
2251/*********************************************************************
2252 * Multicast Update
2253 *
2254 * This routine is called whenever multicast address list is updated.
2255 *
2256 **********************************************************************/
2257
2258static void
2259em_set_multi(struct adapter *adapter)
2260{
2261 if_t ifp = adapter->ifp;
2262 u32 reg_rctl = 0;
2263 u8 *mta; /* Multicast array memory */
2264 int mcnt = 0;
2265
2266 IOCTL_DEBUGOUT("em_set_multi: begin");
2267
2268 mta = adapter->mta;
2269 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2270
2271 if (adapter->hw.mac.type == e1000_82542 &&
2272 adapter->hw.revision_id == E1000_REVISION_2) {
2273 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2274 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2275 e1000_pci_clear_mwi(&adapter->hw);
2276 reg_rctl |= E1000_RCTL_RST;
2277 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2278 msec_delay(5);
2279 }
2280
2281 if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2282
2283 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2284 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2285 reg_rctl |= E1000_RCTL_MPE;
2286 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2287 } else
2288 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2289
2290 if (adapter->hw.mac.type == e1000_82542 &&
2291 adapter->hw.revision_id == E1000_REVISION_2) {
2292 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2293 reg_rctl &= ~E1000_RCTL_RST;
2294 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295 msec_delay(5);
2296 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2297 e1000_pci_set_mwi(&adapter->hw);
2298 }
2299}
2300
2301
2302/*********************************************************************
2303 * Timer routine
2304 *
2305 * This routine checks for link status and updates statistics.
2306 *
2307 **********************************************************************/
2308
2309static void
2310em_local_timer(void *arg)
2311{
2312 struct adapter *adapter = arg;
2313 if_t ifp = adapter->ifp;
2314 struct tx_ring *txr = adapter->tx_rings;
2315 struct rx_ring *rxr = adapter->rx_rings;
2316 u32 trigger = 0;
2317
2318 EM_CORE_LOCK_ASSERT(adapter);
2319
2320 em_update_link_status(adapter);
2321 em_update_stats_counters(adapter);
2322
2323 /* Reset LAA into RAR[0] on 82571 */
2324 if ((adapter->hw.mac.type == e1000_82571) &&
2325 e1000_get_laa_state_82571(&adapter->hw))
2326 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2327
2328 /* Mask to use in the irq trigger */
2329 if (adapter->msix_mem) {
2330 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2331 trigger |= rxr->ims;
2332 rxr = adapter->rx_rings;
2333 } else
2334 trigger = E1000_ICS_RXDMT0;
2335
2336 /*
2337	** Check on the state of the TX queue(s); this
2338	** can be done without the lock because it's
2339	** read-only and the HUNG state is static once set.
2340 */
2341 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2342 if (txr->busy == EM_TX_HUNG)
2343 goto hung;
2344 if (txr->busy >= EM_TX_MAXTRIES)
2345 txr->busy = EM_TX_HUNG;
2346 /* Schedule a TX tasklet if needed */
2347 if (txr->tx_avail <= EM_MAX_SCATTER)
2348 taskqueue_enqueue(txr->tq, &txr->tx_task);
2349 }
2350
2351 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2352#ifndef DEVICE_POLLING
2353 /* Trigger an RX interrupt to guarantee mbuf refresh */
2354 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2355#endif
2356 return;
2357hung:
2358 /* Looks like we're hung */
2359	device_printf(adapter->dev, "Watchdog timeout Queue[%d] -- resetting\n",
2360 txr->me);
2361 em_print_debug_info(adapter);
2362 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2363 adapter->watchdog_events++;
2364 em_init_locked(adapter);
2365}
2366
2367
2368static void
2369em_update_link_status(struct adapter *adapter)
2370{
2371 struct e1000_hw *hw = &adapter->hw;
2372 if_t ifp = adapter->ifp;
2373 device_t dev = adapter->dev;
2374 struct tx_ring *txr = adapter->tx_rings;
2375 u32 link_check = 0;
2376
2377 /* Get the cached link value or read phy for real */
2378 switch (hw->phy.media_type) {
2379 case e1000_media_type_copper:
2380 if (hw->mac.get_link_status) {
2381 if (hw->mac.type == e1000_pch_spt)
2382 msec_delay(50);
2383 /* Do the work to read phy */
2384 e1000_check_for_link(hw);
2385 link_check = !hw->mac.get_link_status;
2386 if (link_check) /* ESB2 fix */
2387 e1000_cfg_on_link_up(hw);
2388 } else
2389 link_check = TRUE;
2390 break;
2391 case e1000_media_type_fiber:
2392 e1000_check_for_link(hw);
2393 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2394 E1000_STATUS_LU);
2395 break;
2396 case e1000_media_type_internal_serdes:
2397 e1000_check_for_link(hw);
2398 link_check = adapter->hw.mac.serdes_has_link;
2399 break;
2400 default:
2401 case e1000_media_type_unknown:
2402 break;
2403 }
2404
2405 /* Now check for a transition */
2406 if (link_check && (adapter->link_active == 0)) {
2407 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2408 &adapter->link_duplex);
2409 /* Check if we must disable SPEED_MODE bit on PCI-E */
2410 if ((adapter->link_speed != SPEED_1000) &&
2411 ((hw->mac.type == e1000_82571) ||
2412 (hw->mac.type == e1000_82572))) {
2413 int tarc0;
2414 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2415 tarc0 &= ~TARC_SPEED_MODE_BIT;
2416 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2417 }
2418 if (bootverbose)
2419 device_printf(dev, "Link is up %d Mbps %s\n",
2420 adapter->link_speed,
2421 ((adapter->link_duplex == FULL_DUPLEX) ?
2422 "Full Duplex" : "Half Duplex"));
2423 adapter->link_active = 1;
2424 adapter->smartspeed = 0;
2425 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2426 if_link_state_change(ifp, LINK_STATE_UP);
2427 } else if (!link_check && (adapter->link_active == 1)) {
2428 if_setbaudrate(ifp, 0);
2429 adapter->link_speed = 0;
2430 adapter->link_duplex = 0;
2431 if (bootverbose)
2432 device_printf(dev, "Link is Down\n");
2433 adapter->link_active = 0;
2434 /* Link down, disable hang detection */
2435 for (int i = 0; i < adapter->num_queues; i++, txr++)
2436 txr->busy = EM_TX_IDLE;
2437 if_link_state_change(ifp, LINK_STATE_DOWN);
2438 }
2439}
2440
2441/*********************************************************************
2442 *
2443 * This routine disables all traffic on the adapter by issuing a
2444 * global reset on the MAC and deallocates TX/RX buffers.
2445 *
2446 * This routine should always be called with BOTH the CORE
2447 * and TX locks.
2448 **********************************************************************/
2449
2450static void
2451em_stop(void *arg)
2452{
2453 struct adapter *adapter = arg;
2454 if_t ifp = adapter->ifp;
2455 struct tx_ring *txr = adapter->tx_rings;
2456
2457 EM_CORE_LOCK_ASSERT(adapter);
2458
2459 INIT_DEBUGOUT("em_stop: begin");
2460
2461 em_disable_intr(adapter);
2462 callout_stop(&adapter->timer);
2463
2464 /* Tell the stack that the interface is no longer active */
2465 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2466
2467 /* Disarm Hang Detection. */
2468 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2469 EM_TX_LOCK(txr);
2470 txr->busy = EM_TX_IDLE;
2471 EM_TX_UNLOCK(txr);
2472 }
2473
2474 /* I219 needs some special flushing to avoid hangs */
2475 if (adapter->hw.mac.type == e1000_pch_spt)
2476 em_flush_desc_rings(adapter);
2477
2478 e1000_reset_hw(&adapter->hw);
2479 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2480
2481 e1000_led_off(&adapter->hw);
2482 e1000_cleanup_led(&adapter->hw);
2483}
2484
2485
2486/*********************************************************************
2487 *
2488 * Determine hardware revision.
2489 *
2490 **********************************************************************/
2491static void
2492em_identify_hardware(struct adapter *adapter)
2493{
2494 device_t dev = adapter->dev;
2495
2496 /* Make sure our PCI config space has the necessary stuff set */
2497 pci_enable_busmaster(dev);
2498 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2499
2500 /* Save off the information about this board */
2501 adapter->hw.vendor_id = pci_get_vendor(dev);
2502 adapter->hw.device_id = pci_get_device(dev);
2503 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2504 adapter->hw.subsystem_vendor_id =
2505 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2506 adapter->hw.subsystem_device_id =
2507 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2508
2509 /* Do Shared Code Init and Setup */
2510 if (e1000_set_mac_type(&adapter->hw)) {
2511 device_printf(dev, "Setup init failure\n");
2512 return;
2513 }
2514}
2515
2516static int
2517em_allocate_pci_resources(struct adapter *adapter)
2518{
2519 device_t dev = adapter->dev;
2520 int rid;
2521
2522 rid = PCIR_BAR(0);
2523 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2524 &rid, RF_ACTIVE);
2525 if (adapter->memory == NULL) {
2526 device_printf(dev, "Unable to allocate bus resource: memory\n");
2527 return (ENXIO);
2528 }
2529 adapter->osdep.mem_bus_space_tag =
2530 rman_get_bustag(adapter->memory);
2531 adapter->osdep.mem_bus_space_handle =
2532 rman_get_bushandle(adapter->memory);
2533 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2534
2535 adapter->hw.back = &adapter->osdep;
2536
2537 return (0);
2538}
2539
2540/*********************************************************************
2541 *
2542 * Setup the Legacy or MSI Interrupt handler
2543 *
2544 **********************************************************************/
2545int
2546em_allocate_legacy(struct adapter *adapter)
2547{
2548 device_t dev = adapter->dev;
2549 struct tx_ring *txr = adapter->tx_rings;
2550 int error, rid = 0;
2551
2552 /* Manually turn off all interrupts */
2553 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2554
2555 if (adapter->msix == 1) /* using MSI */
2556 rid = 1;
2557 /* We allocate a single interrupt resource */
2558 adapter->res = bus_alloc_resource_any(dev,
2559 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2560 if (adapter->res == NULL) {
2561 device_printf(dev, "Unable to allocate bus resource: "
2562 "interrupt\n");
2563 return (ENXIO);
2564 }
2565
2566 /*
2567 * Allocate a fast interrupt and the associated
2568 * deferred processing contexts.
2569 */
2570 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2571 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2572 taskqueue_thread_enqueue, &adapter->tq);
2573 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2574 device_get_nameunit(adapter->dev));
2575 /* Use a TX only tasklet for local timer */
2576 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2577 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2578 taskqueue_thread_enqueue, &txr->tq);
2579 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2580 device_get_nameunit(adapter->dev));
2581 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2582 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2583 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2584 device_printf(dev, "Failed to register fast interrupt "
2585 "handler: %d\n", error);
2586 taskqueue_free(adapter->tq);
2587 adapter->tq = NULL;
2588 return (error);
2589 }
2590
2591 return (0);
2592}
2593
2594/*********************************************************************
2595 *
2596 * Setup the MSIX Interrupt handlers.
2597 * This is not really multiqueue; rather,
2598 * it's just separate interrupt vectors
2599 * for TX, RX, and Link.
2600 *
2601 **********************************************************************/
2602int
2603em_allocate_msix(struct adapter *adapter)
2604{
2605 device_t dev = adapter->dev;
2606 struct tx_ring *txr = adapter->tx_rings;
2607 struct rx_ring *rxr = adapter->rx_rings;
2608 int error, rid, vector = 0;
2609 int cpu_id = 0;
2610
2611
2612 /* Make sure all interrupts are disabled */
2613 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2614
2615 /* First set up ring resources */
2616 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2617
2618 /* RX ring */
2619 rid = vector + 1;
2620
2621 rxr->res = bus_alloc_resource_any(dev,
2622 SYS_RES_IRQ, &rid, RF_ACTIVE);
2623 if (rxr->res == NULL) {
2624 device_printf(dev,
2625 "Unable to allocate bus resource: "
2626 "RX MSIX Interrupt %d\n", i);
2627 return (ENXIO);
2628 }
2629 if ((error = bus_setup_intr(dev, rxr->res,
2630 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2631 rxr, &rxr->tag)) != 0) {
2632			device_printf(dev, "Failed to register RX handler\n");
2633 return (error);
2634 }
2635#if __FreeBSD_version >= 800504
2636 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2637#endif
2638 rxr->msix = vector;
2639
2640 if (em_last_bind_cpu < 0)
2641 em_last_bind_cpu = CPU_FIRST();
2642 cpu_id = em_last_bind_cpu;
2643 bus_bind_intr(dev, rxr->res, cpu_id);
2644
2645 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2646 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2647 taskqueue_thread_enqueue, &rxr->tq);
2648 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2649 device_get_nameunit(adapter->dev), cpu_id);
2650 /*
2651 ** Set the bit to enable interrupt
2652 ** in E1000_IMS -- bits 20 and 21
2653 ** are for RX0 and RX1, note this has
2654 ** NOTHING to do with the MSIX vector
2655 */
2656 rxr->ims = 1 << (20 + i);
2657 adapter->ims |= rxr->ims;
2658 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2659
2660 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2661 }
2662
2663 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2664 /* TX ring */
2665 rid = vector + 1;
2666 txr->res = bus_alloc_resource_any(dev,
2667 SYS_RES_IRQ, &rid, RF_ACTIVE);
2668 if (txr->res == NULL) {
2669 device_printf(dev,
2670 "Unable to allocate bus resource: "
2671 "TX MSIX Interrupt %d\n", i);
2672 return (ENXIO);
2673 }
2674 if ((error = bus_setup_intr(dev, txr->res,
2675 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2676 txr, &txr->tag)) != 0) {
2677			device_printf(dev, "Failed to register TX handler\n");
2678 return (error);
2679 }
2680#if __FreeBSD_version >= 800504
2681 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2682#endif
2683 txr->msix = vector;
2684
2685 if (em_last_bind_cpu < 0)
2686 em_last_bind_cpu = CPU_FIRST();
2687 cpu_id = em_last_bind_cpu;
2688 bus_bind_intr(dev, txr->res, cpu_id);
2689
2690 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2691 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2692 taskqueue_thread_enqueue, &txr->tq);
2693 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2694 device_get_nameunit(adapter->dev), cpu_id);
2695 /*
2696 ** Set the bit to enable interrupt
2697 ** in E1000_IMS -- bits 22 and 23
2698 ** are for TX0 and TX1, note this has
2699 ** NOTHING to do with the MSIX vector
2700 */
2701 txr->ims = 1 << (22 + i);
2702 adapter->ims |= txr->ims;
2703 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2704
2705 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2706 }
2707
2708 /* Link interrupt */
2709 rid = vector + 1;
2710 adapter->res = bus_alloc_resource_any(dev,
2711 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2712 if (!adapter->res) {
2713		device_printf(dev, "Unable to allocate "
2714 "bus resource: Link interrupt [%d]\n", rid);
2715 return (ENXIO);
2716 }
2717 /* Set the link handler function */
2718 error = bus_setup_intr(dev, adapter->res,
2719 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2720 em_msix_link, adapter, &adapter->tag);
2721 if (error) {
2722 adapter->res = NULL;
2723		device_printf(dev, "Failed to register LINK handler\n");
2724 return (error);
2725 }
2726#if __FreeBSD_version >= 800504
2727 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2728#endif
2729 adapter->linkvec = vector;
2730 adapter->ivars |= (8 | vector) << 16;
2731 adapter->ivars |= 0x80000000;
2732
2733 return (0);
2734}
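
/*
** A worked example of the 82574 IVAR value assembled above, assuming a
** single queue (vectors: RX0 = 0, TX0 = 1, link = 2): the RX loop ORs in
** (8 | 0) << 0, the TX loop (8 | 1) << 8, and the link setup (8 | 2) << 16,
** giving ivars = 0x800a0908 once bit 31 is OR'd in; the 8 in each 4-bit
** field is the valid bit for that IVAR entry.
*/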
2735
2736
2737static void
2738em_free_pci_resources(struct adapter *adapter)
2739{
2740 device_t dev = adapter->dev;
2741 struct tx_ring *txr;
2742 struct rx_ring *rxr;
2743 int rid;
2744
2745
2746 /*
2747 ** Release all the queue interrupt resources:
2748 */
2749 for (int i = 0; i < adapter->num_queues; i++) {
2750 txr = &adapter->tx_rings[i];
2751 /* an early abort? */
2752 if (txr == NULL)
2753 break;
2754		rid = txr->msix + 1;
2755 if (txr->tag != NULL) {
2756 bus_teardown_intr(dev, txr->res, txr->tag);
2757 txr->tag = NULL;
2758 }
2759 if (txr->res != NULL)
2760 bus_release_resource(dev, SYS_RES_IRQ,
2761 rid, txr->res);
2762
2763 rxr = &adapter->rx_rings[i];
2764 /* an early abort? */
2765 if (rxr == NULL)
2766 break;
2767		rid = rxr->msix + 1;
2768 if (rxr->tag != NULL) {
2769 bus_teardown_intr(dev, rxr->res, rxr->tag);
2770 rxr->tag = NULL;
2771 }
2772 if (rxr->res != NULL)
2773 bus_release_resource(dev, SYS_RES_IRQ,
2774 rid, rxr->res);
2775 }
2776
2777 if (adapter->linkvec) /* we are doing MSIX */
2778 rid = adapter->linkvec + 1;
2779 else
2780		rid = (adapter->msix != 0) ? 1 : 0;
2781
2782 if (adapter->tag != NULL) {
2783 bus_teardown_intr(dev, adapter->res, adapter->tag);
2784 adapter->tag = NULL;
2785 }
2786
2787 if (adapter->res != NULL)
2788 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2789
2790
2791 if (adapter->msix)
2792 pci_release_msi(dev);
2793
2794 if (adapter->msix_mem != NULL)
2795 bus_release_resource(dev, SYS_RES_MEMORY,
2796 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2797
2798 if (adapter->memory != NULL)
2799 bus_release_resource(dev, SYS_RES_MEMORY,
2800 PCIR_BAR(0), adapter->memory);
2801
2802 if (adapter->flash != NULL)
2803 bus_release_resource(dev, SYS_RES_MEMORY,
2804 EM_FLASH, adapter->flash);
2805}
2806
2807/*
2808 * Setup MSI or MSI/X
2809 */
2810static int
2811em_setup_msix(struct adapter *adapter)
2812{
2813 device_t dev = adapter->dev;
2814 int val;
2815
2816 /* Nearly always going to use one queue */
2817 adapter->num_queues = 1;
2818
2819 /*
2820 ** Try using MSI-X for Hartwell adapters
2821 */
2822 if ((adapter->hw.mac.type == e1000_82574) &&
2823 (em_enable_msix == TRUE)) {
2824#ifdef EM_MULTIQUEUE
2825 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2826 if (adapter->num_queues > 1)
2827 em_enable_vectors_82574(adapter);
2828#endif
2829 /* Map the MSIX BAR */
2830 int rid = PCIR_BAR(EM_MSIX_BAR);
2831 adapter->msix_mem = bus_alloc_resource_any(dev,
2832 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2833 if (adapter->msix_mem == NULL) {
2834 /* May not be enabled */
2835 device_printf(adapter->dev,
2836			    "Unable to map MSIX table\n");
2837 goto msi;
2838 }
2839 val = pci_msix_count(dev);
2840
2841#ifdef EM_MULTIQUEUE
2842 /* We need 5 vectors in the multiqueue case */
2843		if (adapter->num_queues > 1) {
2844 if (val >= 5)
2845 val = 5;
2846 else {
2847 adapter->num_queues = 1;
2848 device_printf(adapter->dev,
2849 "Insufficient MSIX vectors for >1 queue, "
2850 "using single queue...\n");
2851 goto msix_one;
2852 }
2853 } else {
2854msix_one:
2855#endif
2856 if (val >= 3)
2857 val = 3;
2858 else {
2859 device_printf(adapter->dev,
2860 "Insufficient MSIX vectors, using MSI\n");
2861 goto msi;
2862 }
2863#ifdef EM_MULTIQUEUE
2864 }
2865#endif
2866
2867 if ((pci_alloc_msix(dev, &val) == 0)) {
2868 device_printf(adapter->dev,
2869 "Using MSIX interrupts "
2870 "with %d vectors\n", val);
2871 return (val);
2872 }
2873
2874 /*
2875 ** If MSIX alloc failed or provided us with
2876 ** less than needed, free and fall through to MSI
2877 */
2878 pci_release_msi(dev);
2879 }
2880msi:
2881 if (adapter->msix_mem != NULL) {
2882 bus_release_resource(dev, SYS_RES_MEMORY,
2883 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2884 adapter->msix_mem = NULL;
2885 }
2886 val = 1;
2887 if (pci_alloc_msi(dev, &val) == 0) {
2888 device_printf(adapter->dev, "Using an MSI interrupt\n");
2889 return (val);
2890 }
2891 /* Should only happen due to manual configuration */
2892	device_printf(adapter->dev, "No MSI/MSI-X, using a legacy IRQ\n");
2893 return (0);
2894}
2895
2896
2897/*
2898** The 3 following flush routines are used as a workaround in the
2899** I219 client parts and only for them.
2900**
2901** em_flush_tx_ring - remove all descriptors from the tx_ring
2902**
2903** We want to clear all pending descriptors from the TX ring.
2904	** Zeroing happens when the HW reads the regs. We assign the ring itself
2905	** as the data buffer of the dummy descriptor; we don't care about the
2906	** data since we are about to reset the HW.
2907*/
2908static void
2909em_flush_tx_ring(struct adapter *adapter)
2910{
2911 struct e1000_hw *hw = &adapter->hw;
2912 struct tx_ring *txr = adapter->tx_rings;
2913 struct e1000_tx_desc *txd;
2914 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
2915 u16 size = 512;
2916
2917 tctl = E1000_READ_REG(hw, E1000_TCTL);
2918 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2919
2920 txd = &txr->tx_base[txr->next_avail_desc++];
2921 if (txr->next_avail_desc == adapter->num_tx_desc)
2922 txr->next_avail_desc = 0;
2923
2924 /* Just use the ring as a dummy buffer addr */
2925 txd->buffer_addr = txr->txdma.dma_paddr;
2926 txd->lower.data = htole32(txd_lower | size);
2927 txd->upper.data = 0;
2928
2929 /* flush descriptors to memory before notifying the HW */
2930 wmb();
2931
2932 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2933 mb();
2934 usec_delay(250);
2935}
2936
2937/*
2938** em_flush_rx_ring - remove all descriptors from the rx_ring
2939**
2940** Mark all descriptors in the RX ring as consumed and disable the rx ring
2941*/
2942static void
2943em_flush_rx_ring(struct adapter *adapter)
2944{
2945 struct e1000_hw *hw = &adapter->hw;
2946 u32 rctl, rxdctl;
2947
2948 rctl = E1000_READ_REG(hw, E1000_RCTL);
2949 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2950 E1000_WRITE_FLUSH(hw);
2951 usec_delay(150);
2952
2953 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2954 /* zero the lower 14 bits (prefetch and host thresholds) */
2955 rxdctl &= 0xffffc000;
2956 /*
2957 * update thresholds: prefetch threshold to 31, host threshold to 1
2958 * and make sure the granularity is "descriptors" and not "cache lines"
2959 */
2960 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2961 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
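	/*
	** Decoded, the value OR'd in above is PTHRESH = 0x1f (bits 5:0)
	** and HTHRESH = 1 (starting at bit 8), i.e. the 31/1 thresholds
	** named in the comment, with the remaining flag selecting
	** descriptor granularity.
	*/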
2962
2963 /* momentarily enable the RX ring for the changes to take effect */
2964 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2965 E1000_WRITE_FLUSH(hw);
2966 usec_delay(150);
2967 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2968}
2969
2970/*
2971** em_flush_desc_rings - remove all descriptors from the descriptor rings
2972**
2973** In i219, the descriptor rings must be emptied before resetting the HW
2974** or before changing the device state to D3 during runtime (runtime PM).
2975**
2976** Failure to do this will cause the HW to enter a unit hang state which can
2977	** only be released by a PCI reset on the device.
2978**
2979*/
2980static void
2981em_flush_desc_rings(struct adapter *adapter)
2982{
2983 struct e1000_hw *hw = &adapter->hw;
2984 device_t dev = adapter->dev;
2985 u16 hang_state;
2986 u32 fext_nvm11, tdlen;
2987
2988 /* First, disable MULR fix in FEXTNVM11 */
2989 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
2990 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
2991 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
2992
2993	/* do nothing if we're not in a faulty state, or if the queue is empty */
2994 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
2995 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
2996 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
2997 return;
2998 em_flush_tx_ring(adapter);
2999
3000 /* recheck, maybe the fault is caused by the rx ring */
3001 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3002 if (hang_state & FLUSH_DESC_REQUIRED)
3003 em_flush_rx_ring(adapter);
3004}
3005
3006
3007/*********************************************************************
3008 *
3009 * Initialize the hardware to a configuration
3010 * as specified by the adapter structure.
3011 *
3012 **********************************************************************/
3013static void
3014em_reset(struct adapter *adapter)
3015{
3016 device_t dev = adapter->dev;
3017 if_t ifp = adapter->ifp;
3018 struct e1000_hw *hw = &adapter->hw;
3019 u16 rx_buffer_size;
3020 u32 pba;
3021
3022 INIT_DEBUGOUT("em_reset: begin");
3023
3024 /* Set up smart power down as default off on newer adapters. */
3025 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3026 hw->mac.type == e1000_82572)) {
3027 u16 phy_tmp = 0;
3028
3029 /* Speed up time to link by disabling smart power down. */
3030 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3031 phy_tmp &= ~IGP02E1000_PM_SPD;
3032 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3033 }
3034
3035 /*
3036 * Packet Buffer Allocation (PBA)
3037	 * Writing PBA sets the receive portion of the buffer;
3038	 * the remainder is used for the transmit buffer.
3039 */
3040 switch (hw->mac.type) {
3041 /* Total Packet Buffer on these is 48K */
3042 case e1000_82571:
3043 case e1000_82572:
3044 case e1000_80003es2lan:
3045 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3046 break;
3047 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3048 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3049 break;
3050 case e1000_82574:
3051 case e1000_82583:
3052 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3053 break;
3054 case e1000_ich8lan:
3055 pba = E1000_PBA_8K;
3056 break;
3057 case e1000_ich9lan:
3058 case e1000_ich10lan:
3059 /* Boost Receive side for jumbo frames */
3060 if (adapter->hw.mac.max_frame_size > 4096)
3061 pba = E1000_PBA_14K;
3062 else
3063 pba = E1000_PBA_10K;
3064 break;
3065 case e1000_pchlan:
3066 case e1000_pch2lan:
3067 case e1000_pch_lpt:
3068 case e1000_pch_spt:
3069 pba = E1000_PBA_26K;
3070 break;
3071 default:
3072 if (adapter->hw.mac.max_frame_size > 8192)
3073 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3074 else
3075 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3076 }
3077 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
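	/*
	** For example, on an 82571 (48K total packet buffer) the
	** E1000_PBA_32K value written here yields a 32K receive buffer,
	** leaving 16K for transmit, as noted in the case above.
	*/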
3078
3079 /*
3080 * These parameters control the automatic generation (Tx) and
3081 * response (Rx) to Ethernet PAUSE frames.
3082 * - High water mark should allow for at least two frames to be
3083 * received after sending an XOFF.
3084 * - Low water mark works best when it is very near the high water mark.
3085 * This allows the receiver to restart by sending XON when it has
3086	 *   drained a bit. Here we use an arbitrary value of 1500, which will
3087 * restart after one full frame is pulled from the buffer. There
3088 * could be several smaller frames in the buffer and if so they will
3089 * not trigger the XON until their total number reduces the buffer
3090 * by 1500.
3091 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3092 */
3093	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3094 hw->fc.high_water = rx_buffer_size -
3095 roundup2(adapter->hw.mac.max_frame_size, 1024);
3096 hw->fc.low_water = hw->fc.high_water - 1500;
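	/*
	** A worked example, assuming the PBA register reads back 26 (KB)
	** and a standard 1518-byte max frame: rx_buffer_size = 26 << 10 =
	** 26624, high_water = 26624 - roundup2(1518, 1024) = 24576, and
	** low_water = 24576 - 1500 = 23076.
	*/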
3097
3098 if (adapter->fc) /* locally set flow control value? */
3099 hw->fc.requested_mode = adapter->fc;
3100 else
3101 hw->fc.requested_mode = e1000_fc_full;
3102
3103 if (hw->mac.type == e1000_80003es2lan)
3104 hw->fc.pause_time = 0xFFFF;
3105 else
3106 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3107
3108 hw->fc.send_xon = TRUE;
3109
3110 /* Device specific overrides/settings */
3111 switch (hw->mac.type) {
3112 case e1000_pchlan:
3113 /* Workaround: no TX flow ctrl for PCH */
3114 hw->fc.requested_mode = e1000_fc_rx_pause;
3115 hw->fc.pause_time = 0xFFFF; /* override */
3116 if (if_getmtu(ifp) > ETHERMTU) {
3117 hw->fc.high_water = 0x3500;
3118 hw->fc.low_water = 0x1500;
3119 } else {
3120 hw->fc.high_water = 0x5000;
3121 hw->fc.low_water = 0x3000;
3122 }
3123 hw->fc.refresh_time = 0x1000;
3124 break;
3125 case e1000_pch2lan:
3126 case e1000_pch_lpt:
3127 case e1000_pch_spt:
3128 hw->fc.high_water = 0x5C20;
3129 hw->fc.low_water = 0x5048;
3130 hw->fc.pause_time = 0x0650;
3131 hw->fc.refresh_time = 0x0400;
3132 /* Jumbos need adjusted PBA */
3133 if (if_getmtu(ifp) > ETHERMTU)
3134 E1000_WRITE_REG(hw, E1000_PBA, 12);
3135 else
3136 E1000_WRITE_REG(hw, E1000_PBA, 26);
3137 break;
3138 case e1000_ich9lan:
3139 case e1000_ich10lan:
3140 if (if_getmtu(ifp) > ETHERMTU) {
3141 hw->fc.high_water = 0x2800;
3142 hw->fc.low_water = hw->fc.high_water - 8;
3143 break;
3144 }
3145 /* else fall thru */
3146 default:
3147 if (hw->mac.type == e1000_80003es2lan)
3148 hw->fc.pause_time = 0xFFFF;
3149 break;
3150 }
3151
3152 /* I219 needs some special flushing to avoid hangs */
3153 if (hw->mac.type == e1000_pch_spt)
3154 em_flush_desc_rings(adapter);
3155
3156 /* Issue a global reset */
3157 e1000_reset_hw(hw);
3158 E1000_WRITE_REG(hw, E1000_WUC, 0);
3159 em_disable_aspm(adapter);
3160 /* and a re-init */
3161 if (e1000_init_hw(hw) < 0) {
3162 device_printf(dev, "Hardware Initialization Failed\n");
3163 return;
3164 }
3165
3166 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3167 e1000_get_phy_info(hw);
3168 e1000_check_for_link(hw);
3169 return;
3170}
3171
3172/*********************************************************************
3173 *
3174 * Setup networking device structure and register an interface.
3175 *
3176 **********************************************************************/
3177static int
3178em_setup_interface(device_t dev, struct adapter *adapter)
3179{
3180 if_t ifp;
3181
3182 INIT_DEBUGOUT("em_setup_interface: begin");
3183
3184 ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3185	if (ifp == NULL) {
3186		device_printf(dev, "cannot allocate ifnet structure\n");
3187 return (-1);
3188 }
3189 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3190 if_setdev(ifp, dev);
3191 if_setinitfn(ifp, em_init);
3192 if_setsoftc(ifp, adapter);
3193 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3194 if_setioctlfn(ifp, em_ioctl);
3195 if_setgetcounterfn(ifp, em_get_counter);
3196 /* TSO parameters */
3197 ifp->if_hw_tsomax = IP_MAXPACKET;
3198 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3199 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3200
3201#ifdef EM_MULTIQUEUE
3202 /* Multiqueue stack interface */
3203 if_settransmitfn(ifp, em_mq_start);
3204 if_setqflushfn(ifp, em_qflush);
3205#else
3206 if_setstartfn(ifp, em_start);
3207 if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3208 if_setsendqready(ifp);
3209#endif
3210
3211 ether_ifattach(ifp, adapter->hw.mac.addr);
3212
3213 if_setcapabilities(ifp, 0);
3214 if_setcapenable(ifp, 0);
3215
3216
3217 if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3218 IFCAP_TSO4, 0);
3219 /*
3220 * Tell the upper layer(s) we
3221 * support full VLAN capability
3222 */
3223 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3224 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3225 IFCAP_VLAN_MTU, 0);
3226 if_setcapenable(ifp, if_getcapabilities(ifp));
3227
3228 /*
3229	** Don't turn this on by default: if vlans are
3230	** created on another pseudo device (e.g. lagg),
3231	** vlan events are not passed through, breaking
3232 ** operation, but with HW FILTER off it works. If
3233 ** using vlans directly on the em driver you can
3234 ** enable this and get full hardware tag filtering.
3235 */
3236 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3237
3238#ifdef DEVICE_POLLING
3239 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3240#endif
3241
3242 /* Enable only WOL MAGIC by default */
3243 if (adapter->wol) {
3244 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3245 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3246 }
3247
3248 /*
3249 * Specify the media types supported by this adapter and register
3250 * callbacks to update media and link information
3251 */
3252 ifmedia_init(&adapter->media, IFM_IMASK,
3253 em_media_change, em_media_status);
3254 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3255 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3256 u_char fiber_type = IFM_1000_SX; /* default type */
3257
3258 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3259 0, NULL);
3260 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3261 } else {
3262 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3263 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3264 0, NULL);
3265 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3266 0, NULL);
3267 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3268 0, NULL);
3269 if (adapter->hw.phy.type != e1000_phy_ife) {
3270 ifmedia_add(&adapter->media,
3271 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3272 ifmedia_add(&adapter->media,
3273 IFM_ETHER | IFM_1000_T, 0, NULL);
3274 }
3275 }
3276 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3277 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3278 return (0);
3279}
3280
3281
3282/*
3283 * Manage DMA'able memory.
3284 */
3285static void
3286em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3287{
3288 if (error)
3289 return;
3290 *(bus_addr_t *) arg = segs[0].ds_addr;
3291}
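
/*
** With BUS_DMA_NOWAIT, bus_dmamap_load() never defers the operation: the
** callback runs synchronously (or the load fails outright), so
** em_dma_malloc() below can read dma_paddr as soon as the load returns.
*/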
3292
3293static int
3294em_dma_malloc(struct adapter *adapter, bus_size_t size,
3295 struct em_dma_alloc *dma, int mapflags)
3296{
3297 int error;
3298
3299 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3300 EM_DBA_ALIGN, 0, /* alignment, bounds */
3301 BUS_SPACE_MAXADDR, /* lowaddr */
3302 BUS_SPACE_MAXADDR, /* highaddr */
3303 NULL, NULL, /* filter, filterarg */
3304 size, /* maxsize */
3305 1, /* nsegments */
3306 size, /* maxsegsize */
3307 0, /* flags */
3308 NULL, /* lockfunc */
3309 NULL, /* lockarg */
3310 &dma->dma_tag);
3311 if (error) {
3312 device_printf(adapter->dev,
3313 "%s: bus_dma_tag_create failed: %d\n",
3314 __func__, error);
3315 goto fail_0;
3316 }
3317
3318 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3319 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3320 if (error) {
3321 device_printf(adapter->dev,
3322 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3323 __func__, (uintmax_t)size, error);
3324 goto fail_2;
3325 }
3326
3327 dma->dma_paddr = 0;
3328 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3329 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3330 if (error || dma->dma_paddr == 0) {
3331 device_printf(adapter->dev,
3332 "%s: bus_dmamap_load failed: %d\n",
3333 __func__, error);
3334 goto fail_3;
3335 }
3336
3337 return (0);
3338
3339fail_3:
3340 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3341fail_2:
3342 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3343 bus_dma_tag_destroy(dma->dma_tag);
3344fail_0:
3345 dma->dma_tag = NULL;
3346
3347 return (error);
3348}
3349
3350static void
3351em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3352{
3353 if (dma->dma_tag == NULL)
3354 return;
3355 if (dma->dma_paddr != 0) {
3356 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3357 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3358 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3359 dma->dma_paddr = 0;
3360 }
3361 if (dma->dma_vaddr != NULL) {
3362 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3363 dma->dma_vaddr = NULL;
3364 }
3365 bus_dma_tag_destroy(dma->dma_tag);
3366 dma->dma_tag = NULL;
3367}
3368
3369
3370/*********************************************************************
3371 *
3372 * Allocate memory for the transmit and receive rings, and then
3373 * the descriptors associated with each, called only once at attach.
3374 *
3375 **********************************************************************/
3376static int
3377em_allocate_queues(struct adapter *adapter)
3378{
3379 device_t dev = adapter->dev;
3380 struct tx_ring *txr = NULL;
3381 struct rx_ring *rxr = NULL;
3382 int rsize, tsize, error = E1000_SUCCESS;
3383 int txconf = 0, rxconf = 0;
3384
3385
3386 /* Allocate the TX ring struct memory */
3387 if (!(adapter->tx_rings =
3388 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3389 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3390 device_printf(dev, "Unable to allocate TX ring memory\n");
3391 error = ENOMEM;
3392 goto fail;
3393 }
3394
3395 /* Now allocate the RX */
3396 if (!(adapter->rx_rings =
3397 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3398 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3399 device_printf(dev, "Unable to allocate RX ring memory\n");
3400 error = ENOMEM;
3401 goto rx_fail;
3402 }
3403
3404 tsize = roundup2(adapter->num_tx_desc *
3405 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3406 /*
3407	 * Now set up the TX queues; txconf is needed to handle the
3408	 * possibility that things fail midcourse, so we can undo
3409	 * the memory allocations gracefully
3410 */
3411 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3412 /* Set up some basics */
3413 txr = &adapter->tx_rings[i];
3414 txr->adapter = adapter;
3415 txr->me = i;
3416
3417 /* Initialize the TX lock */
3418 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3419 device_get_nameunit(dev), txr->me);
3420 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3421
3422 if (em_dma_malloc(adapter, tsize,
3423 &txr->txdma, BUS_DMA_NOWAIT)) {
3424 device_printf(dev,
3425 "Unable to allocate TX Descriptor memory\n");
3426 error = ENOMEM;
3427 goto err_tx_desc;
3428 }
3429 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3430 bzero((void *)txr->tx_base, tsize);
3431
3432 if (em_allocate_transmit_buffers(txr)) {
3433 device_printf(dev,
3434 "Critical Failure setting up transmit buffers\n");
3435 error = ENOMEM;
3436 goto err_tx_desc;
3437 }
3438#if __FreeBSD_version >= 800000
3439 /* Allocate a buf ring */
3440 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3441 M_WAITOK, &txr->tx_mtx);
3442#endif
3443 }
3444
3445 /*
3446 * Next the RX queues...
3447 */
3448 rsize = roundup2(adapter->num_rx_desc *
3449 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3450 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3451 rxr = &adapter->rx_rings[i];
3452 rxr->adapter = adapter;
3453 rxr->me = i;
3454
3455 /* Initialize the RX lock */
3456 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3457		    device_get_nameunit(dev), rxr->me);
3458 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3459
3460 if (em_dma_malloc(adapter, rsize,
3461 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3462 device_printf(dev,
3463 "Unable to allocate RxDescriptor memory\n");
3464 error = ENOMEM;
3465 goto err_rx_desc;
3466 }
3467 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3468 bzero((void *)rxr->rx_base, rsize);
3469
3470		/* Allocate receive buffers for the ring */
3471 if (em_allocate_receive_buffers(rxr)) {
3472 device_printf(dev,
3473 "Critical Failure setting up receive buffers\n");
3474 error = ENOMEM;
3475 goto err_rx_desc;
3476 }
3477 }
3478
3479 return (0);
3480
3481err_rx_desc:
3482 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3483 em_dma_free(adapter, &rxr->rxdma);
3484err_tx_desc:
3485 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3486 em_dma_free(adapter, &txr->txdma);
3487 free(adapter->rx_rings, M_DEVBUF);
3488rx_fail:
3489#if __FreeBSD_version >= 800000
3490 buf_ring_free(txr->br, M_DEVBUF);
3491#endif
3492 free(adapter->tx_rings, M_DEVBUF);
3493fail:
3494 return (error);
3495}
3496
3497
3498/*********************************************************************
3499 *
3500 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3501 * the information needed to transmit a packet on the wire. This is
3502 * called only once at attach, setup is done every reset.
3503 *
3504 **********************************************************************/
3505static int
3506em_allocate_transmit_buffers(struct tx_ring *txr)
3507{
3508 struct adapter *adapter = txr->adapter;
3509 device_t dev = adapter->dev;
3510 struct em_txbuffer *txbuf;
3511 int error, i;
3512
3513 /*
3514 * Setup DMA descriptor areas.
3515 */
3516 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3517 1, 0, /* alignment, bounds */
3518 BUS_SPACE_MAXADDR, /* lowaddr */
3519 BUS_SPACE_MAXADDR, /* highaddr */
3520 NULL, NULL, /* filter, filterarg */
3521 EM_TSO_SIZE, /* maxsize */
3522 EM_MAX_SCATTER, /* nsegments */
3523 PAGE_SIZE, /* maxsegsize */
3524 0, /* flags */
3525 NULL, /* lockfunc */
3526 NULL, /* lockfuncarg */
3527 &txr->txtag))) {
3528		device_printf(dev, "Unable to allocate TX DMA tag\n");
3529 goto fail;
3530 }
3531
3532 if (!(txr->tx_buffers =
3533 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3534 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3535 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3536 error = ENOMEM;
3537 goto fail;
3538 }
3539
3540 /* Create the descriptor buffer dma maps */
3541 txbuf = txr->tx_buffers;
3542 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3543 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3544 if (error != 0) {
3545 device_printf(dev, "Unable to create TX DMA map\n");
3546 goto fail;
3547 }
3548 }
3549
3550 return 0;
3551fail:
3552	/* Free everything; this handles the case where we failed partway through */
3553 em_free_transmit_structures(adapter);
3554 return (error);
3555}
3556
3557/*********************************************************************
3558 *
3559 * Initialize a transmit ring.
3560 *
3561 **********************************************************************/
3562static void
3563em_setup_transmit_ring(struct tx_ring *txr)
3564{
3565 struct adapter *adapter = txr->adapter;
3566 struct em_txbuffer *txbuf;
3567 int i;
3568#ifdef DEV_NETMAP
3569 struct netmap_slot *slot;
3570 struct netmap_adapter *na = netmap_getna(adapter->ifp);
3571#endif /* DEV_NETMAP */
3572
3573 /* Clear the old descriptor contents */
3574 EM_TX_LOCK(txr);
3575#ifdef DEV_NETMAP
3576 slot = netmap_reset(na, NR_TX, txr->me, 0);
3577#endif /* DEV_NETMAP */
3578
3579 bzero((void *)txr->tx_base,
3580 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3581 /* Reset indices */
3582 txr->next_avail_desc = 0;
3583 txr->next_to_clean = 0;
3584
3585 /* Free any existing tx buffers. */
3586 txbuf = txr->tx_buffers;
3587 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3588 if (txbuf->m_head != NULL) {
3589 bus_dmamap_sync(txr->txtag, txbuf->map,
3590 BUS_DMASYNC_POSTWRITE);
3591 bus_dmamap_unload(txr->txtag, txbuf->map);
3592 m_freem(txbuf->m_head);
3593 txbuf->m_head = NULL;
3594 }
3595#ifdef DEV_NETMAP
3596 if (slot) {
3597 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3598 uint64_t paddr;
3599 void *addr;
3600
3601 addr = PNMB(na, slot + si, &paddr);
3602 txr->tx_base[i].buffer_addr = htole64(paddr);
3603 /* reload the map for netmap mode */
3604 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3605 }
3606#endif /* DEV_NETMAP */
3607
3608 /* clear the watch index */
3609 txbuf->next_eop = -1;
3610 }
3611
3612 /* Set number of descriptors available */
3613 txr->tx_avail = adapter->num_tx_desc;
3614 txr->busy = EM_TX_IDLE;
3615
3616 /* Clear checksum offload context. */
3617 txr->last_hw_offload = 0;
3618 txr->last_hw_ipcss = 0;
3619 txr->last_hw_ipcso = 0;
3620 txr->last_hw_tucss = 0;
3621 txr->last_hw_tucso = 0;
3622
3623 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3624 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3625 EM_TX_UNLOCK(txr);
3626}
3627
3628/*********************************************************************
3629 *
3630 * Initialize all transmit rings.
3631 *
3632 **********************************************************************/
3633static void
3634em_setup_transmit_structures(struct adapter *adapter)
3635{
3636 struct tx_ring *txr = adapter->tx_rings;
3637
3638 for (int i = 0; i < adapter->num_queues; i++, txr++)
3639 em_setup_transmit_ring(txr);
3640
3641 return;
3642}
3643
3644/*********************************************************************
3645 *
3646 * Enable transmit unit.
3647 *
3648 **********************************************************************/
3649static void
3650em_initialize_transmit_unit(struct adapter *adapter)
3651{
3652 struct tx_ring *txr = adapter->tx_rings;
3653 struct e1000_hw *hw = &adapter->hw;
3654 u32 tctl, txdctl = 0, tarc, tipg = 0;
3655
3656 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3657
3658 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3659 u64 bus_addr = txr->txdma.dma_paddr;
3660 /* Base and Len of TX Ring */
3661 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3662 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3663 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3664 (u32)(bus_addr >> 32));
3665 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3666 (u32)bus_addr);
3667 /* Init the HEAD/TAIL indices */
3668 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3669 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3670
3671 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3672 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3673 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3674
3675 txr->busy = EM_TX_IDLE;
3676 txdctl = 0; /* clear txdctl */
3677 txdctl |= 0x1f; /* PTHRESH */
3678 txdctl |= 1 << 8; /* HTHRESH */
3679 txdctl |= 1 << 16;/* WTHRESH */
3680 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3681 txdctl |= E1000_TXDCTL_GRAN;
3682 txdctl |= 1 << 25; /* LWTHRESH */
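		/*
		 * For reference: the shifts above place PTHRESH in bits
		 * 5:0, HTHRESH in bits 13:8, WTHRESH in bits 21:16 and
		 * LWTHRESH in bits 31:25 of TXDCTL. Assuming
		 * E1000_TXDCTL_GRAN is bit 24, the value written below
		 * works out to 0x0341011F.
		 */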
3683
3684 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3685 }
3686
3687 /* Set the default values for the Tx Inter Packet Gap timer */
3688 switch (adapter->hw.mac.type) {
3689 case e1000_80003es2lan:
3690 tipg = DEFAULT_82543_TIPG_IPGR1;
3691 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3692 E1000_TIPG_IPGR2_SHIFT;
3693 break;
3694 default:
3695 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3696 (adapter->hw.phy.media_type ==
3697 e1000_media_type_internal_serdes))
3698 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3699 else
3700 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3701 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3702 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3703 }
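	/*
	 * All three inter-packet gap timers share the TIPG register:
	 * IPGT sits in the low bits, with IPGR1 and IPGR2 inserted at
	 * E1000_TIPG_IPGR1_SHIFT and E1000_TIPG_IPGR2_SHIFT as above.
	 */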
3704
3705 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3706 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3707
	if (adapter->hw.mac.type >= e1000_82540)
3709 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3710 adapter->tx_abs_int_delay.value);
3711
3712 if ((adapter->hw.mac.type == e1000_82571) ||
3713 (adapter->hw.mac.type == e1000_82572)) {
3714 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3715 tarc |= TARC_SPEED_MODE_BIT;
3716 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3717 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3718 /* errata: program both queues to unweighted RR */
3719 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3720 tarc |= 1;
3721 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3722 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3723 tarc |= 1;
3724 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3725 } else if (adapter->hw.mac.type == e1000_82574) {
3726 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3727 tarc |= TARC_ERRATA_BIT;
		if (adapter->num_queues > 1) {
3729 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3730 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3731 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3732 } else
3733 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3734 }
3735
3736 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3737 if (adapter->tx_int_delay.value > 0)
3738 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3739
3740 /* Program the Transmit Control Register */
3741 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3742 tctl &= ~E1000_TCTL_CT;
3743 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3744 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3745
3746 if (adapter->hw.mac.type >= e1000_82571)
3747 tctl |= E1000_TCTL_MULR;
3748
3749 /* This write will effectively turn on the transmit unit. */
3750 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3751
3752 if (hw->mac.type == e1000_pch_spt) {
3753 u32 reg;
3754 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3755 reg |= E1000_RCTL_RDMTS_HEX;
3756 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3757 reg = E1000_READ_REG(hw, E1000_TARC(0));
3758 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3759 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3760 }
3761}
3762
3763
3764/*********************************************************************
3765 *
3766 * Free all transmit rings.
3767 *
3768 **********************************************************************/
3769static void
3770em_free_transmit_structures(struct adapter *adapter)
3771{
3772 struct tx_ring *txr = adapter->tx_rings;
3773
3774 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3775 EM_TX_LOCK(txr);
3776 em_free_transmit_buffers(txr);
3777 em_dma_free(adapter, &txr->txdma);
3778 EM_TX_UNLOCK(txr);
3779 EM_TX_LOCK_DESTROY(txr);
3780 }
3781
3782 free(adapter->tx_rings, M_DEVBUF);
3783}
3784
3785/*********************************************************************
3786 *
3787 * Free transmit ring related data structures.
3788 *
3789 **********************************************************************/
3790static void
3791em_free_transmit_buffers(struct tx_ring *txr)
3792{
3793 struct adapter *adapter = txr->adapter;
3794 struct em_txbuffer *txbuf;
3795
3796 INIT_DEBUGOUT("free_transmit_ring: begin");
3797
3798 if (txr->tx_buffers == NULL)
3799 return;
3800
3801 for (int i = 0; i < adapter->num_tx_desc; i++) {
3802 txbuf = &txr->tx_buffers[i];
3803 if (txbuf->m_head != NULL) {
3804 bus_dmamap_sync(txr->txtag, txbuf->map,
3805 BUS_DMASYNC_POSTWRITE);
3806 bus_dmamap_unload(txr->txtag,
3807 txbuf->map);
3808 m_freem(txbuf->m_head);
3809 txbuf->m_head = NULL;
3810 if (txbuf->map != NULL) {
3811 bus_dmamap_destroy(txr->txtag,
3812 txbuf->map);
3813 txbuf->map = NULL;
3814 }
3815 } else if (txbuf->map != NULL) {
3816 bus_dmamap_unload(txr->txtag,
3817 txbuf->map);
3818 bus_dmamap_destroy(txr->txtag,
3819 txbuf->map);
3820 txbuf->map = NULL;
3821 }
3822 }
3823#if __FreeBSD_version >= 800000
3824 if (txr->br != NULL)
3825 buf_ring_free(txr->br, M_DEVBUF);
3826#endif
3827 if (txr->tx_buffers != NULL) {
3828 free(txr->tx_buffers, M_DEVBUF);
3829 txr->tx_buffers = NULL;
3830 }
3831 if (txr->txtag != NULL) {
3832 bus_dma_tag_destroy(txr->txtag);
3833 txr->txtag = NULL;
3834 }
3835 return;
3836}
3837
3838
/*********************************************************************
 * The offload context is protocol specific (TCP/UDP) and thus
 * only needs to be set when the protocol changes. A context
 * change can be a performance detriment, however, and might be
 * better simply disabled. The reason lies in the way the
 * controller pipelines requests from the Tx data DMA: up to four
 * requests can be pipelined, and they may belong to the same
 * packet or to multiple packets. However, all requests for one
 * packet are issued before any request for a subsequent packet,
 * and if a request for the next packet requires a context change
 * that request stalls until the previous request completes.
 * Setting up a new context therefore effectively disables
 * pipelined Tx data DMA, which in turn greatly degrades
 * performance when sending small frames.
 **********************************************************************/
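/*
 * Concretely, the caching below remembers the last (offload, ipcss,
 * ipcso, tucss, tucso) tuple programmed into the hardware in the
 * txr->last_hw_* fields and re-emits a context descriptor only when
 * that tuple changes, so back-to-back frames of one flow pay for a
 * single context setup.
 */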
3855static void
3856em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3857 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3858{
3859 struct adapter *adapter = txr->adapter;
3860 struct e1000_context_desc *TXD = NULL;
3861 struct em_txbuffer *tx_buffer;
3862 int cur, hdr_len;
3863 u32 cmd = 0;
3864 u16 offload = 0;
3865 u8 ipcso, ipcss, tucso, tucss;
3866
3867 ipcss = ipcso = tucss = tucso = 0;
3868 hdr_len = ip_off + (ip->ip_hl << 2);
3869 cur = txr->next_avail_desc;
3870
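	/*
	 * Worked example (hypothetical untagged IPv4/TCP frame with
	 * ip_off = 14 and ip_hl = 5): hdr_len = 14 + 20 = 34,
	 * ipcso = 14 + offsetof(struct ip, ip_sum) = 24 and
	 * tucso = 34 + offsetof(struct tcphdr, th_sum) = 50.
	 */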
3871 /* Setup of IP header checksum. */
3872 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3873 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3874 offload |= CSUM_IP;
3875 ipcss = ip_off;
3876 ipcso = ip_off + offsetof(struct ip, ip_sum);
3877 /*
3878 * Start offset for header checksum calculation.
3879 * End offset for header checksum calculation.
3880 * Offset of place to put the checksum.
3881 */
3882 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3883 TXD->lower_setup.ip_fields.ipcss = ipcss;
3884 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3885 TXD->lower_setup.ip_fields.ipcso = ipcso;
3886 cmd |= E1000_TXD_CMD_IP;
3887 }
3888
3889 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3890 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3891 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3892 offload |= CSUM_TCP;
3893 tucss = hdr_len;
3894 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
		/*
		 * The 82574L can only remember the *last* context used,
		 * regardless of the queue it was used for. We cannot reuse
		 * contexts on this hardware platform and must generate a new
		 * context every time. 82574L hardware spec, section 7.2.6,
		 * second note.
		 */
3902 if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for every
			 * frame takes a lot of processing time for the
			 * hardware. It also hurts performance significantly
			 * for small frames, so avoid it if the driver can use
			 * a previously configured checksum offload context.
			 */
3910 if (txr->last_hw_offload == offload) {
3911 if (offload & CSUM_IP) {
3912 if (txr->last_hw_ipcss == ipcss &&
3913 txr->last_hw_ipcso == ipcso &&
3914 txr->last_hw_tucss == tucss &&
3915 txr->last_hw_tucso == tucso)
3916 return;
3917 } else {
3918 if (txr->last_hw_tucss == tucss &&
3919 txr->last_hw_tucso == tucso)
3920 return;
3921 }
3922 }
3923 txr->last_hw_offload = offload;
3924 txr->last_hw_tucss = tucss;
3925 txr->last_hw_tucso = tucso;
3926 }
3927 /*
3928 * Start offset for payload checksum calculation.
3929 * End offset for payload checksum calculation.
3930 * Offset of place to put the checksum.
3931 */
3932 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3933 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3934 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3935 TXD->upper_setup.tcp_fields.tucso = tucso;
3936 cmd |= E1000_TXD_CMD_TCP;
3937 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3938 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3939 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3940 tucss = hdr_len;
3941 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		/*
		 * The 82574L can only remember the *last* context used,
		 * regardless of the queue it was used for. We cannot reuse
		 * contexts on this hardware platform and must generate a new
		 * context every time. 82574L hardware spec, section 7.2.6,
		 * second note.
		 */
3949 if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for every
			 * frame takes a lot of processing time for the
			 * hardware. It also hurts performance significantly
			 * for small frames, so avoid it if the driver can use
			 * a previously configured checksum offload context.
			 */
3957 if (txr->last_hw_offload == offload) {
3958 if (offload & CSUM_IP) {
3959 if (txr->last_hw_ipcss == ipcss &&
3960 txr->last_hw_ipcso == ipcso &&
3961 txr->last_hw_tucss == tucss &&
3962 txr->last_hw_tucso == tucso)
3963 return;
3964 } else {
3965 if (txr->last_hw_tucss == tucss &&
3966 txr->last_hw_tucso == tucso)
3967 return;
3968 }
3969 }
3970 txr->last_hw_offload = offload;
3971 txr->last_hw_tucss = tucss;
3972 txr->last_hw_tucso = tucso;
3973 }
3974 /*
3975 * Start offset for header checksum calculation.
3976 * End offset for header checksum calculation.
3977 * Offset of place to put the checksum.
3978 */
3979 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3980 TXD->upper_setup.tcp_fields.tucss = tucss;
3981 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3982 TXD->upper_setup.tcp_fields.tucso = tucso;
3983 }
3984
3985 if (offload & CSUM_IP) {
3986 txr->last_hw_ipcss = ipcss;
3987 txr->last_hw_ipcso = ipcso;
3988 }
3989
3990 TXD->tcp_seg_setup.data = htole32(0);
3991 TXD->cmd_and_length =
3992 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3993 tx_buffer = &txr->tx_buffers[cur];
3994 tx_buffer->m_head = NULL;
3995 tx_buffer->next_eop = -1;
3996
3997 if (++cur == adapter->num_tx_desc)
3998 cur = 0;
3999
4000 txr->tx_avail--;
4001 txr->next_avail_desc = cur;
4002}
4003
4004
4005/**********************************************************************
4006 *
4007 * Setup work for hardware segmentation offload (TSO)
4008 *
4009 **********************************************************************/
4010static void
4011em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4012 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4013{
4014 struct adapter *adapter = txr->adapter;
4015 struct e1000_context_desc *TXD;
4016 struct em_txbuffer *tx_buffer;
4017 int cur, hdr_len;
4018
	/*
	 * In theory we could reuse the same TSO context if and only if
	 * the frame is the same type (IP/TCP) and has the same MSS.
	 * However, checking whether a frame has the same IP/TCP layout
	 * is hard, so just ignore that and always establish a new TSO
	 * context.
	 */
4026 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4027 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
4028 E1000_TXD_DTYP_D | /* Data descr type */
4029 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
4030
4031 /* IP and/or TCP header checksum calculation and insertion. */
4032 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4033
4034 cur = txr->next_avail_desc;
4035 tx_buffer = &txr->tx_buffers[cur];
4036 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4037
4038 /*
4039 * Start offset for header checksum calculation.
4040 * End offset for header checksum calculation.
	 * Offset of the place to put the checksum.
4042 */
4043 TXD->lower_setup.ip_fields.ipcss = ip_off;
4044 TXD->lower_setup.ip_fields.ipcse =
4045 htole16(ip_off + (ip->ip_hl << 2) - 1);
4046 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4047 /*
4048 * Start offset for payload checksum calculation.
4049 * End offset for payload checksum calculation.
4050 * Offset of place to put the checksum.
4051 */
4052 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4053 TXD->upper_setup.tcp_fields.tucse = 0;
4054 TXD->upper_setup.tcp_fields.tucso =
4055 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4056 /*
4057 * Payload size per packet w/o any headers.
4058 * Length of all headers up to payload.
4059 */
4060 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4061 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4062
4063 TXD->cmd_and_length = htole32(adapter->txd_cmd |
4064 E1000_TXD_CMD_DEXT | /* Extended descr */
4065 E1000_TXD_CMD_TSE | /* TSE context */
4066 E1000_TXD_CMD_IP | /* Do IP csum */
4067 E1000_TXD_CMD_TCP | /* Do TCP checksum */
4068 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
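	/*
	 * Worked example (hypothetical numbers): a 7306-byte TSO packet
	 * with 66 bytes of headers and tso_segsz = 1448 leaves a paylen
	 * of 7240, which the hardware cuts into 7240 / 1448 = 5 wire
	 * frames, replicating the headers and fixing up the checksums.
	 */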
4069
4070 tx_buffer->m_head = NULL;
4071 tx_buffer->next_eop = -1;
4072
4073 if (++cur == adapter->num_tx_desc)
4074 cur = 0;
4075
4076 txr->tx_avail--;
4077 txr->next_avail_desc = cur;
4078 txr->tx_tso = TRUE;
4079}
4080
4081
4082/**********************************************************************
4083 *
4084 * Examine each tx_buffer in the used queue. If the hardware is done
4085 * processing the packet then free associated resources. The
4086 * tx_buffer is put back on the free queue.
4087 *
4088 **********************************************************************/
4089static void
4090em_txeof(struct tx_ring *txr)
4091{
4092 struct adapter *adapter = txr->adapter;
4093 int first, last, done, processed;
4094 struct em_txbuffer *tx_buffer;
4095 struct e1000_tx_desc *tx_desc, *eop_desc;
4096 if_t ifp = adapter->ifp;
4097
4098 EM_TX_LOCK_ASSERT(txr);
4099#ifdef DEV_NETMAP
4100 if (netmap_tx_irq(ifp, txr->me))
4101 return;
4102#endif /* DEV_NETMAP */
4103
4104 /* No work, make sure hang detection is disabled */
4105 if (txr->tx_avail == adapter->num_tx_desc) {
4106 txr->busy = EM_TX_IDLE;
4107 return;
4108 }
4109
4110 processed = 0;
4111 first = txr->next_to_clean;
4112 tx_desc = &txr->tx_base[first];
4113 tx_buffer = &txr->tx_buffers[first];
4114 last = tx_buffer->next_eop;
4115 eop_desc = &txr->tx_base[last];
4116
	/*
	 * Get the index of the first descriptor AFTER the EOP
	 * of the first packet, so that the inner while loop
	 * can use a simple inequality test.
	 */
4123 if (++last == adapter->num_tx_desc)
4124 last = 0;
4125 done = last;
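	/*
	 * Example (hypothetical): with num_tx_desc = 1024, first = 1020
	 * and the first packet's EOP at descriptor 1023, 'done' wraps
	 * to 0 and the inner loop below cleans 1020 through 1023.
	 */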
4126
4127 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4128 BUS_DMASYNC_POSTREAD);
4129
4130 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4131 /* We clean the range of the packet */
4132 while (first != done) {
4133 tx_desc->upper.data = 0;
4134 tx_desc->lower.data = 0;
4135 tx_desc->buffer_addr = 0;
4136 ++txr->tx_avail;
4137 ++processed;
4138
4139 if (tx_buffer->m_head) {
4140 bus_dmamap_sync(txr->txtag,
4141 tx_buffer->map,
4142 BUS_DMASYNC_POSTWRITE);
4143 bus_dmamap_unload(txr->txtag,
4144 tx_buffer->map);
4145 m_freem(tx_buffer->m_head);
4146 tx_buffer->m_head = NULL;
4147 }
4148 tx_buffer->next_eop = -1;
4149
4150 if (++first == adapter->num_tx_desc)
4151 first = 0;
4152
4153 tx_buffer = &txr->tx_buffers[first];
4154 tx_desc = &txr->tx_base[first];
4155 }
4156 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4157 /* See if we can continue to the next packet */
4158 last = tx_buffer->next_eop;
4159 if (last != -1) {
4160 eop_desc = &txr->tx_base[last];
4161 /* Get new done point */
4162 if (++last == adapter->num_tx_desc) last = 0;
4163 done = last;
4164 } else
4165 break;
4166 }
4167 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4168 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4169
4170 txr->next_to_clean = first;
4171
	/*
	** Hang detection: we know there's work outstanding
	** or we would have taken the early return above, so
	** cleaning no descriptors here indicates a potential hang.
	** The local timer will examine this and do a reset if needed.
	*/
4178 if (processed == 0) {
4179 if (txr->busy != EM_TX_HUNG)
4180 ++txr->busy;
4181 } else /* At least one descriptor was cleaned */
4182 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4183
4184 /*
4185 * If we have a minimum free, clear IFF_DRV_OACTIVE
4186 * to tell the stack that it is OK to send packets.
4187 * Notice that all writes of OACTIVE happen under the
4188 * TX lock which, with a single queue, guarantees
4189 * sanity.
4190 */
4191 if (txr->tx_avail >= EM_MAX_SCATTER) {
4192 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4193 }
4194
4195 /* Disable hang detection if all clean */
4196 if (txr->tx_avail == adapter->num_tx_desc)
4197 txr->busy = EM_TX_IDLE;
4198}
4199
4200/*********************************************************************
4201 *
4202 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4203 *
4204 **********************************************************************/
4205static void
4206em_refresh_mbufs(struct rx_ring *rxr, int limit)
4207{
4208 struct adapter *adapter = rxr->adapter;
4209 struct mbuf *m;
4210 bus_dma_segment_t segs;
4211 struct em_rxbuffer *rxbuf;
4212 int i, j, error, nsegs;
4213 bool cleaned = FALSE;
4214
4215 i = j = rxr->next_to_refresh;
4216 /*
4217 ** Get one descriptor beyond
4218 ** our work mark to control
4219 ** the loop.
4220 */
4221 if (++j == adapter->num_rx_desc)
4222 j = 0;
4223
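	/*
	 * Example (hypothetical): with num_rx_desc = 1024 and
	 * next_to_refresh = 1023, i starts at 1023 and j wraps to 0;
	 * refreshing proceeds 1023, 0, 1, ... and stops one descriptor
	 * short of 'limit', so we never run past the hardware's
	 * scan point.
	 */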
4224 while (j != limit) {
4225 rxbuf = &rxr->rx_buffers[i];
4226 if (rxbuf->m_head == NULL) {
4227 m = m_getjcl(M_NOWAIT, MT_DATA,
4228 M_PKTHDR, adapter->rx_mbuf_sz);
			/*
			** If a temporary resource shortage
			** causes a failure, just abort the refresh
			** for now; we will return to this point when
			** reinvoked from em_rxeof.
			*/
4235 if (m == NULL)
4236 goto update;
4237 } else
4238 m = rxbuf->m_head;
4239
4240 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4241 m->m_flags |= M_PKTHDR;
4242 m->m_data = m->m_ext.ext_buf;
4243
4244 /* Use bus_dma machinery to setup the memory mapping */
4245 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4246 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4247 if (error != 0) {
4248 printf("Refresh mbufs: hdr dmamap load"
4249 " failure - %d\n", error);
4250 m_free(m);
4251 rxbuf->m_head = NULL;
4252 goto update;
4253 }
4254 rxbuf->m_head = m;
4255 rxbuf->paddr = segs.ds_addr;
4256 bus_dmamap_sync(rxr->rxtag,
4257 rxbuf->map, BUS_DMASYNC_PREREAD);
4258 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4259 cleaned = TRUE;
4260
		i = j; /* Next is precalculated for us */
4262 rxr->next_to_refresh = i;
4263 /* Calculate next controlling index */
4264 if (++j == adapter->num_rx_desc)
4265 j = 0;
4266 }
4267update:
	/*
	** Update the tail pointer only if we refreshed
	** something, and only as far as we refreshed.
	*/
4272 if (cleaned)
4273 E1000_WRITE_REG(&adapter->hw,
4274 E1000_RDT(rxr->me), rxr->next_to_refresh);
4275
4276 return;
4277}
4278
4279
4280/*********************************************************************
4281 *
4282 * Allocate memory for rx_buffer structures. Since we use one
4283 * rx_buffer per received packet, the maximum number of rx_buffer's
4284 * that we'll need is equal to the number of receive descriptors
4285 * that we've allocated.
4286 *
4287 **********************************************************************/
4288static int
4289em_allocate_receive_buffers(struct rx_ring *rxr)
4290{
4291 struct adapter *adapter = rxr->adapter;
4292 device_t dev = adapter->dev;
4293 struct em_rxbuffer *rxbuf;
4294 int error;
4295
4296 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4297 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4298 if (rxr->rx_buffers == NULL) {
4299 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4300 return (ENOMEM);
4301 }
4302
4303 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4304 1, 0, /* alignment, bounds */
4305 BUS_SPACE_MAXADDR, /* lowaddr */
4306 BUS_SPACE_MAXADDR, /* highaddr */
4307 NULL, NULL, /* filter, filterarg */
4308 MJUM9BYTES, /* maxsize */
4309 1, /* nsegments */
4310 MJUM9BYTES, /* maxsegsize */
4311 0, /* flags */
4312 NULL, /* lockfunc */
4313 NULL, /* lockarg */
4314 &rxr->rxtag);
4315 if (error) {
4316 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4317 __func__, error);
4318 goto fail;
4319 }
4320
4321 rxbuf = rxr->rx_buffers;
	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4324 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4325 if (error) {
4326 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4327 __func__, error);
4328 goto fail;
4329 }
4330 }
4331
4332 return (0);
4333
4334fail:
4335 em_free_receive_structures(adapter);
4336 return (error);
4337}
4338
4339
4340/*********************************************************************
4341 *
4342 * Initialize a receive ring and its buffers.
4343 *
4344 **********************************************************************/
4345static int
4346em_setup_receive_ring(struct rx_ring *rxr)
4347{
4348 struct adapter *adapter = rxr->adapter;
4349 struct em_rxbuffer *rxbuf;
4350 bus_dma_segment_t seg[1];
4351 int rsize, nsegs, error = 0;
4352#ifdef DEV_NETMAP
4353 struct netmap_slot *slot;
4354 struct netmap_adapter *na = netmap_getna(adapter->ifp);
4355#endif
4356
4357
4358 /* Clear the ring contents */
4359 EM_RX_LOCK(rxr);
4360 rsize = roundup2(adapter->num_rx_desc *
4361 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4362 bzero((void *)rxr->rx_base, rsize);
4363#ifdef DEV_NETMAP
4364 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4365#endif
4366
4367 /*
4368 ** Free current RX buffer structs and their mbufs
4369 */
4370 for (int i = 0; i < adapter->num_rx_desc; i++) {
4371 rxbuf = &rxr->rx_buffers[i];
4372 if (rxbuf->m_head != NULL) {
4373 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4374 BUS_DMASYNC_POSTREAD);
4375 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4376 m_freem(rxbuf->m_head);
4377 rxbuf->m_head = NULL; /* mark as freed */
4378 }
4379 }
4380
4381 /* Now replenish the mbufs */
4382 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4383 rxbuf = &rxr->rx_buffers[j];
4384#ifdef DEV_NETMAP
4385 if (slot) {
4386 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4387 uint64_t paddr;
4388 void *addr;
4389
4390 addr = PNMB(na, slot + si, &paddr);
4391 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4392 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4393 continue;
4394 }
4395#endif /* DEV_NETMAP */
4396 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4397 M_PKTHDR, adapter->rx_mbuf_sz);
4398 if (rxbuf->m_head == NULL) {
4399 error = ENOBUFS;
4400 goto fail;
4401 }
4402 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4403 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4404 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4405
4406 /* Get the memory mapping */
4407 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4408 rxbuf->map, rxbuf->m_head, seg,
4409 &nsegs, BUS_DMA_NOWAIT);
4410 if (error != 0) {
4411 m_freem(rxbuf->m_head);
4412 rxbuf->m_head = NULL;
4413 goto fail;
4414 }
4415 bus_dmamap_sync(rxr->rxtag,
4416 rxbuf->map, BUS_DMASYNC_PREREAD);
4417
4418 rxbuf->paddr = seg[0].ds_addr;
4419 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4420 }
4421 rxr->next_to_check = 0;
4422 rxr->next_to_refresh = 0;
4423 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4424 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4425
4426fail:
4427 EM_RX_UNLOCK(rxr);
4428 return (error);
4429}
4430
4431/*********************************************************************
4432 *
4433 * Initialize all receive rings.
4434 *
4435 **********************************************************************/
4436static int
4437em_setup_receive_structures(struct adapter *adapter)
4438{
4439 struct rx_ring *rxr = adapter->rx_rings;
4440 int q;
4441
4442 for (q = 0; q < adapter->num_queues; q++, rxr++)
4443 if (em_setup_receive_ring(rxr))
4444 goto fail;
4445
4446 return (0);
4447fail:
	/*
	 * Free the RX buffers allocated so far. We only handle
	 * the rings that completed; the failing ring will have
	 * cleaned up after itself. 'q' failed, so it's the terminus.
	 */
4453 for (int i = 0; i < q; ++i) {
4454 rxr = &adapter->rx_rings[i];
4455 for (int n = 0; n < adapter->num_rx_desc; n++) {
4456 struct em_rxbuffer *rxbuf;
4457 rxbuf = &rxr->rx_buffers[n];
4458 if (rxbuf->m_head != NULL) {
4459 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4460 BUS_DMASYNC_POSTREAD);
4461 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4462 m_freem(rxbuf->m_head);
4463 rxbuf->m_head = NULL;
4464 }
4465 }
4466 rxr->next_to_check = 0;
4467 rxr->next_to_refresh = 0;
4468 }
4469
4470 return (ENOBUFS);
4471}
4472
4473/*********************************************************************
4474 *
4475 * Free all receive rings.
4476 *
4477 **********************************************************************/
4478static void
4479em_free_receive_structures(struct adapter *adapter)
4480{
4481 struct rx_ring *rxr = adapter->rx_rings;
4482
4483 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4484 em_free_receive_buffers(rxr);
4485 /* Free the ring memory as well */
4486 em_dma_free(adapter, &rxr->rxdma);
4487 EM_RX_LOCK_DESTROY(rxr);
4488 }
4489
4490 free(adapter->rx_rings, M_DEVBUF);
4491}
4492
4493
4494/*********************************************************************
4495 *
4496 * Free receive ring data structures
4497 *
4498 **********************************************************************/
4499static void
4500em_free_receive_buffers(struct rx_ring *rxr)
4501{
4502 struct adapter *adapter = rxr->adapter;
4503 struct em_rxbuffer *rxbuf = NULL;
4504
4505 INIT_DEBUGOUT("free_receive_buffers: begin");
4506
4507 if (rxr->rx_buffers != NULL) {
4508 for (int i = 0; i < adapter->num_rx_desc; i++) {
4509 rxbuf = &rxr->rx_buffers[i];
4510 if (rxbuf->map != NULL) {
4511 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4512 BUS_DMASYNC_POSTREAD);
4513 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4514 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4515 }
4516 if (rxbuf->m_head != NULL) {
4517 m_freem(rxbuf->m_head);
4518 rxbuf->m_head = NULL;
4519 }
4520 }
4521 free(rxr->rx_buffers, M_DEVBUF);
4522 rxr->rx_buffers = NULL;
4523 rxr->next_to_check = 0;
4524 rxr->next_to_refresh = 0;
4525 }
4526
4527 if (rxr->rxtag != NULL) {
4528 bus_dma_tag_destroy(rxr->rxtag);
4529 rxr->rxtag = NULL;
4530 }
4531
4532 return;
4533}
4534
4535
4536/*********************************************************************
4537 *
4538 * Enable receive unit.
4539 *
4540 **********************************************************************/
4541
4542static void
4543em_initialize_receive_unit(struct adapter *adapter)
4544{
4545 struct rx_ring *rxr = adapter->rx_rings;
4546 if_t ifp = adapter->ifp;
4547 struct e1000_hw *hw = &adapter->hw;
4548 u32 rctl, rxcsum, rfctl;
4549
4550 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4551
4552 /*
4553 * Make sure receives are disabled while setting
4554 * up the descriptor ring
4555 */
4556 rctl = E1000_READ_REG(hw, E1000_RCTL);
4557 /* Do not disable if ever enabled on this hardware */
4558 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4559 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4560
4561 /* Setup the Receive Control Register */
4562 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4563 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4564 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4565 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4566
4567 /* Do not store bad packets */
4568 rctl &= ~E1000_RCTL_SBP;
4569
4570 /* Enable Long Packet receive */
4571 if (if_getmtu(ifp) > ETHERMTU)
4572 rctl |= E1000_RCTL_LPE;
4573 else
4574 rctl &= ~E1000_RCTL_LPE;
4575
4576 /* Strip the CRC */
4577 if (!em_disable_crc_stripping)
4578 rctl |= E1000_RCTL_SECRC;
4579
4580 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4581 adapter->rx_abs_int_delay.value);
4582
4583 E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4584 adapter->rx_int_delay.value);
4585 /*
4586 * Set the interrupt throttling rate. Value is calculated
4587 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4588 */
4589 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
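	/*
	 * Worked example: assuming the driver default of
	 * MAX_INTS_PER_SEC = 8000, DEFAULT_ITR = 10^9 / (8000 * 256)
	 * ~= 488, i.e. at most one interrupt roughly every 125us.
	 */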
4590
4591 /* Use extended rx descriptor formats */
4592 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4593 rfctl |= E1000_RFCTL_EXTEN;
4594 /*
4595 ** When using MSIX interrupts we need to throttle
4596 ** using the EITR register (82574 only)
4597 */
4598 if (hw->mac.type == e1000_82574) {
4599 for (int i = 0; i < 4; i++)
4600 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4601 DEFAULT_ITR);
4602 /* Disable accelerated acknowledge */
4603 rfctl |= E1000_RFCTL_ACK_DIS;
4604 }
4605 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4606
4607 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4608 if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4609#ifdef EM_MULTIQUEUE
4610 rxcsum |= E1000_RXCSUM_TUOFL |
4611 E1000_RXCSUM_IPOFL |
4612 E1000_RXCSUM_PCSD;
4613#else
4614 rxcsum |= E1000_RXCSUM_TUOFL;
4615#endif
4616 } else
4617 rxcsum &= ~E1000_RXCSUM_TUOFL;
4618
4619 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4620
4621#ifdef EM_MULTIQUEUE
4622#define RSSKEYLEN 10
4623 if (adapter->num_queues > 1) {
4624 uint8_t rss_key[4 * RSSKEYLEN];
4625 uint32_t reta = 0;
4626 int i;
4627
4628 /*
4629 * Configure RSS key
4630 */
4631 arc4rand(rss_key, sizeof(rss_key), 0);
4632 for (i = 0; i < RSSKEYLEN; ++i) {
4633 uint32_t rssrk = 0;
4634
4635 rssrk = EM_RSSRK_VAL(rss_key, i);
			E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
4637 }
4638
4639 /*
4640 * Configure RSS redirect table in following fashion:
4641 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4642 */
4643 for (i = 0; i < sizeof(reta); ++i) {
4644 uint32_t q;
4645
4646 q = (i % adapter->num_queues) << 7;
4647 reta |= q << (8 * i);
4648 }
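		/*
		 * Example: with num_queues = 2 the loop above yields
		 * reta = 0x80008000, i.e. the four byte-wide entries
		 * alternate between queue 0 and queue 1 with the queue
		 * index carried in bit 7 of each entry.
		 */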
4649
4650 for (i = 0; i < 32; ++i) {
4651 E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4652 }
4653
4654 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4655 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4656 E1000_MRQC_RSS_FIELD_IPV4 |
4657 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4658 E1000_MRQC_RSS_FIELD_IPV6_EX |
4659 E1000_MRQC_RSS_FIELD_IPV6);
4660 }
4661#endif
	/*
	** XXX TEMPORARY WORKAROUND: on some systems with 82573,
	** long latencies are observed, e.g. on the Lenovo X60.
	** This change eliminates the problem, but since positive
	** values in RDTR are a known source of problems on other
	** platforms, another solution is being sought.
	*/
4669 if (hw->mac.type == e1000_82573)
4670 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4671
4672 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4673 /* Setup the Base and Length of the Rx Descriptor Ring */
4674 u64 bus_addr = rxr->rxdma.dma_paddr;
4675 u32 rdt = adapter->num_rx_desc - 1; /* default */
4676
4677 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4678 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4679 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4680 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4681 /* Setup the Head and Tail Descriptor Pointers */
4682 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4683#ifdef DEV_NETMAP
4684 /*
4685 * an init() while a netmap client is active must
4686 * preserve the rx buffers passed to userspace.
4687 */
4688 if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4689 struct netmap_adapter *na = netmap_getna(adapter->ifp);
4690 rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4691 }
4692#endif /* DEV_NETMAP */
4693 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4694 }
4695
4696 /*
4697 * Set PTHRESH for improved jumbo performance
4698 * According to 10.2.5.11 of Intel 82574 Datasheet,
4699 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4700 * Only write to RXDCTL(1) if there is a need for different
4701 * settings.
4702 */
4703 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4704 (adapter->hw.mac.type == e1000_pch2lan) ||
4705 (adapter->hw.mac.type == e1000_ich10lan)) &&
4706 (if_getmtu(ifp) > ETHERMTU)) {
4707 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4708 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4709 } else if (adapter->hw.mac.type == e1000_82574) {
4710 for (int i = 0; i < adapter->num_queues; i++) {
4711 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4712
4713 rxdctl |= 0x20; /* PTHRESH */
4714 rxdctl |= 4 << 8; /* HTHRESH */
4715 rxdctl |= 4 << 16;/* WTHRESH */
4716 rxdctl |= 1 << 24; /* Switch to granularity */
4717 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4718 }
4719 }
4720
4721 if (adapter->hw.mac.type >= e1000_pch2lan) {
4722 if (if_getmtu(ifp) > ETHERMTU)
4723 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4724 else
4725 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4726 }
4727
4728 /* Make sure VLAN Filters are off */
4729 rctl &= ~E1000_RCTL_VFE;
4730
4731 if (adapter->rx_mbuf_sz == MCLBYTES)
4732 rctl |= E1000_RCTL_SZ_2048;
4733 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4734 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4735 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4736 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
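	/*
	 * Per the 8254x RCTL definition, E1000_RCTL_BSEX scales the
	 * selected buffer size by 16 (e.g. 256 -> 4096, 512 -> 8192),
	 * which is why it accompanies the SZ_4096/SZ_8192 codes above.
	 */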
4737
	/* Make sure the descriptor type (DTYP) field is set to 00 */
4739 rctl &= ~0x00000C00;
4740 /* Write out the settings */
4741 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4742
4743 return;
4744}
4745
4746
4747/*********************************************************************
4748 *
4749 * This routine executes in interrupt context. It replenishes
4750 * the mbufs in the descriptor and sends data which has been
4751 * dma'ed into host memory to upper layer.
4752 *
4753 * We loop at most count times if count is > 0, or until done if
4754 * count < 0.
4755 *
4756 * For polling we also now return the number of cleaned packets
4757 *********************************************************************/
4758static bool
4759em_rxeof(struct rx_ring *rxr, int count, int *done)
4760{
4761 struct adapter *adapter = rxr->adapter;
4762 if_t ifp = adapter->ifp;
4763 struct mbuf *mp, *sendmp;
4764 u32 status = 0;
4765 u16 len;
4766 int i, processed, rxdone = 0;
4767 bool eop;
4768 union e1000_rx_desc_extended *cur;
4769
4770 EM_RX_LOCK(rxr);
4771
4772 /* Sync the ring */
4773 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4774 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4775
4776
4777#ifdef DEV_NETMAP
4778 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4779 EM_RX_UNLOCK(rxr);
4780 return (FALSE);
4781 }
4782#endif /* DEV_NETMAP */
4783
4784 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4785 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4786 break;
4787
4788 cur = &rxr->rx_base[i];
4789 status = le32toh(cur->wb.upper.status_error);
4790 mp = sendmp = NULL;
4791
4792 if ((status & E1000_RXD_STAT_DD) == 0)
4793 break;
4794
4795 len = le16toh(cur->wb.upper.length);
4796 eop = (status & E1000_RXD_STAT_EOP) != 0;
4797
4798 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4799 (rxr->discard == TRUE)) {
4800 adapter->dropped_pkts++;
4801 ++rxr->rx_discarded;
4802 if (!eop) /* Catch subsequent segs */
4803 rxr->discard = TRUE;
4804 else
4805 rxr->discard = FALSE;
4806 em_rx_discard(rxr, i);
4807 goto next_desc;
4808 }
4809 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4810
4811 /* Assign correct length to the current fragment */
4812 mp = rxr->rx_buffers[i].m_head;
4813 mp->m_len = len;
4814
4815 /* Trigger for refresh */
4816 rxr->rx_buffers[i].m_head = NULL;
4817
4818 /* First segment? */
4819 if (rxr->fmp == NULL) {
4820 mp->m_pkthdr.len = len;
4821 rxr->fmp = rxr->lmp = mp;
4822 } else {
4823 /* Chain mbuf's together */
4824 mp->m_flags &= ~M_PKTHDR;
4825 rxr->lmp->m_next = mp;
4826 rxr->lmp = mp;
4827 rxr->fmp->m_pkthdr.len += len;
4828 }
4829
4830 if (eop) {
4831 --count;
4832 sendmp = rxr->fmp;
4833 if_setrcvif(sendmp, ifp);
4834 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4835 em_receive_checksum(status, sendmp);
4836#ifndef __NO_STRICT_ALIGNMENT
4837 if (adapter->hw.mac.max_frame_size >
4838 (MCLBYTES - ETHER_ALIGN) &&
4839 em_fixup_rx(rxr) != 0)
4840 goto skip;
4841#endif
4842 if (status & E1000_RXD_STAT_VP) {
4843 if_setvtag(sendmp,
4844 le16toh(cur->wb.upper.vlan));
4845 sendmp->m_flags |= M_VLANTAG;
4846 }
4847#ifndef __NO_STRICT_ALIGNMENT
4848skip:
4849#endif
4850 rxr->fmp = rxr->lmp = NULL;
4851 }
4852next_desc:
4853 /* Sync the ring */
4854 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4855 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4856
4857 /* Zero out the receive descriptors status. */
4858 cur->wb.upper.status_error &= htole32(~0xFF);
4859 ++rxdone; /* cumulative for POLL */
4860 ++processed;
4861
4862 /* Advance our pointers to the next descriptor. */
4863 if (++i == adapter->num_rx_desc)
4864 i = 0;
4865
4866 /* Send to the stack */
4867 if (sendmp != NULL) {
4868 rxr->next_to_check = i;
4869 EM_RX_UNLOCK(rxr);
4870 if_input(ifp, sendmp);
4871 EM_RX_LOCK(rxr);
4872 i = rxr->next_to_check;
4873 }
4874
4875 /* Only refresh mbufs every 8 descriptors */
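		/*
		 * (Batching amortizes the RDT tail-register write in
		 * em_refresh_mbufs() over several descriptors instead
		 * of paying one MMIO write per received packet.)
		 */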
4876 if (processed == 8) {
4877 em_refresh_mbufs(rxr, i);
4878 processed = 0;
4879 }
4880 }
4881
4882 /* Catch any remaining refresh work */
4883 if (e1000_rx_unrefreshed(rxr))
4884 em_refresh_mbufs(rxr, i);
4885
4886 rxr->next_to_check = i;
4887 if (done != NULL)
4888 *done = rxdone;
4889 EM_RX_UNLOCK(rxr);
4890
4891 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4892}
4893
4894static __inline void
4895em_rx_discard(struct rx_ring *rxr, int i)
4896{
4897 struct em_rxbuffer *rbuf;
4898
4899 rbuf = &rxr->rx_buffers[i];
4900 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4901
4902 /* Free any previous pieces */
4903 if (rxr->fmp != NULL) {
4904 rxr->fmp->m_flags |= M_PKTHDR;
4905 m_freem(rxr->fmp);
4906 rxr->fmp = NULL;
4907 rxr->lmp = NULL;
4908 }
	/*
	** Free the buffer and allow em_refresh_mbufs()
	** to clean up and recharge it.
	*/
4913 if (rbuf->m_head) {
4914 m_free(rbuf->m_head);
4915 rbuf->m_head = NULL;
4916 }
4917 return;
4918}
4919
4920#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake
 * in the 8254x, as it undermines the benefit of DMA. The 8254x only
 * allows the RX buffer size to be 2048/4096/8192/16384; what we really
 * want is 2048 - ETHER_ALIGN so the payload comes out aligned. On
 * architectures without strict alignment restrictions the 8254x still
 * performs unaligned memory accesses, which reduces performance as well.
 * To avoid copying an entire frame just to realign it, we allocate a new
 * mbuf, copy the ethernet header into it, and prepend the new mbuf onto
 * the existing mbuf chain.
 *
 * Be aware that the best performance of the 8254x is achieved only when
 * jumbo frames are not used at all on architectures with strict
 * alignment.
 */
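/*
 * Sketch of the mbuf-prepend case below:
 *
 *	before: fmp -> [ether hdr | IP ...]	(IP header misaligned)
 *	after:	n -> [ether hdr] -> m -> [IP ...]
 *
 * where m_data of the original mbuf has been advanced by ETHER_HDR_LEN,
 * restoring 4-byte alignment of the IP header.
 */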
4935static int
4936em_fixup_rx(struct rx_ring *rxr)
4937{
4938 struct adapter *adapter = rxr->adapter;
4939 struct mbuf *m, *n;
4940 int error;
4941
4942 error = 0;
4943 m = rxr->fmp;
4944 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4945 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4946 m->m_data += ETHER_HDR_LEN;
4947 } else {
4948 MGETHDR(n, M_NOWAIT, MT_DATA);
4949 if (n != NULL) {
4950 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4951 m->m_data += ETHER_HDR_LEN;
4952 m->m_len -= ETHER_HDR_LEN;
4953 n->m_len = ETHER_HDR_LEN;
4954 M_MOVE_PKTHDR(n, m);
4955 n->m_next = m;
4956 rxr->fmp = n;
4957 } else {
4958 adapter->dropped_pkts++;
4959 m_freem(rxr->fmp);
4960 rxr->fmp = NULL;
4961 error = ENOMEM;
4962 }
4963 }
4964
4965 return (error);
4966}
4967#endif
4968
4969static void
4970em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4971{
4972 rxd->read.buffer_addr = htole64(rxbuf->paddr);
4973 /* DD bits must be cleared */
	rxd->wb.upper.status_error = 0;
4975}
4976
4977/*********************************************************************
4978 *
4979 * Verify that the hardware indicated that the checksum is valid.
4980 * Inform the stack about the status of checksum so that stack
4981 * doesn't spend time verifying the checksum.
4982 *
4983 *********************************************************************/
4984static void
4985em_receive_checksum(uint32_t status, struct mbuf *mp)
4986{
4987 mp->m_pkthdr.csum_flags = 0;
4988
	/* If the Ignore Checksum (IXSM) bit is set, do nothing */
4990 if (status & E1000_RXD_STAT_IXSM)
4991 return;
4992
4993 /* If the IP checksum exists and there is no IP Checksum error */
4994 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4995 E1000_RXD_STAT_IPCS) {
4996 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4997 }
4998
4999 /* TCP or UDP checksum */
5000 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5001 E1000_RXD_STAT_TCPCS) {
5002 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5003 mp->m_pkthdr.csum_data = htons(0xffff);
5004 }
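	/*
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with a csum_data of 0xffff
	 * tells the stack that the TCP/UDP checksum, including the
	 * pseudo-header, has already been verified.
	 */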
5005 if (status & E1000_RXD_STAT_UDPCS) {
5006 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5007 mp->m_pkthdr.csum_data = htons(0xffff);
5008 }
5009}
5010
/*
 * This routine is run via a vlan
 * config EVENT
 */
5015static void
5016em_register_vlan(void *arg, if_t ifp, u16 vtag)
5017{
5018 struct adapter *adapter = if_getsoftc(ifp);
5019 u32 index, bit;
5020
5021 if ((void*)adapter != arg) /* Not our event */
5022 return;
5023
5024 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5025 return;
5026
5027 EM_CORE_LOCK(adapter);
5028 index = (vtag >> 5) & 0x7F;
5029 bit = vtag & 0x1F;
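	/*
	 * Example: vtag 100 lands in shadow_vfta[3] bit 4, since
	 * 100 >> 5 == 3 and 100 & 0x1F == 4, mirroring the hardware's
	 * 128 x 32-bit VLAN filter table layout.
	 */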
5030 adapter->shadow_vfta[index] |= (1 << bit);
5031 ++adapter->num_vlans;
5032 /* Re-init to load the changes */
5033 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5034 em_init_locked(adapter);
5035 EM_CORE_UNLOCK(adapter);
5036}
5037
/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
5042static void
5043em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5044{
5045 struct adapter *adapter = if_getsoftc(ifp);
5046 u32 index, bit;
5047
5048 if (adapter != arg)
5049 return;
5050
5051 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5052 return;
5053
5054 EM_CORE_LOCK(adapter);
5055 index = (vtag >> 5) & 0x7F;
5056 bit = vtag & 0x1F;
5057 adapter->shadow_vfta[index] &= ~(1 << bit);
5058 --adapter->num_vlans;
5059 /* Re-init to load the changes */
5060 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5061 em_init_locked(adapter);
5062 EM_CORE_UNLOCK(adapter);
5063}
5064
5065static void
5066em_setup_vlan_hw_support(struct adapter *adapter)
5067{
5068 struct e1000_hw *hw = &adapter->hw;
5069 u32 reg;
5070
	/*
	** We get here through init_locked, meaning a soft
	** reset has already cleared the VFTA and other
	** state; if no vlans have been registered, there
	** is nothing to do.
	*/
5077 if (adapter->num_vlans == 0)
5078 return;
5079
	/*
	** A soft reset zeroes out the VFTA, so
	** we need to repopulate it now.
	*/
5084 for (int i = 0; i < EM_VFTA_SIZE; i++)
5085 if (adapter->shadow_vfta[i] != 0)
5086 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5087 i, adapter->shadow_vfta[i]);
5088
5089 reg = E1000_READ_REG(hw, E1000_CTRL);
5090 reg |= E1000_CTRL_VME;
5091 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5092
5093 /* Enable the Filter Table */
5094 reg = E1000_READ_REG(hw, E1000_RCTL);
5095 reg &= ~E1000_RCTL_CFIEN;
5096 reg |= E1000_RCTL_VFE;
5097 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5098}
5099
5100static void
5101em_enable_intr(struct adapter *adapter)
5102{
5103 struct e1000_hw *hw = &adapter->hw;
5104 u32 ims_mask = IMS_ENABLE_MASK;
5105
5106 if (hw->mac.type == e1000_82574) {
5107 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
5108 ims_mask |= adapter->ims;
5109 }
5110 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5111}
5112
5113static void
5114em_disable_intr(struct adapter *adapter)
5115{
5116 struct e1000_hw *hw = &adapter->hw;
5117
5118 if (hw->mac.type == e1000_82574)
5119 E1000_WRITE_REG(hw, EM_EIAC, 0);
5120 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5121}
5122
/*
 * Something of a misnomer: what this really means is
 * to enable OS management of the system, i.e. to
 * disable the special hardware management features.
 */
5128static void
5129em_init_manageability(struct adapter *adapter)
5130{
5131 /* A shared code workaround */
5132#define E1000_82542_MANC2H E1000_MANC2H
5133 if (adapter->has_manage) {
5134 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5135 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5136
5137 /* disable hardware interception of ARP */
5138 manc &= ~(E1000_MANC_ARP_EN);
5139
5140 /* enable receiving management packets to the host */
5141 manc |= E1000_MANC_EN_MNG2HOST;
5142#define E1000_MNG2HOST_PORT_623 (1 << 5)
5143#define E1000_MNG2HOST_PORT_664 (1 << 6)
5144 manc2h |= E1000_MNG2HOST_PORT_623;
5145 manc2h |= E1000_MNG2HOST_PORT_664;
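		/*
		 * Ports 623 and 664 are the standard RMCP and secure
		 * RMCP (ASF/IPMI) management ports.
		 */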
5146 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5147 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5148 }
5149}
5150
5151/*
5152 * Give control back to hardware management
5153 * controller if there is one.
5154 */
5155static void
5156em_release_manageability(struct adapter *adapter)
5157{
5158 if (adapter->has_manage) {
5159 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5160
5161 /* re-enable hardware interception of ARP */
5162 manc |= E1000_MANC_ARP_EN;
5163 manc &= ~E1000_MANC_EN_MNG2HOST;
5164
5165 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5166 }
5167}
5168
5169/*
5170 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5171 * For ASF and Pass Through versions of f/w this means
5172 * that the driver is loaded. For AMT version type f/w
5173 * this means that the network i/f is open.
5174 */
5175static void
5176em_get_hw_control(struct adapter *adapter)
5177{
5178 u32 ctrl_ext, swsm;
5179
5180 if (adapter->hw.mac.type == e1000_82573) {
5181 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5182 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5183 swsm | E1000_SWSM_DRV_LOAD);
5184 return;
5185 }
5186 /* else */
5187 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5188 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5189 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5190 return;
5191}
5192
5193/*
5194 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5195 * For ASF and Pass Through versions of f/w this means that
5196 * the driver is no longer loaded. For AMT versions of the
5197 * f/w this means that the network i/f is closed.
5198 */
5199static void
5200em_release_hw_control(struct adapter *adapter)
5201{
5202 u32 ctrl_ext, swsm;
5203
5204 if (!adapter->has_manage)
5205 return;
5206
5207 if (adapter->hw.mac.type == e1000_82573) {
5208 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5209 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5210 swsm & ~E1000_SWSM_DRV_LOAD);
5211 return;
5212 }
5213 /* else */
5214 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5215 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5216 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5217 return;
5218}
5219
5220static int
5221em_is_valid_ether_addr(u8 *addr)
5222{
5223 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5224
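	/*
	 * Bit 0 of the first octet is the I/G (multicast) bit, so the
	 * test below rejects multicast and broadcast addresses as well
	 * as the all-zero address.
	 */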
5225 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5226 return (FALSE);
5227 }
5228
5229 return (TRUE);
5230}
5231
5232/*
5233** Parse the interface capabilities with regard
5234** to both system management and wake-on-lan for
5235** later use.
5236*/
5237static void
5238em_get_wakeup(device_t dev)
5239{
5240 struct adapter *adapter = device_get_softc(dev);
5241 u16 eeprom_data = 0, device_id, apme_mask;
5242
5243 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5244 apme_mask = EM_EEPROM_APME;
5245
5246 switch (adapter->hw.mac.type) {
5247 case e1000_82573:
5248 case e1000_82583:
5249 adapter->has_amt = TRUE;
5250 /* Falls thru */
5251 case e1000_82571:
5252 case e1000_82572:
5253 case e1000_80003es2lan:
5254 if (adapter->hw.bus.func == 1) {
5255 e1000_read_nvm(&adapter->hw,
5256 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5257 break;
5258 } else
5259 e1000_read_nvm(&adapter->hw,
5260 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5261 break;
5262 case e1000_ich8lan:
5263 case e1000_ich9lan:
5264 case e1000_ich10lan:
5265 case e1000_pchlan:
5266 case e1000_pch2lan:
5267 apme_mask = E1000_WUC_APME;
5268 adapter->has_amt = TRUE;
5269 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5270 break;
5271 default:
5272 e1000_read_nvm(&adapter->hw,
5273 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5274 break;
5275 }
5276 if (eeprom_data & apme_mask)
5277 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings; now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake on lan on a particular port.
	 */
5283 device_id = pci_get_device(dev);
5284 switch (device_id) {
5285 case E1000_DEV_ID_82571EB_FIBER:
5286 /* Wake events only supported on port A for dual fiber
5287 * regardless of eeprom setting */
5288 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5289 E1000_STATUS_FUNC_1)
5290 adapter->wol = 0;
5291 break;
5292 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5293 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5294 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5295 /* if quad port adapter, disable WoL on all but port A */
5296 if (global_quad_port_a != 0)
5297 adapter->wol = 0;
5298 /* Reset for multiple quad port adapters */
5299 if (++global_quad_port_a == 4)
5300 global_quad_port_a = 0;
5301 break;
5302 }
5303 return;
5304}
5305
5306
5307/*
5308 * Enable PCI Wake On Lan capability
5309 */
5310static void
5311em_enable_wakeup(device_t dev)
5312{
5313 struct adapter *adapter = device_get_softc(dev);
5314 if_t ifp = adapter->ifp;
5315 u32 pmc, ctrl, ctrl_ext, rctl;
5316 u16 status;
5317
5318 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5319 return;
5320
5321 /* Advertise the wakeup capability */
5322 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5323 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5324 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5325 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5326
5327 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5328 (adapter->hw.mac.type == e1000_pchlan) ||
5329 (adapter->hw.mac.type == e1000_ich9lan) ||
5330 (adapter->hw.mac.type == e1000_ich10lan))
5331 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5332
5333 /* Keep the laser running on Fiber adapters */
5334 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5335 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5336 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5337 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5338 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5339 }
5340
5341 /*
5342 ** Determine type of Wakeup: note that wol
5343 ** is set with all bits on by default.
5344 */
5345 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5346 adapter->wol &= ~E1000_WUFC_MAG;
5347
5348 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5349 adapter->wol &= ~E1000_WUFC_MC;
5350 else {
5351 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5352 rctl |= E1000_RCTL_MPE;
5353 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5354 }
5355
5356 if ((adapter->hw.mac.type == e1000_pchlan) ||
5357 (adapter->hw.mac.type == e1000_pch2lan)) {
5358 if (em_enable_phy_wakeup(adapter))
5359 return;
5360 } else {
5361 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5362 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5363 }
5364
5365 if (adapter->hw.phy.type == e1000_phy_igp_3)
5366 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5367
5368 /* Request PME */
5369 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5370 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5371 if (if_getcapenable(ifp) & IFCAP_WOL)
5372 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5373 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5374
5375 return;
5376}
5377
/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
*/
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < hw->mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register */
	e1000_read_phy_reg(hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
		    << BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);

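	/*
	** Note: BM_WUC_ENABLE_REG lives on PHY page 769 (BM_WUC_ENABLE_PAGE,
	** selected below via the raw MDIC ops), so the PHY semaphore must
	** be held across the page select and the accesses that follow.
	*/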
	/* activate PHY wakeup */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return (ret);
	}
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	    (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	hw->phy.ops.release(hw);

	return (ret);
}

static void
em_led_func(void *arg, int onoff)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	EM_CORE_UNLOCK(adapter);
}

/*
** Disable the PCIe L0s and L1 ASPM link states.
*/
static void
em_disable_aspm(struct adapter *adapter)
{
	int base, reg;
	u16 link_cap, link_ctrl;
	device_t dev = adapter->dev;

	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82574:
	case e1000_82583:
		break;
	default:
		return;
	}
	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
		return;
	reg = base + PCIER_LINK_CAP;
	link_cap = pci_read_config(dev, reg, 2);
	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
		return;
	reg = base + PCIER_LINK_CTL;
	link_ctrl = pci_read_config(dev, reg, 2);
	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
	pci_write_config(dev, reg, link_ctrl, 2);
	return;
}
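
/*
** Verification sketch (the pciconf(8) selector shown is hypothetical):
** after em_disable_aspm() runs, the ASPM Control field of the PCIe
** Link Control register should read as disabled, e.g.:
**   pciconf -lc pci0:2:0:0
*/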

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
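/*
** The hardware statistics registers are clear-on-read, so each pass
** only accumulates deltas; the driver's periodic timer is expected to
** call this often enough that no 32-bit counter wraps between reads.
*/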
static void
em_update_stats_counters(struct adapter *adapter)
{

	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */
	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
}

static uint64_t
em_get_counter(if_t ifp, ift_counter cnt)
{
	struct adapter *adapter;

	adapter = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_COLLISIONS:
		return (adapter->stats.colc);
	case IFCOUNTER_IERRORS:
		return (adapter->dropped_pkts + adapter->stats.rxerrc +
		    adapter->stats.crcerrs + adapter->stats.algnerrc +
		    adapter->stats.ruc + adapter->stats.roc +
		    adapter->stats.mpc + adapter->stats.cexterr);
	case IFCOUNTER_OERRORS:
		return (adapter->stats.ecol + adapter->stats.latecol +
		    adapter->watchdog_events);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
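
/*
** These aggregates are what netstat(1) reports in the interface's
** Ierrs/Oerrs/Colls columns, e.g. (unit 0 is hypothetical):
**   netstat -I em0
*/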

/* Export a single 32-bit register via a read-only sysctl. */
static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}
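
/*
** Usage sketch (unit 0 is hypothetical): each oid registered with this
** handler reads its register fresh on every access, e.g.:
**   sysctl dev.em.0.device_control
**   sysctl dev.em.0.rx_control
*/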

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
em_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = &adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
	    CTLFLAG_RD, &adapter->dropped_pkts,
	    "Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
	    CTLFLAG_RD, &adapter->link_irq,
	    "Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
	    CTLFLAG_RD, &adapter->mbuf_defrag_failed,
	    "Defragmenting mbuf chain failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
	    CTLFLAG_RD, &adapter->no_tx_dma_setup,
	    "Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
	    CTLFLAG_RD, &adapter->rx_overruns,
	    "RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
	    CTLFLAG_RD, &adapter->watchdog_events,
	    "Watchdog timeouts");

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
	    em_sysctl_reg_handler, "IU",
	    "Device Control Register");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
	    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
	    em_sysctl_reg_handler, "IU",
	    "Receiver Control Register");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
	    CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
	    "Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
	    CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
	    "Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
		    CTLFLAG_RD, NULL, "TX Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter,
		    E1000_TDH(txr->me),
		    em_sysctl_reg_handler, "IU",
		    "Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter,
		    E1000_TDT(txr->me),
		    em_sysctl_reg_handler, "IU",
		    "Transmit Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
		    CTLFLAG_RD, &txr->tx_irq,
		    "Queue MSI-X Transmit Interrupts");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
		    CTLFLAG_RD, &txr->no_desc_avail,
		    "Queue No Descriptor Available");

		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
		    CTLFLAG_RD, NULL, "RX Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter,
		    E1000_RDH(rxr->me),
		    em_sysctl_reg_handler, "IU",
		    "Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter,
		    E1000_RDT(rxr->me),
		    em_sysctl_reg_handler, "IU",
		    "Receive Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
		    CTLFLAG_RD, &rxr->rx_irq,
		    "Queue MSI-X Receive Interrupts");
	}

	/* MAC stats get their own sub node */
	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
	    CTLFLAG_RD, NULL, "Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
	    CTLFLAG_RD, &stats->ecol,
	    "Excessive collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
	    CTLFLAG_RD, &stats->scc,
	    "Single collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
	    CTLFLAG_RD, &stats->mcc,
	    "Multiple collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
	    CTLFLAG_RD, &stats->latecol,
	    "Late collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
	    CTLFLAG_RD, &stats->colc,
	    "Collision Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
	    CTLFLAG_RD, &stats->symerrs,
	    "Symbol Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
	    CTLFLAG_RD, &stats->sec,
	    "Sequence Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
	    CTLFLAG_RD, &stats->dc,
	    "Defer Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
	    CTLFLAG_RD, &stats->mpc,
	    "Missed Packets");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
	    CTLFLAG_RD, &stats->rnbc,
	    "Receive No Buffers");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
	    CTLFLAG_RD, &stats->ruc,
	    "Receive Undersize");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
	    CTLFLAG_RD, &stats->rfc,
	    "Fragmented Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
	    CTLFLAG_RD, &stats->roc,
	    "Oversized Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
	    CTLFLAG_RD, &stats->rjc,
	    "Received Jabber");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
	    CTLFLAG_RD, &stats->rxerrc,
	    "Receive Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
	    CTLFLAG_RD, &stats->crcerrs,
	    "CRC errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
	    CTLFLAG_RD, &stats->algnerrc,
	    "Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
	    CTLFLAG_RD, &stats->cexterr,
	    "Collision/Carrier extension errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
	    CTLFLAG_RD, &stats->xonrxc,
	    "XON Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
	    CTLFLAG_RD, &stats->xontxc,
	    "XON Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
	    CTLFLAG_RD, &stats->xoffrxc,
	    "XOFF Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
	    CTLFLAG_RD, &stats->xofftxc,
	    "XOFF Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
	    CTLFLAG_RD, &stats->tpr,
	    "Total Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
	    CTLFLAG_RD, &stats->gprc,
	    "Good Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
	    CTLFLAG_RD, &stats->bprc,
	    "Broadcast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
	    CTLFLAG_RD, &stats->mprc,
	    "Multicast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
	    CTLFLAG_RD, &stats->prc64,
	    "64 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
	    CTLFLAG_RD, &stats->prc127,
	    "65-127 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
	    CTLFLAG_RD, &stats->prc255,
	    "128-255 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
	    CTLFLAG_RD, &stats->prc511,
	    "256-511 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
	    CTLFLAG_RD, &stats->prc1023,
	    "512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
	    CTLFLAG_RD, &stats->prc1522,
	    "1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
	    CTLFLAG_RD, &stats->gorc,
	    "Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
	    CTLFLAG_RD, &stats->gotc,
	    "Good Octets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
	    CTLFLAG_RD, &stats->tpt,
	    "Total Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
	    CTLFLAG_RD, &stats->gptc,
	    "Good Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
	    CTLFLAG_RD, &stats->bptc,
	    "Broadcast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
	    CTLFLAG_RD, &stats->mptc,
	    "Multicast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
	    CTLFLAG_RD, &stats->ptc64,
	    "64 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
	    CTLFLAG_RD, &stats->ptc127,
	    "65-127 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
	    CTLFLAG_RD, &stats->ptc255,
	    "128-255 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
	    CTLFLAG_RD, &stats->ptc511,
	    "256-511 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
	    CTLFLAG_RD, &stats->ptc1023,
	    "512-1023 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
	    CTLFLAG_RD, &stats->ptc1522,
	    "1024-1522 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
	    CTLFLAG_RD, &stats->tsctc,
	    "TSO Contexts Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
	    CTLFLAG_RD, &stats->tsctfc,
	    "TSO Contexts Failed");

	/* Interrupt Stats */
	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
	    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
	    CTLFLAG_RD, &stats->iac,
	    "Interrupt Assertion Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
	    CTLFLAG_RD, &stats->icrxptc,
	    "Interrupt Cause Rx Pkt Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
	    CTLFLAG_RD, &stats->icrxatc,
	    "Interrupt Cause Rx Abs Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
	    CTLFLAG_RD, &stats->ictxptc,
	    "Interrupt Cause Tx Pkt Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
	    CTLFLAG_RD, &stats->ictxatc,
	    "Interrupt Cause Tx Abs Timer Expire Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
	    CTLFLAG_RD, &stats->ictxqec,
	    "Interrupt Cause Tx Queue Empty Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
	    CTLFLAG_RD, &stats->ictxqmtc,
	    "Interrupt Cause Tx Queue Min Thresh Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
	    CTLFLAG_RD, &stats->icrxdmtc,
	    "Interrupt Cause Rx Desc Min Thresh Count");
	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
	    CTLFLAG_RD, &stats->icrxoc,
	    "Interrupt Cause Receiver Overrun Count");
}
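
/*
** Usage sketch (unit 0 is hypothetical): whole subtrees can be dumped
** at once, e.g.:
**   sysctl dev.em.0.mac_stats
**   sysctl dev.em.0.interrupts
**   sysctl dev.em.0.queue_tx_0
*/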

/**********************************************************************
 *
 * This routine provides a way to dump out the adapter eeprom,
 * often a useful debug/service tool. It only dumps the first
 * 32 words; the data that matters lives within that extent.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1)
		em_print_nvm_info(adapter);

	return (error);
}
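
/*
** Usage sketch (node name and unit are assumptions; the handler only
** acts when the value 1 is written):
**   sysctl dev.em.0.nvm=1
*/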

static void
em_print_nvm_info(struct adapter *adapter)
{
	u16 eeprom_data;
	int i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000 ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0 ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);
	if (info->offset == E1000_ITR)	/* units are 256ns here */
		ticks *= 4;

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
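
/*
** Usage sketch (node names and unit are assumptions; values are in
** microseconds and range-checked above):
**   sysctl dev.em.0.rx_int_delay=32
*/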

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
    const char *description, struct em_int_delay_info *info,
    int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
    const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
** 0 - off
** 1 - rx pause
** 2 - tx pause
** 3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int error;
	static int input = 3; /* default is full */
	struct adapter *adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
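
/*
** Usage sketch (node name and unit are assumptions; 3 requests full
** flow control, per the table above):
**   sysctl dev.em.0.fc=3
*/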

/*
** Manage Energy Efficient Ethernet:
** Control values:
** 0 - EEE enabled
** 1 - EEE disabled
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
}
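
/*
** Usage sketch (node name and unit are assumptions; a nonzero write
** disables EEE and re-initializes the interface):
**   sysctl dev.em.0.eee_control=1
*/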

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}

/*
** This routine is meant to be fluid, add whatever is
** needed for debugging a problem. -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING ");

	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");

	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
		device_printf(dev, "TX Queue %d ------\n", i);
		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
		device_printf(dev, "TX descriptors avail = %d\n",
		    txr->tx_avail);
		device_printf(dev, "Tx Descriptors avail failure = %lu\n",
		    txr->no_desc_avail);
		device_printf(dev, "RX Queue %d ------\n", i);
		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
		device_printf(dev, "RX discarded packets = %lu\n",
		    rxr->rx_discarded);
		device_printf(dev, "RX Next to Check = %d\n",
		    rxr->next_to_check);
		device_printf(dev, "RX Next to Refresh = %d\n",
		    rxr->next_to_refresh);
	}
}
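
/*
** Usage sketch (node name and unit are assumptions; the sysctl handler
** above triggers this dump when 1 is written):
**   sysctl dev.em.0.debug=1
*/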

#ifdef EM_MULTIQUEUE
/*
 * 82574 only:
 * Write a new value to the EEPROM increasing the number of MSIX
 * vectors from 3 to 5, for proper multiqueue support.
 */
static void
em_enable_vectors_82574(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	device_t dev = adapter->dev;
	u16 edata;

	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	printf("Current cap: %#06x\n", edata);
	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
		device_printf(dev, "Writing to eeprom: increasing "
		    "reported MSIX vectors from 3 to 5...\n");
		edata &= ~(EM_NVM_MSIX_N_MASK);
		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
		e1000_update_nvm_checksum(hw);
		device_printf(dev, "Writing to eeprom: done\n");
	}
}
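
/*
** Note (an assumption about 82574 NVM behavior): the device latches
** its advertised MSIX vector count from the EEPROM at power-on, so
** this change only takes effect after a reboot or power cycle.
*/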
#endif

#ifdef DDB
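/*
** Debugger hooks (a usage sketch): from the ddb(4) prompt these walk
** every attached em(4) unit, e.g.:
**   db> em_reset_dev	re-initialize each em device
**   db> em_dump_queue	print the per-queue debug info above
*/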
DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
{
	devclass_t dc;
	int max_em;

	dc = devclass_find("em");
	max_em = devclass_get_maxunit(dc);

	for (int index = 0; index < max_em; index++) {
		device_t dev;
		dev = devclass_get_device(dc, index);
		if (dev == NULL)
			continue;
		if (device_get_driver(dev) == &em_driver) {
			struct adapter *adapter = device_get_softc(dev);
			EM_CORE_LOCK(adapter);
			em_init_locked(adapter);
			EM_CORE_UNLOCK(adapter);
		}
	}
}
DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
{
	devclass_t dc;
	int max_em;

	dc = devclass_find("em");
	max_em = devclass_get_maxunit(dc);

	for (int index = 0; index < max_em; index++) {
		device_t dev;
		dev = devclass_get_device(dc, index);
		if (dev == NULL)
			continue;
		if (device_get_driver(dev) == &em_driver)
			em_print_debug_info(device_get_softc(dev));
	}
}
#endif