1/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_em.c 295133 2016-02-01 23:51:30Z marius $*/
34
35#include "opt_em.h"
36#include "opt_ddb.h"
37#include "opt_inet.h"
38#include "opt_inet6.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifdef DDB
47#include <sys/types.h>
48#include <ddb/ddb.h>
49#endif
50#if __FreeBSD_version >= 800000
51#include <sys/buf_ring.h>
52#endif
53#include <sys/bus.h>
54#include <sys/endian.h>
55#include <sys/kernel.h>
56#include <sys/kthread.h>
57#include <sys/malloc.h>
58#include <sys/mbuf.h>
59#include <sys/module.h>
60#include <sys/rman.h>
61#include <sys/smp.h>
62#include <sys/socket.h>
63#include <sys/sockio.h>
64#include <sys/sysctl.h>
65#include <sys/taskqueue.h>
66#include <sys/eventhandler.h>
67#include <machine/bus.h>
68#include <machine/resource.h>
69
70#include <net/bpf.h>
71#include <net/ethernet.h>
72#include <net/if.h>
73#include <net/if_var.h>
74#include <net/if_arp.h>
75#include <net/if_dl.h>
76#include <net/if_media.h>
77
78#include <net/if_types.h>
79#include <net/if_vlan_var.h>
80
81#include <netinet/in_systm.h>
82#include <netinet/in.h>
83#include <netinet/if_ether.h>
84#include <netinet/ip.h>
85#include <netinet/ip6.h>
86#include <netinet/tcp.h>
87#include <netinet/udp.h>
88
89#include <machine/in_cksum.h>
90#include <dev/led/led.h>
91#include <dev/pci/pcivar.h>
92#include <dev/pci/pcireg.h>
93
94#include "e1000_api.h"
95#include "e1000_82571.h"
96#include "if_em.h"
97
98/*********************************************************************
99 * Set this to one to display debug statistics
100 *********************************************************************/
101int em_display_debug_stats = 0;
102
103/*********************************************************************
104 * Driver version:
105 *********************************************************************/
106char em_driver_version[] = "7.4.2";
107
108/*********************************************************************
109 * PCI Device ID Table
110 *
 111 * Used by probe to select the devices to load on.
 112 * The last field stores an index into em_strings.
 113 * The last entry must be all 0s.
114 *
115 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
116 *********************************************************************/
117
118static em_vendor_info_t em_vendor_info_array[] =
119{
120 /* Intel(R) PRO/1000 Network Connection */
121 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
123 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
125 PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
127 PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
131 PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
133 PCI_ANY_ID, PCI_ANY_ID, 0},
134 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
135 PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
140
141 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
142 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
144 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
148 PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
150 PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
152 PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
181 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
182 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
183 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
184 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
187 PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
189 PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
191 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
192 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
194 /* required last entry */
195 { 0, 0, 0, 0, 0}
196};
197
198/*********************************************************************
199 * Table of branding strings for all supported NICs.
200 *********************************************************************/
201
202static char *em_strings[] = {
203 "Intel(R) PRO/1000 Network Connection"
204};
205
206/*********************************************************************
207 * Function prototypes
208 *********************************************************************/
209static int em_probe(device_t);
210static int em_attach(device_t);
211static int em_detach(device_t);
212static int em_shutdown(device_t);
213static int em_suspend(device_t);
214static int em_resume(device_t);
215#ifdef EM_MULTIQUEUE
216static int em_mq_start(if_t, struct mbuf *);
217static int em_mq_start_locked(if_t,
218 struct tx_ring *);
219static void em_qflush(if_t);
220#else
221static void em_start(if_t);
222static void em_start_locked(if_t, struct tx_ring *);
223#endif
224static int em_ioctl(if_t, u_long, caddr_t);
225static uint64_t em_get_counter(if_t, ift_counter);
226static void em_init(void *);
227static void em_init_locked(struct adapter *);
228static void em_stop(void *);
229static void em_media_status(if_t, struct ifmediareq *);
230static int em_media_change(if_t);
231static void em_identify_hardware(struct adapter *);
232static int em_allocate_pci_resources(struct adapter *);
233static int em_allocate_legacy(struct adapter *);
234static int em_allocate_msix(struct adapter *);
235static int em_allocate_queues(struct adapter *);
236static int em_setup_msix(struct adapter *);
237static void em_free_pci_resources(struct adapter *);
238static void em_local_timer(void *);
239static void em_reset(struct adapter *);
240static int em_setup_interface(device_t, struct adapter *);
241
242static void em_setup_transmit_structures(struct adapter *);
243static void em_initialize_transmit_unit(struct adapter *);
244static int em_allocate_transmit_buffers(struct tx_ring *);
245static void em_free_transmit_structures(struct adapter *);
246static void em_free_transmit_buffers(struct tx_ring *);
247
248static int em_setup_receive_structures(struct adapter *);
249static int em_allocate_receive_buffers(struct rx_ring *);
250static void em_initialize_receive_unit(struct adapter *);
251static void em_free_receive_structures(struct adapter *);
252static void em_free_receive_buffers(struct rx_ring *);
253
254static void em_enable_intr(struct adapter *);
255static void em_disable_intr(struct adapter *);
256static void em_update_stats_counters(struct adapter *);
257static void em_add_hw_stats(struct adapter *adapter);
258static void em_txeof(struct tx_ring *);
259static bool em_rxeof(struct rx_ring *, int, int *);
260#ifndef __NO_STRICT_ALIGNMENT
261static int em_fixup_rx(struct rx_ring *);
262#endif
263static void em_setup_rxdesc(union e1000_rx_desc_extended *,
264 const struct em_rxbuffer *rxbuf);
265static void em_receive_checksum(uint32_t status, struct mbuf *);
266static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
267 struct ip *, u32 *, u32 *);
268static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
269 struct tcphdr *, u32 *, u32 *);
270static void em_set_promisc(struct adapter *);
271static void em_disable_promisc(struct adapter *);
272static void em_set_multi(struct adapter *);
273static void em_update_link_status(struct adapter *);
274static void em_refresh_mbufs(struct rx_ring *, int);
275static void em_register_vlan(void *, if_t, u16);
276static void em_unregister_vlan(void *, if_t, u16);
277static void em_setup_vlan_hw_support(struct adapter *);
278static int em_xmit(struct tx_ring *, struct mbuf **);
279static int em_dma_malloc(struct adapter *, bus_size_t,
280 struct em_dma_alloc *, int);
281static void em_dma_free(struct adapter *, struct em_dma_alloc *);
282static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
283static void em_print_nvm_info(struct adapter *);
284static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
285static void em_print_debug_info(struct adapter *);
286static int em_is_valid_ether_addr(u8 *);
287static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
288static void em_add_int_delay_sysctl(struct adapter *, const char *,
289 const char *, struct em_int_delay_info *, int, int);
290/* Management and WOL Support */
291static void em_init_manageability(struct adapter *);
292static void em_release_manageability(struct adapter *);
293static void em_get_hw_control(struct adapter *);
294static void em_release_hw_control(struct adapter *);
295static void em_get_wakeup(device_t);
296static void em_enable_wakeup(device_t);
297static int em_enable_phy_wakeup(struct adapter *);
298static void em_led_func(void *, int);
299static void em_disable_aspm(struct adapter *);
300
301static int em_irq_fast(void *);
302
303/* MSIX handlers */
304static void em_msix_tx(void *);
305static void em_msix_rx(void *);
306static void em_msix_link(void *);
307static void em_handle_tx(void *context, int pending);
308static void em_handle_rx(void *context, int pending);
309static void em_handle_link(void *context, int pending);
310
311#ifdef EM_MULTIQUEUE
312static void em_enable_vectors_82574(struct adapter *);
313#endif
314
315static void em_set_sysctl_value(struct adapter *, const char *,
316 const char *, int *, int);
317static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
318static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
319
320static __inline void em_rx_discard(struct rx_ring *, int);
321
322#ifdef DEVICE_POLLING
323static poll_handler_t em_poll;
324#endif /* POLLING */
325
326/*********************************************************************
327 * FreeBSD Device Interface Entry Points
328 *********************************************************************/
329
330static device_method_t em_methods[] = {
331 /* Device interface */
332 DEVMETHOD(device_probe, em_probe),
333 DEVMETHOD(device_attach, em_attach),
334 DEVMETHOD(device_detach, em_detach),
335 DEVMETHOD(device_shutdown, em_shutdown),
336 DEVMETHOD(device_suspend, em_suspend),
337 DEVMETHOD(device_resume, em_resume),
338 DEVMETHOD_END
339};
340
341static driver_t em_driver = {
342 "em", em_methods, sizeof(struct adapter),
343};
344
345devclass_t em_devclass;
346DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
347MODULE_DEPEND(em, pci, 1, 1, 1);
348MODULE_DEPEND(em, ether, 1, 1, 1);
349#ifdef DEV_NETMAP
350MODULE_DEPEND(em, netmap, 1, 1, 1);
351#endif /* DEV_NETMAP */
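/*
 * Usage note (informational, not from the source): when built as a module
 * this driver is if_em.ko; it can be loaded at boot via if_em_load="YES"
 * in /boot/loader.conf, or at runtime with kldload(8):
 *
 *   # kldload if_em
 */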
352
353/*********************************************************************
354 * Tunable default values.
355 *********************************************************************/
356
357#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
358#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
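/*
 * Worked example (illustrative, not used by the driver): the interrupt
 * delay timers tick in 1.024 us units, and the two conversions above
 * round-trip with rounding:
 *
 *   EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks
 *   EM_TICKS_TO_USECS(98)  = (1024 * 98 + 500) / 1000  = 100 usecs
 */
CTASSERT(EM_USECS_TO_TICKS(100) == 98);
CTASSERT(EM_TICKS_TO_USECS(EM_USECS_TO_TICKS(100)) == 100);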
359#define M_TSO_LEN 66
360
361#define MAX_INTS_PER_SEC 8000
362#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
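/*
 * Worked example (illustrative): the ITR register counts in 256 ns units,
 * so capping at MAX_INTS_PER_SEC = 8000 interrupts/sec gives
 * 1000000000 / (8000 * 256) = 488, i.e. a floor of 488 * 256 ns ~ 125 us
 * between interrupts.
 */
CTASSERT(DEFAULT_ITR == 488);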
363
364/* Allow common code without TSO */
365#ifndef CSUM_TSO
366#define CSUM_TSO 0
367#endif
368
369#define TSO_WORKAROUND 4
370
371static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
372
373static int em_disable_crc_stripping = 0;
374SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
375 &em_disable_crc_stripping, 0, "Disable CRC Stripping");
376
377static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
378static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
379SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
380 0, "Default transmit interrupt delay in usecs");
381SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
382 0, "Default receive interrupt delay in usecs");
383
384static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
385static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
386SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
387 &em_tx_abs_int_delay_dflt, 0,
388 "Default transmit interrupt delay limit in usecs");
389SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
390 &em_rx_abs_int_delay_dflt, 0,
391 "Default receive interrupt delay limit in usecs");
392
393static int em_rxd = EM_DEFAULT_RXD;
394static int em_txd = EM_DEFAULT_TXD;
395SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
396 "Number of receive descriptors per queue");
397SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
398 "Number of transmit descriptors per queue");
399
400static int em_smart_pwr_down = FALSE;
401SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
402 0, "Set to true to leave smart power down enabled on newer adapters");
403
404/* Controls whether promiscuous also shows bad packets */
405static int em_debug_sbp = FALSE;
406SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
407 "Show bad packets in promiscuous mode");
408
409static int em_enable_msix = TRUE;
410SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
411 "Enable MSI-X interrupts");
412
413#ifdef EM_MULTIQUEUE
414static int em_num_queues = 1;
415SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
416 "82574 only: Number of queues to configure, 0 indicates autoconfigure");
417#endif
418
419/*
 420** Global variable to store the last used CPU when binding queues
 421** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments
 422** when a queue is bound to a CPU.
423*/
424static int em_last_bind_cpu = -1;
425
426/* How many packets rxeof tries to clean at a time */
427static int em_rx_process_limit = 100;
428SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
429 &em_rx_process_limit, 0,
430 "Maximum number of received packets to process "
431 "at a time, -1 means unlimited");
432
 433/* Energy Efficient Ethernet - default to OFF (stored as a disable flag) */
 434static int eee_setting = 1;
 435SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
 436 "Disable Energy Efficient Ethernet");
437
438/* Global used in WOL setup with multiport cards */
439static int global_quad_port_a = 0;
440
441#ifdef DEV_NETMAP /* see ixgbe.c for details */
442#include <dev/netmap/if_em_netmap.h>
443#endif /* DEV_NETMAP */
444
445/*********************************************************************
446 * Device identification routine
447 *
 448 * em_probe determines if the driver should be loaded on an
 449 * adapter based on the PCI vendor/device ID of that adapter.
450 *
451 * return BUS_PROBE_DEFAULT on success, positive on failure
452 *********************************************************************/
453
454static int
455em_probe(device_t dev)
456{
457 char adapter_name[60];
458 uint16_t pci_vendor_id = 0;
459 uint16_t pci_device_id = 0;
460 uint16_t pci_subvendor_id = 0;
461 uint16_t pci_subdevice_id = 0;
462 em_vendor_info_t *ent;
463
464 INIT_DEBUGOUT("em_probe: begin");
465
466 pci_vendor_id = pci_get_vendor(dev);
467 if (pci_vendor_id != EM_VENDOR_ID)
468 return (ENXIO);
469
470 pci_device_id = pci_get_device(dev);
471 pci_subvendor_id = pci_get_subvendor(dev);
472 pci_subdevice_id = pci_get_subdevice(dev);
473
474 ent = em_vendor_info_array;
475 while (ent->vendor_id != 0) {
476 if ((pci_vendor_id == ent->vendor_id) &&
477 (pci_device_id == ent->device_id) &&
478
479 ((pci_subvendor_id == ent->subvendor_id) ||
480 (ent->subvendor_id == PCI_ANY_ID)) &&
481
482 ((pci_subdevice_id == ent->subdevice_id) ||
483 (ent->subdevice_id == PCI_ANY_ID))) {
484 sprintf(adapter_name, "%s %s",
485 em_strings[ent->index],
486 em_driver_version);
487 device_set_desc_copy(dev, adapter_name);
488 return (BUS_PROBE_DEFAULT);
489 }
490 ent++;
491 }
492
493 return (ENXIO);
494}
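/*
 * Illustrative check (not part of the driver): whether a NIC will match
 * em_vendor_info_array can be verified from userland with pciconf(8),
 * comparing the reported vendor (0x8086) and device IDs to the table:
 *
 *   # pciconf -lv
 */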
495
496/*********************************************************************
497 * Device initialization routine
498 *
499 * The attach entry point is called when the driver is being loaded.
500 * This routine identifies the type of hardware, allocates all resources
501 * and initializes the hardware.
502 *
503 * return 0 on success, positive on failure
504 *********************************************************************/
505
506static int
507em_attach(device_t dev)
508{
509 struct adapter *adapter;
510 struct e1000_hw *hw;
511 int error = 0;
512
513 INIT_DEBUGOUT("em_attach: begin");
514
515 if (resource_disabled("em", device_get_unit(dev))) {
516 device_printf(dev, "Disabled by device hint\n");
517 return (ENXIO);
518 }
519
520 adapter = device_get_softc(dev);
521 adapter->dev = adapter->osdep.dev = dev;
522 hw = &adapter->hw;
523 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
524
525 /* SYSCTL stuff */
526 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
527 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
528 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
529 em_sysctl_nvm_info, "I", "NVM Information");
530
531 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
532 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
533 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
534 em_sysctl_debug_info, "I", "Debug Information");
535
536 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
537 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
538 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
539 em_set_flowcntl, "I", "Flow Control");
540
541 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
542
543 /* Determine hardware and mac info */
544 em_identify_hardware(adapter);
545
546 /* Setup PCI resources */
547 if (em_allocate_pci_resources(adapter)) {
548 device_printf(dev, "Allocation of PCI resources failed\n");
549 error = ENXIO;
550 goto err_pci;
551 }
552
553 /*
554 ** For ICH8 and family we need to
555 ** map the flash memory, and this
556 ** must happen after the MAC is
557 ** identified
558 */
559 if ((hw->mac.type == e1000_ich8lan) ||
560 (hw->mac.type == e1000_ich9lan) ||
561 (hw->mac.type == e1000_ich10lan) ||
562 (hw->mac.type == e1000_pchlan) ||
563 (hw->mac.type == e1000_pch2lan) ||
564 (hw->mac.type == e1000_pch_lpt)) {
565 int rid = EM_BAR_TYPE_FLASH;
566 adapter->flash = bus_alloc_resource_any(dev,
567 SYS_RES_MEMORY, &rid, RF_ACTIVE);
568 if (adapter->flash == NULL) {
569 device_printf(dev, "Mapping of Flash failed\n");
570 error = ENXIO;
571 goto err_pci;
572 }
573 /* This is used in the shared code */
574 hw->flash_address = (u8 *)adapter->flash;
575 adapter->osdep.flash_bus_space_tag =
576 rman_get_bustag(adapter->flash);
577 adapter->osdep.flash_bus_space_handle =
578 rman_get_bushandle(adapter->flash);
579 }
580
581 /* Do Shared Code initialization */
582 if (e1000_setup_init_funcs(hw, TRUE)) {
583 device_printf(dev, "Setup of Shared code failed\n");
584 error = ENXIO;
585 goto err_pci;
586 }
587
588 /*
589 * Setup MSI/X or MSI if PCI Express
590 */
591 adapter->msix = em_setup_msix(adapter);
592
593 e1000_get_bus_info(hw);
594
595 /* Set up some sysctls for the tunable interrupt delays */
596 em_add_int_delay_sysctl(adapter, "rx_int_delay",
597 "receive interrupt delay in usecs", &adapter->rx_int_delay,
598 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
599 em_add_int_delay_sysctl(adapter, "tx_int_delay",
600 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
601 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
602 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
603 "receive interrupt delay limit in usecs",
604 &adapter->rx_abs_int_delay,
605 E1000_REGISTER(hw, E1000_RADV),
606 em_rx_abs_int_delay_dflt);
607 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
608 "transmit interrupt delay limit in usecs",
609 &adapter->tx_abs_int_delay,
610 E1000_REGISTER(hw, E1000_TADV),
611 em_tx_abs_int_delay_dflt);
612 em_add_int_delay_sysctl(adapter, "itr",
613 "interrupt delay limit in usecs/4",
614 &adapter->tx_itr,
615 E1000_REGISTER(hw, E1000_ITR),
616 DEFAULT_ITR);
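/*
 * Usage note (informational): the knobs registered above are attached to
 * the per-device sysctl tree, dev.em.<unit>, and serviced by
 * em_sysctl_int_delay(); e.g. (unit and value illustrative):
 *
 *   # sysctl dev.em.0.rx_int_delay
 *   # sysctl dev.em.0.itr=488
 */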
617
618 /* Sysctl for limiting the amount of work done in the taskqueue */
619 em_set_sysctl_value(adapter, "rx_processing_limit",
620 "max number of rx packets to process", &adapter->rx_process_limit,
621 em_rx_process_limit);
622
623 /*
624 * Validate number of transmit and receive descriptors. It
625 * must not exceed hardware maximum, and must be multiple
626 * of E1000_DBA_ALIGN.
627 */
628 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
629 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
630 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
631 EM_DEFAULT_TXD, em_txd);
632 adapter->num_tx_desc = EM_DEFAULT_TXD;
633 } else
634 adapter->num_tx_desc = em_txd;
635
636 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
637 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
638 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
639 EM_DEFAULT_RXD, em_rxd);
640 adapter->num_rx_desc = EM_DEFAULT_RXD;
641 } else
642 adapter->num_rx_desc = em_rxd;
643
644 hw->mac.autoneg = DO_AUTO_NEG;
645 hw->phy.autoneg_wait_to_complete = FALSE;
646 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
647
648 /* Copper options */
649 if (hw->phy.media_type == e1000_media_type_copper) {
650 hw->phy.mdix = AUTO_ALL_MODES;
651 hw->phy.disable_polarity_correction = FALSE;
652 hw->phy.ms_type = EM_MASTER_SLAVE;
653 }
654
655 /*
656 * Set the frame limits assuming
657 * standard ethernet sized frames.
658 */
659 adapter->hw.mac.max_frame_size =
660 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
661
662 /*
663 * This controls when hardware reports transmit completion
664 * status.
665 */
666 hw->mac.report_tx_early = 1;
667
668 /*
669 ** Get queue/ring memory
670 */
671 if (em_allocate_queues(adapter)) {
672 error = ENOMEM;
673 goto err_pci;
674 }
675
676 /* Allocate multicast array memory. */
677 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
678 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
679 if (adapter->mta == NULL) {
680 device_printf(dev, "Can not allocate multicast setup array\n");
681 error = ENOMEM;
682 goto err_late;
683 }
684
685 /* Check SOL/IDER usage */
686 if (e1000_check_reset_block(hw))
687 device_printf(dev, "PHY reset is blocked"
688 " due to SOL/IDER session.\n");
689
690 /* Sysctl for setting Energy Efficient Ethernet */
691 hw->dev_spec.ich8lan.eee_disable = eee_setting;
692 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
693 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
694 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
695 adapter, 0, em_sysctl_eee, "I",
696 "Disable Energy Efficient Ethernet");
697
698 /*
 699 ** Start from a known state; this is
 700 ** important for reading the NVM and
 701 ** MAC address reliably.
702 */
703 e1000_reset_hw(hw);
704
705
706 /* Make sure we have a good EEPROM before we read from it */
707 if (e1000_validate_nvm_checksum(hw) < 0) {
708 /*
 709 ** Some PCI-E parts fail the first check due to
 710 ** the link being in a sleep state; call it again,
 711 ** and if it fails a second time it's a real issue.
712 */
713 if (e1000_validate_nvm_checksum(hw) < 0) {
714 device_printf(dev,
715 "The EEPROM Checksum Is Not Valid\n");
716 error = EIO;
717 goto err_late;
718 }
719 }
720
721 /* Copy the permanent MAC address out of the EEPROM */
722 if (e1000_read_mac_addr(hw) < 0) {
723 device_printf(dev, "EEPROM read error while reading MAC"
724 " address\n");
725 error = EIO;
726 goto err_late;
727 }
728
729 if (!em_is_valid_ether_addr(hw->mac.addr)) {
730 device_printf(dev, "Invalid MAC address\n");
731 error = EIO;
732 goto err_late;
733 }
734
735 /* Disable ULP support */
736 e1000_disable_ulp_lpt_lp(hw, TRUE);
737
738 /*
739 ** Do interrupt configuration
740 */
741 if (adapter->msix > 1) /* Do MSIX */
742 error = em_allocate_msix(adapter);
743 else /* MSI or Legacy */
744 error = em_allocate_legacy(adapter);
745 if (error)
746 goto err_late;
747
748 /*
749 * Get Wake-on-Lan and Management info for later use
750 */
751 em_get_wakeup(dev);
752
753 /* Setup OS specific network interface */
754 if (em_setup_interface(dev, adapter) != 0)
755 goto err_late;
756
757 em_reset(adapter);
758
759 /* Initialize statistics */
760 em_update_stats_counters(adapter);
761
762 hw->mac.get_link_status = 1;
763 em_update_link_status(adapter);
764
765 /* Register for VLAN events */
766 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
767 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
768 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
769 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
770
771 em_add_hw_stats(adapter);
772
773 /* Non-AMT based hardware can now take control from firmware */
774 if (adapter->has_manage && !adapter->has_amt)
775 em_get_hw_control(adapter);
776
777 /* Tell the stack that the interface is not active */
778 if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
779
780 adapter->led_dev = led_create(em_led_func, adapter,
781 device_get_nameunit(dev));
782#ifdef DEV_NETMAP
783 em_netmap_attach(adapter);
784#endif /* DEV_NETMAP */
785
786 INIT_DEBUGOUT("em_attach: end");
787
788 return (0);
789
790err_late:
791 em_free_transmit_structures(adapter);
792 em_free_receive_structures(adapter);
793 em_release_hw_control(adapter);
794 if (adapter->ifp != (void *)NULL)
795 if_free(adapter->ifp);
796err_pci:
797 em_free_pci_resources(adapter);
798 free(adapter->mta, M_DEVBUF);
799 EM_CORE_LOCK_DESTROY(adapter);
800
801 return (error);
802}
803
804/*********************************************************************
805 * Device removal routine
806 *
807 * The detach entry point is called when the driver is being removed.
808 * This routine stops the adapter and deallocates all the resources
809 * that were allocated for driver operation.
810 *
811 * return 0 on success, positive on failure
812 *********************************************************************/
813
814static int
815em_detach(device_t dev)
816{
817 struct adapter *adapter = device_get_softc(dev);
818 if_t ifp = adapter->ifp;
819
820 INIT_DEBUGOUT("em_detach: begin");
821
822 /* Make sure VLANS are not using driver */
823 if (if_vlantrunkinuse(ifp)) {
824 device_printf(dev,"Vlan in use, detach first\n");
825 return (EBUSY);
826 }
827
828#ifdef DEVICE_POLLING
829 if (if_getcapenable(ifp) & IFCAP_POLLING)
830 ether_poll_deregister(ifp);
831#endif
832
833 if (adapter->led_dev != NULL)
834 led_destroy(adapter->led_dev);
835
836 EM_CORE_LOCK(adapter);
837 adapter->in_detach = 1;
838 em_stop(adapter);
839 EM_CORE_UNLOCK(adapter);
840 EM_CORE_LOCK_DESTROY(adapter);
841
842 e1000_phy_hw_reset(&adapter->hw);
843
844 em_release_manageability(adapter);
845 em_release_hw_control(adapter);
846
847 /* Unregister VLAN events */
848 if (adapter->vlan_attach != NULL)
849 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
850 if (adapter->vlan_detach != NULL)
851 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
852
853 ether_ifdetach(adapter->ifp);
854 callout_drain(&adapter->timer);
855
856#ifdef DEV_NETMAP
857 netmap_detach(ifp);
858#endif /* DEV_NETMAP */
859
860 em_free_pci_resources(adapter);
861 bus_generic_detach(dev);
862 if_free(ifp);
863
864 em_free_transmit_structures(adapter);
865 em_free_receive_structures(adapter);
866
867 em_release_hw_control(adapter);
868 free(adapter->mta, M_DEVBUF);
869
870 return (0);
871}
872
873/*********************************************************************
874 *
875 * Shutdown entry point
876 *
877 **********************************************************************/
878
879static int
880em_shutdown(device_t dev)
881{
882 return em_suspend(dev);
883}
884
885/*
886 * Suspend/resume device methods.
887 */
888static int
889em_suspend(device_t dev)
890{
891 struct adapter *adapter = device_get_softc(dev);
892
893 EM_CORE_LOCK(adapter);
894
895 em_release_manageability(adapter);
896 em_release_hw_control(adapter);
897 em_enable_wakeup(dev);
898
899 EM_CORE_UNLOCK(adapter);
900
901 return bus_generic_suspend(dev);
902}
903
904static int
905em_resume(device_t dev)
906{
907 struct adapter *adapter = device_get_softc(dev);
908 struct tx_ring *txr = adapter->tx_rings;
909 if_t ifp = adapter->ifp;
910
911 EM_CORE_LOCK(adapter);
912 if (adapter->hw.mac.type == e1000_pch2lan)
913 e1000_resume_workarounds_pchlan(&adapter->hw);
914 em_init_locked(adapter);
915 em_init_manageability(adapter);
916
917 if ((if_getflags(ifp) & IFF_UP) &&
918 (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
919 for (int i = 0; i < adapter->num_queues; i++, txr++) {
920 EM_TX_LOCK(txr);
921#ifdef EM_MULTIQUEUE
922 if (!drbr_empty(ifp, txr->br))
923 em_mq_start_locked(ifp, txr);
924#else
925 if (!if_sendq_empty(ifp))
926 em_start_locked(ifp, txr);
927#endif
928 EM_TX_UNLOCK(txr);
929 }
930 }
931 EM_CORE_UNLOCK(adapter);
932
933 return bus_generic_resume(dev);
934}
935
936
937#ifndef EM_MULTIQUEUE
938static void
939em_start_locked(if_t ifp, struct tx_ring *txr)
940{
941 struct adapter *adapter = if_getsoftc(ifp);
942 struct mbuf *m_head;
943
944 EM_TX_LOCK_ASSERT(txr);
945
946 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
947 IFF_DRV_RUNNING)
948 return;
949
950 if (!adapter->link_active)
951 return;
952
953 while (!if_sendq_empty(ifp)) {
954 /* Call cleanup if number of TX descriptors low */
955 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
956 em_txeof(txr);
957 if (txr->tx_avail < EM_MAX_SCATTER) {
958 if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
959 break;
960 }
961 m_head = if_dequeue(ifp);
962 if (m_head == NULL)
963 break;
964 /*
 965 * Encapsulation can modify our pointer, and/or make it
966 * NULL on failure. In that event, we can't requeue.
967 */
968 if (em_xmit(txr, &m_head)) {
969 if (m_head == NULL)
970 break;
971 if_sendq_prepend(ifp, m_head);
972 break;
973 }
974
975 /* Mark the queue as having work */
976 if (txr->busy == EM_TX_IDLE)
977 txr->busy = EM_TX_BUSY;
978
979 /* Send a copy of the frame to the BPF listener */
980 ETHER_BPF_MTAP(ifp, m_head);
981
982 }
983
984 return;
985}
986
987static void
988em_start(if_t ifp)
989{
990 struct adapter *adapter = if_getsoftc(ifp);
991 struct tx_ring *txr = adapter->tx_rings;
992
993 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
994 EM_TX_LOCK(txr);
995 em_start_locked(ifp, txr);
996 EM_TX_UNLOCK(txr);
997 }
998 return;
999}
1000#else /* EM_MULTIQUEUE */
1001/*********************************************************************
1002 * Multiqueue Transmit routines
1003 *
1004 * em_mq_start is called by the stack to initiate a transmit.
1005 * However, if the ring is busy the driver can queue the request
1006 * rather than do an immediate send, which is the main advantage
1007 * of this path beyond simply having multiple TX queues.
1008 **********************************************************************/
1009/*
1010** Multiqueue capable stack interface
1011*/
1012static int
1013em_mq_start(if_t ifp, struct mbuf *m)
1014{
1015 struct adapter *adapter = if_getsoftc(ifp);
1016 struct tx_ring *txr = adapter->tx_rings;
1017 unsigned int i, error;
1018
1019 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1020 i = m->m_pkthdr.flowid % adapter->num_queues;
1021 else
1022 i = curcpu % adapter->num_queues;
1023
1024 txr = &adapter->tx_rings[i];
1025
1026 error = drbr_enqueue(ifp, txr->br, m);
1027 if (error)
1028 return (error);
1029
1030 if (EM_TX_TRYLOCK(txr)) {
1031 em_mq_start_locked(ifp, txr);
1032 EM_TX_UNLOCK(txr);
1033 } else
1034 taskqueue_enqueue(txr->tq, &txr->tx_task);
1035
1036 return (0);
1037}
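/*
 * Example (illustrative): with num_queues = 2, a packet carrying an RSS
 * hash (flowid) of 5 is enqueued on ring 5 % 2 = 1, while packets with
 * no hash fall back to the submitting CPU, curcpu % 2.
 */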
1038
1039static int
1040em_mq_start_locked(if_t ifp, struct tx_ring *txr)
1041{
1042 struct adapter *adapter = txr->adapter;
1043 struct mbuf *next;
1044 int err = 0, enq = 0;
1045
1046 EM_TX_LOCK_ASSERT(txr);
1047
1048 if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
1049 adapter->link_active == 0) {
1050 return (ENETDOWN);
1051 }
1052
1053 /* Process the queue */
1054 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1055 if ((err = em_xmit(txr, &next)) != 0) {
1056 if (next == NULL) {
1057 /* It was freed, move forward */
1058 drbr_advance(ifp, txr->br);
1059 } else {
1060 /*
1061 * Still have one left; it may not be
1062 * the same since the transmit function
1063 * may have changed it.
1064 */
1065 drbr_putback(ifp, txr->br, next);
1066 }
1067 break;
1068 }
1069 drbr_advance(ifp, txr->br);
1070 enq++;
1071 if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
1072 if (next->m_flags & M_MCAST)
1073 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1074 ETHER_BPF_MTAP(ifp, next);
1075 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
1076 break;
1077 }
1078
1079 /* Mark the queue as having work */
1080 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1081 txr->busy = EM_TX_BUSY;
1082
1083 if (txr->tx_avail < EM_MAX_SCATTER)
1084 em_txeof(txr);
1085 if (txr->tx_avail < EM_MAX_SCATTER) {
1086 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
1087 }
1088 return (err);
1089}
1090
1091/*
1092** Flush all ring buffers
1093*/
1094static void
1095em_qflush(if_t ifp)
1096{
1097 struct adapter *adapter = if_getsoftc(ifp);
1098 struct tx_ring *txr = adapter->tx_rings;
1099 struct mbuf *m;
1100
1101 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1102 EM_TX_LOCK(txr);
1103 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1104 m_freem(m);
1105 EM_TX_UNLOCK(txr);
1106 }
1107 if_qflush(ifp);
1108}
1109#endif /* EM_MULTIQUEUE */
1110
1111/*********************************************************************
1112 * Ioctl entry point
1113 *
1114 * em_ioctl is called when the user wants to configure the
1115 * interface.
1116 *
1117 * return 0 on success, positive on failure
1118 **********************************************************************/
1119
1120static int
1121em_ioctl(if_t ifp, u_long command, caddr_t data)
1122{
1123 struct adapter *adapter = if_getsoftc(ifp);
1124 struct ifreq *ifr = (struct ifreq *)data;
1125#if defined(INET) || defined(INET6)
1126 struct ifaddr *ifa = (struct ifaddr *)data;
1127#endif
1128 bool avoid_reset = FALSE;
1129 int error = 0;
1130
1131 if (adapter->in_detach)
1132 return (error);
1133
1134 switch (command) {
1135 case SIOCSIFADDR:
1136#ifdef INET
1137 if (ifa->ifa_addr->sa_family == AF_INET)
1138 avoid_reset = TRUE;
1139#endif
1140#ifdef INET6
1141 if (ifa->ifa_addr->sa_family == AF_INET6)
1142 avoid_reset = TRUE;
1143#endif
1144 /*
1145 ** Calling init results in link renegotiation,
1146 ** so we avoid doing it when possible.
1147 */
1148 if (avoid_reset) {
1149 if_setflagbits(ifp,IFF_UP,0);
1150 if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
1151 em_init(adapter);
1152#ifdef INET
1153 if (!(if_getflags(ifp) & IFF_NOARP))
1154 arp_ifinit(ifp, ifa);
1155#endif
1156 } else
1157 error = ether_ioctl(ifp, command, data);
1158 break;
1159 case SIOCSIFMTU:
1160 {
1161 int max_frame_size;
1162
1163 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1164
1165 EM_CORE_LOCK(adapter);
1166 switch (adapter->hw.mac.type) {
1167 case e1000_82571:
1168 case e1000_82572:
1169 case e1000_ich9lan:
1170 case e1000_ich10lan:
1171 case e1000_pch2lan:
1172 case e1000_pch_lpt:
1173 case e1000_82574:
1174 case e1000_82583:
1175 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1176 max_frame_size = 9234;
1177 break;
1178 case e1000_pchlan:
1179 max_frame_size = 4096;
1180 break;
1181 /* Adapters that do not support jumbo frames */
1182 case e1000_ich8lan:
1183 max_frame_size = ETHER_MAX_LEN;
1184 break;
1185 default:
1186 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1187 }
1188 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1189 ETHER_CRC_LEN) {
1190 EM_CORE_UNLOCK(adapter);
1191 error = EINVAL;
1192 break;
1193 }
1194
1195 if_setmtu(ifp, ifr->ifr_mtu);
1196 adapter->hw.mac.max_frame_size =
1197 if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
1198 em_init_locked(adapter);
1199 EM_CORE_UNLOCK(adapter);
1200 break;
1201 }
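/*
 * Worked example (illustrative): on an adapter with max_frame_size 9234,
 * the largest accepted MTU is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN
 * (4) = 9216, so `ifconfig em0 mtu 9216` succeeds while `mtu 9217`
 * returns EINVAL.
 */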
1202 case SIOCSIFFLAGS:
1203 IOCTL_DEBUGOUT("ioctl rcv'd:\
1204 SIOCSIFFLAGS (Set Interface Flags)");
1205 EM_CORE_LOCK(adapter);
1206 if (if_getflags(ifp) & IFF_UP) {
1207 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1208 if ((if_getflags(ifp) ^ adapter->if_flags) &
1209 (IFF_PROMISC | IFF_ALLMULTI)) {
1210 em_disable_promisc(adapter);
1211 em_set_promisc(adapter);
1212 }
1213 } else
1214 em_init_locked(adapter);
1215 } else
1216 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1217 em_stop(adapter);
1218 adapter->if_flags = if_getflags(ifp);
1219 EM_CORE_UNLOCK(adapter);
1220 break;
1221 case SIOCADDMULTI:
1222 case SIOCDELMULTI:
1223 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1224 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1225 EM_CORE_LOCK(adapter);
1226 em_disable_intr(adapter);
1227 em_set_multi(adapter);
1228#ifdef DEVICE_POLLING
1229 if (!(if_getcapenable(ifp) & IFCAP_POLLING))
1230#endif
1231 em_enable_intr(adapter);
1232 EM_CORE_UNLOCK(adapter);
1233 }
1234 break;
1235 case SIOCSIFMEDIA:
1236 /* Check SOL/IDER usage */
1237 EM_CORE_LOCK(adapter);
1238 if (e1000_check_reset_block(&adapter->hw)) {
1239 EM_CORE_UNLOCK(adapter);
1240 device_printf(adapter->dev, "Media change is"
1241 " blocked due to SOL/IDER session.\n");
1242 break;
1243 }
1244 EM_CORE_UNLOCK(adapter);
1245 /* falls thru */
1246 case SIOCGIFMEDIA:
1247 IOCTL_DEBUGOUT("ioctl rcv'd: \
1248 SIOCxIFMEDIA (Get/Set Interface Media)");
1249 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1250 break;
1251 case SIOCSIFCAP:
1252 {
1253 int mask, reinit;
1254
1255 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1256 reinit = 0;
1257 mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
1258#ifdef DEVICE_POLLING
1259 if (mask & IFCAP_POLLING) {
1260 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1261 error = ether_poll_register(em_poll, ifp);
1262 if (error)
1263 return (error);
1264 EM_CORE_LOCK(adapter);
1265 em_disable_intr(adapter);
1266 if_setcapenablebit(ifp, IFCAP_POLLING, 0);
1267 EM_CORE_UNLOCK(adapter);
1268 } else {
1269 error = ether_poll_deregister(ifp);
1270 /* Enable interrupt even in error case */
1271 EM_CORE_LOCK(adapter);
1272 em_enable_intr(adapter);
1273 if_setcapenablebit(ifp, 0, IFCAP_POLLING);
1274 EM_CORE_UNLOCK(adapter);
1275 }
1276 }
1277#endif
1278 if (mask & IFCAP_HWCSUM) {
1279 if_togglecapenable(ifp,IFCAP_HWCSUM);
1280 reinit = 1;
1281 }
1282 if (mask & IFCAP_TSO4) {
1283 if_togglecapenable(ifp,IFCAP_TSO4);
1284 reinit = 1;
1285 }
1286 if (mask & IFCAP_VLAN_HWTAGGING) {
1287 if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
1288 reinit = 1;
1289 }
1290 if (mask & IFCAP_VLAN_HWFILTER) {
1291 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1292 reinit = 1;
1293 }
1294 if (mask & IFCAP_VLAN_HWTSO) {
1295 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1296 reinit = 1;
1297 }
1298 if ((mask & IFCAP_WOL) &&
1299 (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
1300 if (mask & IFCAP_WOL_MCAST)
1301 if_togglecapenable(ifp, IFCAP_WOL_MCAST);
1302 if (mask & IFCAP_WOL_MAGIC)
1303 if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
1304 }
1305 if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1306 em_init(adapter);
1307 if_vlancap(ifp);
1308 break;
1309 }
1310
1311 default:
1312 error = ether_ioctl(ifp, command, data);
1313 break;
1314 }
1315
1316 return (error);
1317}
1318
1319
1320/*********************************************************************
1321 * Init entry point
1322 *
1323 * This routine is used in two ways. It is used by the stack as
1324 * the init entry point in the network interface structure. It is also used
1325 * by the driver as a hw/sw initialization routine to get to a
1326 * consistent state.
1327 *
1328 * return 0 on success, positive on failure
1329 **********************************************************************/
1330
1331static void
1332em_init_locked(struct adapter *adapter)
1333{
1334 if_t ifp = adapter->ifp;
1335 device_t dev = adapter->dev;
1336
1337 INIT_DEBUGOUT("em_init: begin");
1338
1339 EM_CORE_LOCK_ASSERT(adapter);
1340
1341 em_disable_intr(adapter);
1342 callout_stop(&adapter->timer);
1343
1344 /* Get the latest mac address, User can use a LAA */
1345 bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
1346 ETHER_ADDR_LEN);
1347
1348 /* Put the address into the Receive Address Array */
1349 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1350
1351 /*
1352 * With the 82571 adapter, RAR[0] may be overwritten
1353 * when the other port is reset; we keep a duplicate
1354 * in the last RAR entry for that eventuality, which
1355 * ensures the interface continues to function.
1356 */
1357 if (adapter->hw.mac.type == e1000_82571) {
1358 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1359 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1360 E1000_RAR_ENTRIES - 1);
1361 }
1362
1363 /* Initialize the hardware */
1364 em_reset(adapter);
1365 em_update_link_status(adapter);
1366
1367 /* Setup VLAN support, basic and offload if available */
1368 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1369
1370 /* Set hardware offload abilities */
1371 if_clearhwassist(ifp);
1372 if (if_getcapenable(ifp) & IFCAP_TXCSUM)
1373 if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
1374 /*
1375 ** There have proven to be problems with TSO when not
1376 ** at full gigabit speed, so disable the assist automatically
1377 ** when at lower speeds. -jfv
1378 */
1379 if (if_getcapenable(ifp) & IFCAP_TSO4) {
1380 if (adapter->link_speed == SPEED_1000)
1381 if_sethwassistbits(ifp, CSUM_TSO, 0);
1382 }
1383
1384 /* Configure for OS presence */
1385 em_init_manageability(adapter);
1386
1387 /* Prepare transmit descriptors and buffers */
1388 em_setup_transmit_structures(adapter);
1389 em_initialize_transmit_unit(adapter);
1390
1391 /* Setup Multicast table */
1392 em_set_multi(adapter);
1393
1394 /*
1395 ** Figure out the desired mbuf
1396 ** pool for doing jumbos
1397 */
1398 if (adapter->hw.mac.max_frame_size <= 2048)
1399 adapter->rx_mbuf_sz = MCLBYTES;
1400 else if (adapter->hw.mac.max_frame_size <= 4096)
1401 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1402 else
1403 adapter->rx_mbuf_sz = MJUM9BYTES;
1404
1405 /* Prepare receive descriptors and buffers */
1406 if (em_setup_receive_structures(adapter)) {
1407 device_printf(dev, "Could not setup receive structures\n");
1408 em_stop(adapter);
1409 return;
1410 }
1411 em_initialize_receive_unit(adapter);
1412
1413 /* Use real VLAN Filter support? */
1414 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
1415 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1416 /* Use real VLAN Filter support */
1417 em_setup_vlan_hw_support(adapter);
1418 else {
1419 u32 ctrl;
1420 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1421 ctrl |= E1000_CTRL_VME;
1422 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1423 }
1424 }
1425
1426 /* Don't lose promiscuous settings */
1427 em_set_promisc(adapter);
1428
1429 /* Set the interface as ACTIVE */
1430 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
1431
1432 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1433 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1434
1435 /* MSI/X configuration for 82574 */
1436 if (adapter->hw.mac.type == e1000_82574) {
1437 int tmp;
1438 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1439 tmp |= E1000_CTRL_EXT_PBA_CLR;
1440 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1441 /* Set the IVAR - interrupt vector routing. */
1442 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1443 }
1444
1445#ifdef DEVICE_POLLING
1446 /*
1446 * Only enable interrupts if we are not polling; make sure
1448 * they are off otherwise.
1449 */
1450 if (if_getcapenable(ifp) & IFCAP_POLLING)
1451 em_disable_intr(adapter);
1452 else
1453#endif /* DEVICE_POLLING */
1454 em_enable_intr(adapter);
1455
1456 /* AMT based hardware can now take control from firmware */
1457 if (adapter->has_manage && adapter->has_amt)
1458 em_get_hw_control(adapter);
1459}
1460
1461static void
1462em_init(void *arg)
1463{
1464 struct adapter *adapter = arg;
1465
1466 EM_CORE_LOCK(adapter);
1467 em_init_locked(adapter);
1468 EM_CORE_UNLOCK(adapter);
1469}
1470
1471
1472#ifdef DEVICE_POLLING
1473/*********************************************************************
1474 *
1475 * Legacy polling routine: note this only works with a single queue
1476 *
1477 *********************************************************************/
1478static int
1479em_poll(if_t ifp, enum poll_cmd cmd, int count)
1480{
1481 struct adapter *adapter = if_getsoftc(ifp);
1482 struct tx_ring *txr = adapter->tx_rings;
1483 struct rx_ring *rxr = adapter->rx_rings;
1484 u32 reg_icr;
1485 int rx_done;
1486
1487 EM_CORE_LOCK(adapter);
1488 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
1489 EM_CORE_UNLOCK(adapter);
1490 return (0);
1491 }
1492
1493 if (cmd == POLL_AND_CHECK_STATUS) {
1494 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1495 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1496 callout_stop(&adapter->timer);
1497 adapter->hw.mac.get_link_status = 1;
1498 em_update_link_status(adapter);
1499 callout_reset(&adapter->timer, hz,
1500 em_local_timer, adapter);
1501 }
1502 }
1503 EM_CORE_UNLOCK(adapter);
1504
1505 em_rxeof(rxr, count, &rx_done);
1506
1507 EM_TX_LOCK(txr);
1508 em_txeof(txr);
1509#ifdef EM_MULTIQUEUE
1510 if (!drbr_empty(ifp, txr->br))
1511 em_mq_start_locked(ifp, txr);
1512#else
1513 if (!if_sendq_empty(ifp))
1514 em_start_locked(ifp, txr);
1515#endif
1516 EM_TX_UNLOCK(txr);
1517
1518 return (rx_done);
1519}
1520#endif /* DEVICE_POLLING */
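/*
 * Usage note (informational): em_poll is only compiled in with "options
 * DEVICE_POLLING" in the kernel configuration; polling is then toggled
 * per interface from userland, e.g.:
 *
 *   # ifconfig em0 polling
 */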
1521
1522
1523/*********************************************************************
1524 *
1525 * Fast Legacy/MSI Combined Interrupt Service routine
1526 *
1527 *********************************************************************/
1528static int
1529em_irq_fast(void *arg)
1530{
1531 struct adapter *adapter = arg;
1532 if_t ifp;
1533 u32 reg_icr;
1534
1535 ifp = adapter->ifp;
1536
1537 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1538
1539 /* Hot eject? */
1540 if (reg_icr == 0xffffffff)
1541 return FILTER_STRAY;
1542
1543 /* Definitely not our interrupt. */
1544 if (reg_icr == 0x0)
1545 return FILTER_STRAY;
1546
1547 /*
1548 * Starting with the 82571 chip, bit 31 should be used to
1549 * determine whether the interrupt belongs to us.
1550 */
1551 if (adapter->hw.mac.type >= e1000_82571 &&
1552 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1553 return FILTER_STRAY;
1554
1555 em_disable_intr(adapter);
1556 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1557
1558 /* Link status change */
1559 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1560 adapter->hw.mac.get_link_status = 1;
1561 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1562 }
1563
1564 if (reg_icr & E1000_ICR_RXO)
1565 adapter->rx_overruns++;
1566 return FILTER_HANDLED;
1567}
1568
1569/* Combined RX/TX handler, used by Legacy and MSI */
1570static void
1571em_handle_que(void *context, int pending)
1572{
1573 struct adapter *adapter = context;
1574 if_t ifp = adapter->ifp;
1575 struct tx_ring *txr = adapter->tx_rings;
1576 struct rx_ring *rxr = adapter->rx_rings;
1577
1578 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1579 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1580
1581 EM_TX_LOCK(txr);
1582 em_txeof(txr);
1583#ifdef EM_MULTIQUEUE
1584 if (!drbr_empty(ifp, txr->br))
1585 em_mq_start_locked(ifp, txr);
1586#else
1587 if (!if_sendq_empty(ifp))
1588 em_start_locked(ifp, txr);
1589#endif
1590 EM_TX_UNLOCK(txr);
1591 if (more) {
1592 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1593 return;
1594 }
1595 }
1596
1597 em_enable_intr(adapter);
1598 return;
1599}
1600
1601
1602/*********************************************************************
1603 *
1604 * MSIX Interrupt Service Routines
1605 *
1606 **********************************************************************/
1607static void
1608em_msix_tx(void *arg)
1609{
1610 struct tx_ring *txr = arg;
1611 struct adapter *adapter = txr->adapter;
1612 if_t ifp = adapter->ifp;
1613
1614 ++txr->tx_irq;
1615 EM_TX_LOCK(txr);
1616 em_txeof(txr);
1617#ifdef EM_MULTIQUEUE
1618 if (!drbr_empty(ifp, txr->br))
1619 em_mq_start_locked(ifp, txr);
1620#else
1621 if (!if_sendq_empty(ifp))
1622 em_start_locked(ifp, txr);
1623#endif
1624
1625 /* Reenable this interrupt */
1626 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1627 EM_TX_UNLOCK(txr);
1628 return;
1629}
1630
1631/*********************************************************************
1632 *
1633 * MSIX RX Interrupt Service routine
1634 *
1635 **********************************************************************/
1636
1637static void
1638em_msix_rx(void *arg)
1639{
1640 struct rx_ring *rxr = arg;
1641 struct adapter *adapter = rxr->adapter;
1642 bool more;
1643
1644 ++rxr->rx_irq;
1645 if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
1646 return;
1647 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1648 if (more)
1649 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1650 else {
1651 /* Reenable this interrupt */
1652 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1653 }
1654 return;
1655}
1656
1657/*********************************************************************
1658 *
1659 * MSIX Link Fast Interrupt Service routine
1660 *
1661 **********************************************************************/
1662static void
1663em_msix_link(void *arg)
1664{
1665 struct adapter *adapter = arg;
1666 u32 reg_icr;
1667
1668 ++adapter->link_irq;
1669 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1670
1671 if (reg_icr & E1000_ICR_RXO)
1672 adapter->rx_overruns++;
1673
1674 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1675 adapter->hw.mac.get_link_status = 1;
1676 em_handle_link(adapter, 0);
1677 } else
1678 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1679 EM_MSIX_LINK | E1000_IMS_LSC);
1680 /*
1681 ** Because we must read the ICR for this interrupt,
1682 ** it may clear other causes using autoclear; for
1683 ** this reason we simply re-trigger a soft interrupt
1684 ** for all those vectors.
1685 */
1686 if (reg_icr) {
1687 E1000_WRITE_REG(&adapter->hw,
1688 E1000_ICS, adapter->ims);
1689 }
1690 return;
1691}
1692
1693static void
1694em_handle_rx(void *context, int pending)
1695{
1696 struct rx_ring *rxr = context;
1697 struct adapter *adapter = rxr->adapter;
1698 bool more;
1699
1700 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1701 if (more)
1702 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1703 else {
1704 /* Reenable this interrupt */
1705 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1706 }
1707}
1708
1709static void
1710em_handle_tx(void *context, int pending)
1711{
1712 struct tx_ring *txr = context;
1713 struct adapter *adapter = txr->adapter;
1714 if_t ifp = adapter->ifp;
1715
1716 EM_TX_LOCK(txr);
1717 em_txeof(txr);
1718#ifdef EM_MULTIQUEUE
1719 if (!drbr_empty(ifp, txr->br))
1720 em_mq_start_locked(ifp, txr);
1721#else
1722 if (!if_sendq_empty(ifp))
1723 em_start_locked(ifp, txr);
1724#endif
1725 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1726 EM_TX_UNLOCK(txr);
1727}
1728
1729static void
1730em_handle_link(void *context, int pending)
1731{
1732 struct adapter *adapter = context;
1733 struct tx_ring *txr = adapter->tx_rings;
1734 if_t ifp = adapter->ifp;
1735
1736 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1737 return;
1738
1739 EM_CORE_LOCK(adapter);
1740 callout_stop(&adapter->timer);
1741 em_update_link_status(adapter);
1742 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1743 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1744 EM_MSIX_LINK | E1000_IMS_LSC);
1745 if (adapter->link_active) {
1746 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1747 EM_TX_LOCK(txr);
1748#ifdef EM_MULTIQUEUE
1749 if (!drbr_empty(ifp, txr->br))
1750 em_mq_start_locked(ifp, txr);
1751#else
1752 if (!if_sendq_empty(ifp))
1753 em_start_locked(ifp, txr);
1754#endif
1755 EM_TX_UNLOCK(txr);
1756 }
1757 }
1758 EM_CORE_UNLOCK(adapter);
1759}
1760
1761
1762/*********************************************************************
1763 *
1764 * Media Ioctl callback
1765 *
1766 * This routine is called whenever the user queries the status of
1767 * the interface using ifconfig.
1768 *
1769 **********************************************************************/
1770static void
1771em_media_status(if_t ifp, struct ifmediareq *ifmr)
1772{
1773 struct adapter *adapter = if_getsoftc(ifp);
1774 u_char fiber_type = IFM_1000_SX;
1775
1776 INIT_DEBUGOUT("em_media_status: begin");
1777
1778 EM_CORE_LOCK(adapter);
1779 em_update_link_status(adapter);
1780
1781 ifmr->ifm_status = IFM_AVALID;
1782 ifmr->ifm_active = IFM_ETHER;
1783
1784 if (!adapter->link_active) {
1785 EM_CORE_UNLOCK(adapter);
1786 return;
1787 }
1788
1789 ifmr->ifm_status |= IFM_ACTIVE;
1790
1791 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1792 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1793 ifmr->ifm_active |= fiber_type | IFM_FDX;
1794 } else {
1795 switch (adapter->link_speed) {
1796 case 10:
1797 ifmr->ifm_active |= IFM_10_T;
1798 break;
1799 case 100:
1800 ifmr->ifm_active |= IFM_100_TX;
1801 break;
1802 case 1000:
1803 ifmr->ifm_active |= IFM_1000_T;
1804 break;
1805 }
1806 if (adapter->link_duplex == FULL_DUPLEX)
1807 ifmr->ifm_active |= IFM_FDX;
1808 else
1809 ifmr->ifm_active |= IFM_HDX;
1810 }
1811 EM_CORE_UNLOCK(adapter);
1812}
1813
1814/*********************************************************************
1815 *
1816 * Media Ioctl callback
1817 *
1818 * This routine is called when the user changes speed/duplex using
1819 *  the media/mediaopt options with ifconfig.
1820 *
1821 **********************************************************************/
1822static int
1823em_media_change(if_t ifp)
1824{
1825 struct adapter *adapter = if_getsoftc(ifp);
1826 struct ifmedia *ifm = &adapter->media;
1827
1828 INIT_DEBUGOUT("em_media_change: begin");
1829
1830 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1831 return (EINVAL);
1832
1833 EM_CORE_LOCK(adapter);
1834 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1835 case IFM_AUTO:
1836 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1837 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1838 break;
1839 case IFM_1000_LX:
1840 case IFM_1000_SX:
1841 case IFM_1000_T:
1842 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1843 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1844 break;
1845 case IFM_100_TX:
1846 adapter->hw.mac.autoneg = FALSE;
1847 adapter->hw.phy.autoneg_advertised = 0;
1848 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1849 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1850 else
1851 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1852 break;
1853 case IFM_10_T:
1854 adapter->hw.mac.autoneg = FALSE;
1855 adapter->hw.phy.autoneg_advertised = 0;
1856 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1857 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1858 else
1859 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1860 break;
1861 default:
1862 device_printf(adapter->dev, "Unsupported media type\n");
1863 }
1864
1865 em_init_locked(adapter);
1866 EM_CORE_UNLOCK(adapter);
1867
1868 return (0);
1869}
1870
1871/*********************************************************************
1872 *
1873 * This routine maps the mbufs to tx descriptors.
1874 *
1875 *  Returns 0 on success, positive errno on failure.
1876 **********************************************************************/
1877
1878static int
1879em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1880{
1881 struct adapter *adapter = txr->adapter;
1882 bus_dma_segment_t segs[EM_MAX_SCATTER];
1883 bus_dmamap_t map;
1884 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1885 struct e1000_tx_desc *ctxd = NULL;
1886 struct mbuf *m_head;
1887 struct ether_header *eh;
1888 struct ip *ip = NULL;
1889 struct tcphdr *tp = NULL;
1890 u32 txd_upper = 0, txd_lower = 0;
1891 int ip_off, poff;
1892 int nsegs, i, j, first, last = 0;
1893 int error;
1894 bool do_tso, tso_desc, remap = TRUE;
1895
1896 m_head = *m_headp;
1897 do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
1898 tso_desc = FALSE;
1899 ip_off = poff = 0;
1900
1901 /*
1902 * Intel recommends entire IP/TCP header length reside in a single
1903 * buffer. If multiple descriptors are used to describe the IP and
1904 * TCP header, each descriptor should describe one or more
1905 * complete headers; descriptors referencing only parts of headers
1906 * are not supported. If all layer headers are not coalesced into
1907 * a single buffer, each buffer should not cross a 4KB boundary,
1908 * or be larger than the maximum read request size.
1909	 * The controller also requires modifying the IP/TCP header to
1910	 * make TSO work, so we first get a writable mbuf chain, then
1911	 * coalesce the ethernet/IP/TCP header into a single buffer that
1912	 * meets the controller's requirements. This also simplifies
1913	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
1914 */
1915 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1916 if (do_tso || (m_head->m_next != NULL &&
1917 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1918 if (M_WRITABLE(*m_headp) == 0) {
1919 m_head = m_dup(*m_headp, M_NOWAIT);
1920 m_freem(*m_headp);
1921 if (m_head == NULL) {
1922 *m_headp = NULL;
1923 return (ENOBUFS);
1924 }
1925 *m_headp = m_head;
1926 }
1927 }
1928 /*
1929 * XXX
1930 * Assume IPv4, we don't have TSO/checksum offload support
1931 * for IPv6 yet.
1932 */
1933 ip_off = sizeof(struct ether_header);
1934 if (m_head->m_len < ip_off) {
1935 m_head = m_pullup(m_head, ip_off);
1936 if (m_head == NULL) {
1937 *m_headp = NULL;
1938 return (ENOBUFS);
1939 }
1940 }
1941 eh = mtod(m_head, struct ether_header *);
1942 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1943 ip_off = sizeof(struct ether_vlan_header);
1944 if (m_head->m_len < ip_off) {
1945 m_head = m_pullup(m_head, ip_off);
1946 if (m_head == NULL) {
1947 *m_headp = NULL;
1948 return (ENOBUFS);
1949 }
1950 }
1951 }
1952 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1953 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1954 if (m_head == NULL) {
1955 *m_headp = NULL;
1956 return (ENOBUFS);
1957 }
1958 }
1959 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1960 poff = ip_off + (ip->ip_hl << 2);
1961
1962 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1963 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1964 m_head = m_pullup(m_head, poff +
1965 sizeof(struct tcphdr));
1966 if (m_head == NULL) {
1967 *m_headp = NULL;
1968 return (ENOBUFS);
1969 }
1970 }
1971 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1972			/*
1973			 * TSO workaround: pull in the whole TCP header
1974			 * plus TSO_WORKAROUND (4) extra bytes of payload.
1975			 */
1976 if (m_head->m_len < poff + (tp->th_off << 2)) {
1977 m_head = m_pullup(m_head, poff +
1978 (tp->th_off << 2) +
1979 TSO_WORKAROUND);
1980 if (m_head == NULL) {
1981 *m_headp = NULL;
1982 return (ENOBUFS);
1983 }
1984 }
1985 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1987 if (do_tso) {
1988 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
1989 (ip->ip_hl << 2) +
1990 (tp->th_off << 2));
1991 ip->ip_sum = 0;
1992				/*
1993				 * The TCP pseudo-header checksum must not
1994				 * include the payload length, so the driver
1995				 * recomputes it here to match what the
1996				 * hardware expects, per Microsoft's Large
1997				 * Send specification.
1998				 */
1999 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2000 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2001 }
2002 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2003 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2004 m_head = m_pullup(m_head, poff +
2005 sizeof(struct udphdr));
2006 if (m_head == NULL) {
2007 *m_headp = NULL;
2008 return (ENOBUFS);
2009 }
2010 }
2011 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012 }
2013 *m_headp = m_head;
2014 }
2015
2016 /*
2017 * Map the packet for DMA
2018 *
2019 * Capture the first descriptor index,
2020 * this descriptor will have the index
2021 * of the EOP which is the only one that
2022 * now gets a DONE bit writeback.
2023 */
2024 first = txr->next_avail_desc;
2025 tx_buffer = &txr->tx_buffers[first];
2026 tx_buffer_mapped = tx_buffer;
2027 map = tx_buffer->map;
2028
2029retry:
2030 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2031 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2032
2033 /*
2034 * There are two types of errors we can (try) to handle:
2035 * - EFBIG means the mbuf chain was too long and bus_dma ran
2036 * out of segments. Defragment the mbuf chain and try again.
2037 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2038 * at this point in time. Defer sending and try again later.
2039 * All other errors, in particular EINVAL, are fatal and prevent the
2040 * mbuf chain from ever going through. Drop it and report error.
2041 */
2042 if (error == EFBIG && remap) {
2043 struct mbuf *m;
2044
2045 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2046 if (m == NULL) {
2047 adapter->mbuf_defrag_failed++;
2048 m_freem(*m_headp);
2049 *m_headp = NULL;
2050 return (ENOBUFS);
2051 }
2052 *m_headp = m;
2053
2054 /* Try it again, but only once */
2055 remap = FALSE;
2056 goto retry;
2057 } else if (error != 0) {
2058 adapter->no_tx_dma_setup++;
2059 m_freem(*m_headp);
2060 *m_headp = NULL;
2061 return (error);
2062 }
2063
2064 /*
2065 * TSO Hardware workaround, if this packet is not
2066 * TSO, and is only a single descriptor long, and
2067 * it follows a TSO burst, then we need to add a
2068 * sentinel descriptor to prevent premature writeback.
2069 */
2070 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2071 if (nsegs == 1)
2072 tso_desc = TRUE;
2073 txr->tx_tso = FALSE;
2074 }
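	/*
	** When tso_desc is set, the descriptor loop below emits a
	** final segment larger than 8 bytes as two descriptors:
	** (len - 4) bytes of data, then a 4-byte (TSO_WORKAROUND)
	** sentinel, so status is not written back prematurely.
	*/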
2075
2076 if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
2077 txr->no_desc_avail++;
2078 bus_dmamap_unload(txr->txtag, map);
2079 return (ENOBUFS);
2080 }
2081 m_head = *m_headp;
2082
2083 /* Do hardware assists */
2084 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2085 em_tso_setup(txr, m_head, ip_off, ip, tp,
2086 &txd_upper, &txd_lower);
2087 /* we need to make a final sentinel transmit desc */
2088 tso_desc = TRUE;
2089 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2090 em_transmit_checksum_setup(txr, m_head,
2091 ip_off, ip, &txd_upper, &txd_lower);
2092
2093 if (m_head->m_flags & M_VLANTAG) {
2094 /* Set the vlan id. */
2095 txd_upper |= htole16(if_getvtag(m_head)) << 16;
2096 /* Tell hardware to add tag */
2097 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2098 }
2099
2100 i = txr->next_avail_desc;
2101
2102 /* Set up our transmit descriptors */
2103 for (j = 0; j < nsegs; j++) {
2104 bus_size_t seg_len;
2105 bus_addr_t seg_addr;
2106
2107 tx_buffer = &txr->tx_buffers[i];
2108 ctxd = &txr->tx_base[i];
2109 seg_addr = segs[j].ds_addr;
2110 seg_len = segs[j].ds_len;
2111 /*
2112 ** TSO Workaround:
2113 ** If this is the last descriptor, we want to
2114 ** split it so we have a small final sentinel
2115 */
2116 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2117 seg_len -= TSO_WORKAROUND;
2118 ctxd->buffer_addr = htole64(seg_addr);
2119 ctxd->lower.data = htole32(
2120 adapter->txd_cmd | txd_lower | seg_len);
2121 ctxd->upper.data = htole32(txd_upper);
2122 if (++i == adapter->num_tx_desc)
2123 i = 0;
2124
2125 /* Now make the sentinel */
2126 txr->tx_avail--;
2127 ctxd = &txr->tx_base[i];
2128 tx_buffer = &txr->tx_buffers[i];
2129 ctxd->buffer_addr =
2130 htole64(seg_addr + seg_len);
2131 ctxd->lower.data = htole32(
2132 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2133 ctxd->upper.data =
2134 htole32(txd_upper);
2135 last = i;
2136 if (++i == adapter->num_tx_desc)
2137 i = 0;
2138 } else {
2139 ctxd->buffer_addr = htole64(seg_addr);
2140 ctxd->lower.data = htole32(
2141 adapter->txd_cmd | txd_lower | seg_len);
2142 ctxd->upper.data = htole32(txd_upper);
2143 last = i;
2144 if (++i == adapter->num_tx_desc)
2145 i = 0;
2146 }
2147 tx_buffer->m_head = NULL;
2148 tx_buffer->next_eop = -1;
2149 }
2150
2151 txr->next_avail_desc = i;
2152 txr->tx_avail -= nsegs;
2153
2154 tx_buffer->m_head = m_head;
2155 /*
2156 ** Here we swap the map so the last descriptor,
2157	** which gets the completion interrupt, has the
2158 ** real map, and the first descriptor gets the
2159 ** unused map from this descriptor.
2160 */
2161 tx_buffer_mapped->map = tx_buffer->map;
2162 tx_buffer->map = map;
2163 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2164
2165 /*
2166 * Last Descriptor of Packet
2167 * needs End Of Packet (EOP)
2168 * and Report Status (RS)
2169 */
2170 ctxd->lower.data |=
2171 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2172 /*
2173 * Keep track in the first buffer which
2174 * descriptor will be written back
2175 */
2176 tx_buffer = &txr->tx_buffers[first];
2177 tx_buffer->next_eop = last;
2178
2179 /*
2180	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
2181 * that this frame is available to transmit.
2182 */
2183 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2184 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2185 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2186
2187 return (0);
2188}
2189
2190static void
2191em_set_promisc(struct adapter *adapter)
2192{
2193 if_t ifp = adapter->ifp;
2194 u32 reg_rctl;
2195
2196 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2197
2198 if (if_getflags(ifp) & IFF_PROMISC) {
2199 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2200 /* Turn this on if you want to see bad packets */
2201 if (em_debug_sbp)
2202 reg_rctl |= E1000_RCTL_SBP;
2203 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2204 } else if (if_getflags(ifp) & IFF_ALLMULTI) {
2205 reg_rctl |= E1000_RCTL_MPE;
2206 reg_rctl &= ~E1000_RCTL_UPE;
2207 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2208 }
2209}
2210
2211static void
2212em_disable_promisc(struct adapter *adapter)
2213{
2214 if_t ifp = adapter->ifp;
2215 u32 reg_rctl;
2216 int mcnt = 0;
2217
2218 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2219 reg_rctl &= (~E1000_RCTL_UPE);
2220 if (if_getflags(ifp) & IFF_ALLMULTI)
2221 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2222 else
2223 mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2224 /* Don't disable if in MAX groups */
2225 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2226 reg_rctl &= (~E1000_RCTL_MPE);
2227 reg_rctl &= (~E1000_RCTL_SBP);
2228 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2229}
2230
2231
2232/*********************************************************************
2233 * Multicast Update
2234 *
2235 * This routine is called whenever multicast address list is updated.
2236 *
2237 **********************************************************************/
2238
2239static void
2240em_set_multi(struct adapter *adapter)
2241{
2242 if_t ifp = adapter->ifp;
2243 u32 reg_rctl = 0;
2244 u8 *mta; /* Multicast array memory */
2245 int mcnt = 0;
2246
2247 IOCTL_DEBUGOUT("em_set_multi: begin");
2248
2249 mta = adapter->mta;
2250 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2251
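	/*
	** Workaround for the 82542 rev 2.0: the receiver is held in
	** reset (with MWI disabled) while the multicast table is
	** rewritten, then taken back out of reset below.
	*/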
2252 if (adapter->hw.mac.type == e1000_82542 &&
2253 adapter->hw.revision_id == E1000_REVISION_2) {
2254 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2255 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2256 e1000_pci_clear_mwi(&adapter->hw);
2257 reg_rctl |= E1000_RCTL_RST;
2258 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2259 msec_delay(5);
2260 }
2261
2262 if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2263
2264 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2265 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2266 reg_rctl |= E1000_RCTL_MPE;
2267 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2268 } else
2269 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2270
2271 if (adapter->hw.mac.type == e1000_82542 &&
2272 adapter->hw.revision_id == E1000_REVISION_2) {
2273 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2274 reg_rctl &= ~E1000_RCTL_RST;
2275 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2276 msec_delay(5);
2277 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2278 e1000_pci_set_mwi(&adapter->hw);
2279 }
2280}
2281
2282
2283/*********************************************************************
2284 * Timer routine
2285 *
2286 * This routine checks for link status and updates statistics.
2287 *
2288 **********************************************************************/
2289
2290static void
2291em_local_timer(void *arg)
2292{
2293 struct adapter *adapter = arg;
2294 if_t ifp = adapter->ifp;
2295 struct tx_ring *txr = adapter->tx_rings;
2296 struct rx_ring *rxr = adapter->rx_rings;
2297 u32 trigger = 0;
2298
2299 EM_CORE_LOCK_ASSERT(adapter);
2300
2301 em_update_link_status(adapter);
2302 em_update_stats_counters(adapter);
2303
2304 /* Reset LAA into RAR[0] on 82571 */
2305 if ((adapter->hw.mac.type == e1000_82571) &&
2306 e1000_get_laa_state_82571(&adapter->hw))
2307 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2308
2309 /* Mask to use in the irq trigger */
2310 if (adapter->msix_mem) {
2311 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2312 trigger |= rxr->ims;
2313 rxr = adapter->rx_rings;
2314 } else
2315 trigger = E1000_ICS_RXDMT0;
2316
2317	/*
2318	** Check on the state of the TX queue(s): this
2319	** can be done without the lock because it's read-only
2320	** and the HUNG state will be static once set.
2321	*/
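	/*
	** A queue found already in EM_TX_HUNG triggers a reinit;
	** a queue that has been busy for EM_TX_MAXTRIES ticks is
	** marked hung here so the next tick catches it.
	*/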
2322 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2323 if (txr->busy == EM_TX_HUNG)
2324 goto hung;
2325 if (txr->busy >= EM_TX_MAXTRIES)
2326 txr->busy = EM_TX_HUNG;
2327 /* Schedule a TX tasklet if needed */
2328 if (txr->tx_avail <= EM_MAX_SCATTER)
2329 taskqueue_enqueue(txr->tq, &txr->tx_task);
2330 }
2331
2332 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2333#ifndef DEVICE_POLLING
2334 /* Trigger an RX interrupt to guarantee mbuf refresh */
2335 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2336#endif
2337 return;
2338hung:
2339 /* Looks like we're hung */
2340	device_printf(adapter->dev, "Watchdog timeout Queue[%d] -- resetting\n",
2341 txr->me);
2342 em_print_debug_info(adapter);
2343 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2344 adapter->watchdog_events++;
2345 em_init_locked(adapter);
2346}
2347
2348
2349static void
2350em_update_link_status(struct adapter *adapter)
2351{
2352 struct e1000_hw *hw = &adapter->hw;
2353 if_t ifp = adapter->ifp;
2354 device_t dev = adapter->dev;
2355 struct tx_ring *txr = adapter->tx_rings;
2356 u32 link_check = 0;
2357
2358 /* Get the cached link value or read phy for real */
2359 switch (hw->phy.media_type) {
2360 case e1000_media_type_copper:
2361 if (hw->mac.get_link_status) {
2362 /* Do the work to read phy */
2363 e1000_check_for_link(hw);
2364 link_check = !hw->mac.get_link_status;
2365 if (link_check) /* ESB2 fix */
2366 e1000_cfg_on_link_up(hw);
2367 } else
2368 link_check = TRUE;
2369 break;
2370 case e1000_media_type_fiber:
2371 e1000_check_for_link(hw);
2372 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2373 E1000_STATUS_LU);
2374 break;
2375 case e1000_media_type_internal_serdes:
2376 e1000_check_for_link(hw);
2377 link_check = adapter->hw.mac.serdes_has_link;
2378 break;
2379 default:
2380 case e1000_media_type_unknown:
2381 break;
2382 }
2383
2384 /* Now check for a transition */
2385 if (link_check && (adapter->link_active == 0)) {
2386 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2387 &adapter->link_duplex);
2388 /* Check if we must disable SPEED_MODE bit on PCI-E */
2389 if ((adapter->link_speed != SPEED_1000) &&
2390 ((hw->mac.type == e1000_82571) ||
2391 (hw->mac.type == e1000_82572))) {
2392 int tarc0;
2393 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2394 tarc0 &= ~TARC_SPEED_MODE_BIT;
2395 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2396 }
2397 if (bootverbose)
2398 device_printf(dev, "Link is up %d Mbps %s\n",
2399 adapter->link_speed,
2400 ((adapter->link_duplex == FULL_DUPLEX) ?
2401 "Full Duplex" : "Half Duplex"));
2402 adapter->link_active = 1;
2403 adapter->smartspeed = 0;
2404 if_setbaudrate(ifp, adapter->link_speed * 1000000);
2405 if_link_state_change(ifp, LINK_STATE_UP);
2406 } else if (!link_check && (adapter->link_active == 1)) {
2407 if_setbaudrate(ifp, 0);
2408 adapter->link_speed = 0;
2409 adapter->link_duplex = 0;
2410 if (bootverbose)
2411 device_printf(dev, "Link is Down\n");
2412 adapter->link_active = 0;
2413 /* Link down, disable hang detection */
2414 for (int i = 0; i < adapter->num_queues; i++, txr++)
2415 txr->busy = EM_TX_IDLE;
2416 if_link_state_change(ifp, LINK_STATE_DOWN);
2417 }
2418}
2419
2420/*********************************************************************
2421 *
2422 * This routine disables all traffic on the adapter by issuing a
2423 * global reset on the MAC and deallocates TX/RX buffers.
2424 *
2425 * This routine should always be called with BOTH the CORE
2426 * and TX locks.
2427 **********************************************************************/
2428
2429static void
2430em_stop(void *arg)
2431{
2432 struct adapter *adapter = arg;
2433 if_t ifp = adapter->ifp;
2434 struct tx_ring *txr = adapter->tx_rings;
2435
2436 EM_CORE_LOCK_ASSERT(adapter);
2437
2438 INIT_DEBUGOUT("em_stop: begin");
2439
2440 em_disable_intr(adapter);
2441 callout_stop(&adapter->timer);
2442
2443 /* Tell the stack that the interface is no longer active */
2444 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2445
2446 /* Disarm Hang Detection. */
2447 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2448 EM_TX_LOCK(txr);
2449 txr->busy = EM_TX_IDLE;
2450 EM_TX_UNLOCK(txr);
2451 }
2452
2453 e1000_reset_hw(&adapter->hw);
2454 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2455
2456 e1000_led_off(&adapter->hw);
2457 e1000_cleanup_led(&adapter->hw);
2458}
2459
2460
2461/*********************************************************************
2462 *
2463 * Determine hardware revision.
2464 *
2465 **********************************************************************/
2466static void
2467em_identify_hardware(struct adapter *adapter)
2468{
2469 device_t dev = adapter->dev;
2470
2471 /* Make sure our PCI config space has the necessary stuff set */
2472 pci_enable_busmaster(dev);
2473 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2474
2475 /* Save off the information about this board */
2476 adapter->hw.vendor_id = pci_get_vendor(dev);
2477 adapter->hw.device_id = pci_get_device(dev);
2478 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2479 adapter->hw.subsystem_vendor_id =
2480 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2481 adapter->hw.subsystem_device_id =
2482 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2483
2484 /* Do Shared Code Init and Setup */
2485 if (e1000_set_mac_type(&adapter->hw)) {
2486 device_printf(dev, "Setup init failure\n");
2487 return;
2488 }
2489}
2490
2491static int
2492em_allocate_pci_resources(struct adapter *adapter)
2493{
2494 device_t dev = adapter->dev;
2495 int rid;
2496
2497 rid = PCIR_BAR(0);
2498 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2499 &rid, RF_ACTIVE);
2500 if (adapter->memory == NULL) {
2501 device_printf(dev, "Unable to allocate bus resource: memory\n");
2502 return (ENXIO);
2503 }
2504 adapter->osdep.mem_bus_space_tag =
2505 rman_get_bustag(adapter->memory);
2506 adapter->osdep.mem_bus_space_handle =
2507 rman_get_bushandle(adapter->memory);
2508 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2509
2510 adapter->hw.back = &adapter->osdep;
2511
2512 return (0);
2513}
2514
2515/*********************************************************************
2516 *
2517 * Setup the Legacy or MSI Interrupt handler
2518 *
2519 **********************************************************************/
2520int
2521em_allocate_legacy(struct adapter *adapter)
2522{
2523 device_t dev = adapter->dev;
2524 struct tx_ring *txr = adapter->tx_rings;
2525 int error, rid = 0;
2526
2527 /* Manually turn off all interrupts */
2528 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2529
2530 if (adapter->msix == 1) /* using MSI */
2531 rid = 1;
2532 /* We allocate a single interrupt resource */
2533 adapter->res = bus_alloc_resource_any(dev,
2534 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2535 if (adapter->res == NULL) {
2536 device_printf(dev, "Unable to allocate bus resource: "
2537 "interrupt\n");
2538 return (ENXIO);
2539 }
2540
2541 /*
2542 * Allocate a fast interrupt and the associated
2543 * deferred processing contexts.
2544 */
2545 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2546 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2547 taskqueue_thread_enqueue, &adapter->tq);
2548 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2549 device_get_nameunit(adapter->dev));
2550	/* Use a TX-only task; the local timer also enqueues it */
2551 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2552 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2553 taskqueue_thread_enqueue, &txr->tq);
2554 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2555 device_get_nameunit(adapter->dev));
2556 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
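	/*
	** em_irq_fast is registered as an interrupt filter below;
	** it defers the real RX/TX work to que_task, which runs in
	** the taskqueue threads created above.
	*/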
2557 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2558 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2559 device_printf(dev, "Failed to register fast interrupt "
2560 "handler: %d\n", error);
2561 taskqueue_free(adapter->tq);
2562 adapter->tq = NULL;
2563 return (error);
2564 }
2565
2566 return (0);
2567}
2568
2569/*********************************************************************
2570 *
2571 * Setup the MSIX Interrupt handlers
2572 *   This is not really Multiqueue; rather,
2573 *   it's just separate interrupt vectors
2574 *   for TX, RX, and Link.
2575 *
2576 **********************************************************************/
2577int
2578em_allocate_msix(struct adapter *adapter)
2579{
2580 device_t dev = adapter->dev;
2581 struct tx_ring *txr = adapter->tx_rings;
2582 struct rx_ring *rxr = adapter->rx_rings;
2583 int error, rid, vector = 0;
2584 int cpu_id = 0;
2585
2586
2587 /* Make sure all interrupts are disabled */
2588 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2589
2590 /* First set up ring resources */
2591 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2592
2593 /* RX ring */
2594 rid = vector + 1;
2595
2596 rxr->res = bus_alloc_resource_any(dev,
2597 SYS_RES_IRQ, &rid, RF_ACTIVE);
2598 if (rxr->res == NULL) {
2599 device_printf(dev,
2600 "Unable to allocate bus resource: "
2601 "RX MSIX Interrupt %d\n", i);
2602 return (ENXIO);
2603 }
2604 if ((error = bus_setup_intr(dev, rxr->res,
2605 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2606 rxr, &rxr->tag)) != 0) {
2607 device_printf(dev, "Failed to register RX handler");
2608 return (error);
2609 }
2610#if __FreeBSD_version >= 800504
2611 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2612#endif
2613 rxr->msix = vector;
2614
2615 if (em_last_bind_cpu < 0)
2616 em_last_bind_cpu = CPU_FIRST();
2617 cpu_id = em_last_bind_cpu;
2618 bus_bind_intr(dev, rxr->res, cpu_id);
2619
2620 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2621 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2622 taskqueue_thread_enqueue, &rxr->tq);
2623 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2624 device_get_nameunit(adapter->dev), cpu_id);
2625 /*
2626 ** Set the bit to enable interrupt
2627 ** in E1000_IMS -- bits 20 and 21
2628 ** are for RX0 and RX1, note this has
2629 ** NOTHING to do with the MSIX vector
2630 */
2631 rxr->ims = 1 << (20 + i);
2632 adapter->ims |= rxr->ims;
2633 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2634
2635 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2636 }
2637
2638 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2639 /* TX ring */
2640 rid = vector + 1;
2641 txr->res = bus_alloc_resource_any(dev,
2642 SYS_RES_IRQ, &rid, RF_ACTIVE);
2643 if (txr->res == NULL) {
2644 device_printf(dev,
2645 "Unable to allocate bus resource: "
2646 "TX MSIX Interrupt %d\n", i);
2647 return (ENXIO);
2648 }
2649 if ((error = bus_setup_intr(dev, txr->res,
2650 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2651 txr, &txr->tag)) != 0) {
2652 device_printf(dev, "Failed to register TX handler");
2653 return (error);
2654 }
2655#if __FreeBSD_version >= 800504
2656 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2657#endif
2658 txr->msix = vector;
2659
2660 if (em_last_bind_cpu < 0)
2661 em_last_bind_cpu = CPU_FIRST();
2662 cpu_id = em_last_bind_cpu;
2663 bus_bind_intr(dev, txr->res, cpu_id);
2664
2665 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2666 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2667 taskqueue_thread_enqueue, &txr->tq);
2668 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2669 device_get_nameunit(adapter->dev), cpu_id);
2670 /*
2671 ** Set the bit to enable interrupt
2672 ** in E1000_IMS -- bits 22 and 23
2673 ** are for TX0 and TX1, note this has
2674 ** NOTHING to do with the MSIX vector
2675 */
2676 txr->ims = 1 << (22 + i);
2677 adapter->ims |= txr->ims;
2678 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2679
2680 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2681 }
2682
2683 /* Link interrupt */
2684 rid = vector + 1;
2685 adapter->res = bus_alloc_resource_any(dev,
2686 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2687 if (!adapter->res) {
2688		device_printf(dev, "Unable to allocate "
2689 "bus resource: Link interrupt [%d]\n", rid);
2690 return (ENXIO);
2691 }
2692 /* Set the link handler function */
2693 error = bus_setup_intr(dev, adapter->res,
2694 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2695 em_msix_link, adapter, &adapter->tag);
2696 if (error) {
2697 adapter->res = NULL;
2698 device_printf(dev, "Failed to register LINK handler");
2699 return (error);
2700 }
2701#if __FreeBSD_version >= 800504
2702 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2703#endif
2704 adapter->linkvec = vector;
2705 adapter->ivars |= (8 | vector) << 16;
2706 adapter->ivars |= 0x80000000;
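	/*
	** The 82574 IVAR register holds one 4-bit field per cause
	** (RX0, RX1, TX0, TX1, other/link): a 3-bit vector number
	** plus a valid bit (the "8" above); bit 31 additionally
	** requests an interrupt on every writeback.  For the common
	** single-queue layout (vectors 0/1/2 = RX/TX/link) ivars
	** works out to 0x800a0908.
	*/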
2707
2708 return (0);
2709}
2710
2711
2712static void
2713em_free_pci_resources(struct adapter *adapter)
2714{
2715 device_t dev = adapter->dev;
2716 struct tx_ring *txr;
2717 struct rx_ring *rxr;
2718 int rid;
2719
2720
2721 /*
2722 ** Release all the queue interrupt resources:
2723 */
2724 for (int i = 0; i < adapter->num_queues; i++) {
2725 txr = &adapter->tx_rings[i];
2726 /* an early abort? */
2727 if (txr == NULL)
2728 break;
2729		rid = txr->msix + 1;
2730 if (txr->tag != NULL) {
2731 bus_teardown_intr(dev, txr->res, txr->tag);
2732 txr->tag = NULL;
2733 }
2734 if (txr->res != NULL)
2735 bus_release_resource(dev, SYS_RES_IRQ,
2736 rid, txr->res);
2737
2738 rxr = &adapter->rx_rings[i];
2739 /* an early abort? */
2740 if (rxr == NULL)
2741 break;
2742		rid = rxr->msix + 1;
2743 if (rxr->tag != NULL) {
2744 bus_teardown_intr(dev, rxr->res, rxr->tag);
2745 rxr->tag = NULL;
2746 }
2747 if (rxr->res != NULL)
2748 bus_release_resource(dev, SYS_RES_IRQ,
2749 rid, rxr->res);
2750 }
2751
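	/*
	** Pick the rid the main interrupt was allocated with:
	** linkvec + 1 for MSIX, 1 for MSI, 0 for a legacy INTx.
	*/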
2752 if (adapter->linkvec) /* we are doing MSIX */
2753 rid = adapter->linkvec + 1;
2754 else
2755		rid = (adapter->msix != 0) ? 1 : 0;
2756
2757 if (adapter->tag != NULL) {
2758 bus_teardown_intr(dev, adapter->res, adapter->tag);
2759 adapter->tag = NULL;
2760 }
2761
2762 if (adapter->res != NULL)
2763 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2764
2765
2766 if (adapter->msix)
2767 pci_release_msi(dev);
2768
2769 if (adapter->msix_mem != NULL)
2770 bus_release_resource(dev, SYS_RES_MEMORY,
2771 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2772
2773 if (adapter->memory != NULL)
2774 bus_release_resource(dev, SYS_RES_MEMORY,
2775 PCIR_BAR(0), adapter->memory);
2776
2777 if (adapter->flash != NULL)
2778 bus_release_resource(dev, SYS_RES_MEMORY,
2779 EM_FLASH, adapter->flash);
2780}
2781
2782/*
2783 * Setup MSI or MSI/X
2784 */
2785static int
2786em_setup_msix(struct adapter *adapter)
2787{
2788 device_t dev = adapter->dev;
2789 int val;
2790
2791 /* Nearly always going to use one queue */
2792 adapter->num_queues = 1;
2793
2794 /*
2795 ** Try using MSI-X for Hartwell adapters
2796 */
2797 if ((adapter->hw.mac.type == e1000_82574) &&
2798 (em_enable_msix == TRUE)) {
2799#ifdef EM_MULTIQUEUE
2800 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2801 if (adapter->num_queues > 1)
2802 em_enable_vectors_82574(adapter);
2803#endif
2804 /* Map the MSIX BAR */
2805 int rid = PCIR_BAR(EM_MSIX_BAR);
2806 adapter->msix_mem = bus_alloc_resource_any(dev,
2807 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2808 if (adapter->msix_mem == NULL) {
2809 /* May not be enabled */
2810 device_printf(adapter->dev,
2811			    "Unable to map MSIX table\n");
2812 goto msi;
2813 }
2814 val = pci_msix_count(dev);
2815
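		/*
		** Vector budget: one per RX queue and one per TX queue,
		** plus one for link/other causes -- 3 vectors for a
		** single queue, 5 for the two-queue 82574 case.
		*/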
2816#ifdef EM_MULTIQUEUE
2817 /* We need 5 vectors in the multiqueue case */
2818 if (adapter->num_queues > 1 ) {
2819 if (val >= 5)
2820 val = 5;
2821 else {
2822 adapter->num_queues = 1;
2823 device_printf(adapter->dev,
2824 "Insufficient MSIX vectors for >1 queue, "
2825 "using single queue...\n");
2826 goto msix_one;
2827 }
2828 } else {
2829msix_one:
2830#endif
2831 if (val >= 3)
2832 val = 3;
2833 else {
2834 device_printf(adapter->dev,
2835 "Insufficient MSIX vectors, using MSI\n");
2836 goto msi;
2837 }
2838#ifdef EM_MULTIQUEUE
2839 }
2840#endif
2841
2842 if ((pci_alloc_msix(dev, &val) == 0)) {
2843 device_printf(adapter->dev,
2844 "Using MSIX interrupts "
2845 "with %d vectors\n", val);
2846 return (val);
2847 }
2848
2849 /*
2850 ** If MSIX alloc failed or provided us with
2851 ** less than needed, free and fall through to MSI
2852 */
2853 pci_release_msi(dev);
2854 }
2855msi:
2856 if (adapter->msix_mem != NULL) {
2857 bus_release_resource(dev, SYS_RES_MEMORY,
2858 PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2859 adapter->msix_mem = NULL;
2860 }
2861 val = 1;
2862 if (pci_alloc_msi(dev, &val) == 0) {
2863 device_printf(adapter->dev, "Using an MSI interrupt\n");
2864 return (val);
2865 }
2866 /* Should only happen due to manual configuration */
2867	device_printf(adapter->dev, "No MSI/MSI-X, using a legacy IRQ\n");
2868 return (0);
2869}
2870
2871
2872/*********************************************************************
2873 *
2874 * Initialize the hardware to a configuration
2875 * as specified by the adapter structure.
2876 *
2877 **********************************************************************/
2878static void
2879em_reset(struct adapter *adapter)
2880{
2881 device_t dev = adapter->dev;
2882 if_t ifp = adapter->ifp;
2883 struct e1000_hw *hw = &adapter->hw;
2884 u16 rx_buffer_size;
2885 u32 pba;
2886
2887 INIT_DEBUGOUT("em_reset: begin");
2888
2889 /* Set up smart power down as default off on newer adapters. */
2890 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2891 hw->mac.type == e1000_82572)) {
2892 u16 phy_tmp = 0;
2893
2894 /* Speed up time to link by disabling smart power down. */
2895 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2896 phy_tmp &= ~IGP02E1000_PM_SPD;
2897 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2898 }
2899
2900 /*
2901 * Packet Buffer Allocation (PBA)
2902	 * Writing PBA sets the receive portion of the buffer;
2903	 * the remainder is used for the transmit buffer.
2904 */
2905 switch (hw->mac.type) {
2906 /* Total Packet Buffer on these is 48K */
2907 case e1000_82571:
2908 case e1000_82572:
2909 case e1000_80003es2lan:
2910 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2911 break;
2912 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2913 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2914 break;
2915 case e1000_82574:
2916 case e1000_82583:
2917 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2918 break;
2919 case e1000_ich8lan:
2920 pba = E1000_PBA_8K;
2921 break;
2922 case e1000_ich9lan:
2923 case e1000_ich10lan:
2924 /* Boost Receive side for jumbo frames */
2925 if (adapter->hw.mac.max_frame_size > 4096)
2926 pba = E1000_PBA_14K;
2927 else
2928 pba = E1000_PBA_10K;
2929 break;
2930 case e1000_pchlan:
2931 case e1000_pch2lan:
2932 case e1000_pch_lpt:
2933 pba = E1000_PBA_26K;
2934 break;
2935 default:
2936 if (adapter->hw.mac.max_frame_size > 8192)
2937 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2938 else
2939 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2940 }
2941 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2942
2943 /*
2944 * These parameters control the automatic generation (Tx) and
2945 * response (Rx) to Ethernet PAUSE frames.
2946 * - High water mark should allow for at least two frames to be
2947 * received after sending an XOFF.
2948 * - Low water mark works best when it is very near the high water mark.
2949 * This allows the receiver to restart by sending XON when it has
2950	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2951 * restart after one full frame is pulled from the buffer. There
2952 * could be several smaller frames in the buffer and if so they will
2953 * not trigger the XON until their total number reduces the buffer
2954 * by 1500.
2955 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2956 */
2957	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
2958 hw->fc.high_water = rx_buffer_size -
2959 roundup2(adapter->hw.mac.max_frame_size, 1024);
2960 hw->fc.low_water = hw->fc.high_water - 1500;
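	/*
	** Worked example: with a 32KB RX allocation (PBA = 0x20) and
	** e.g. a 1522-byte max frame, high_water is
	** 32768 - roundup2(1522, 1024) = 30720 bytes and low_water
	** is 30720 - 1500 = 29220 bytes.
	*/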
2961
2962 if (adapter->fc) /* locally set flow control value? */
2963 hw->fc.requested_mode = adapter->fc;
2964 else
2965 hw->fc.requested_mode = e1000_fc_full;
2966
2967 if (hw->mac.type == e1000_80003es2lan)
2968 hw->fc.pause_time = 0xFFFF;
2969 else
2970 hw->fc.pause_time = EM_FC_PAUSE_TIME;
2971
2972 hw->fc.send_xon = TRUE;
2973
2974 /* Device specific overrides/settings */
2975 switch (hw->mac.type) {
2976 case e1000_pchlan:
2977 /* Workaround: no TX flow ctrl for PCH */
2978 hw->fc.requested_mode = e1000_fc_rx_pause;
2979 hw->fc.pause_time = 0xFFFF; /* override */
2980 if (if_getmtu(ifp) > ETHERMTU) {
2981 hw->fc.high_water = 0x3500;
2982 hw->fc.low_water = 0x1500;
2983 } else {
2984 hw->fc.high_water = 0x5000;
2985 hw->fc.low_water = 0x3000;
2986 }
2987 hw->fc.refresh_time = 0x1000;
2988 break;
2989 case e1000_pch2lan:
2990 case e1000_pch_lpt:
2991 hw->fc.high_water = 0x5C20;
2992 hw->fc.low_water = 0x5048;
2993 hw->fc.pause_time = 0x0650;
2994 hw->fc.refresh_time = 0x0400;
2995 /* Jumbos need adjusted PBA */
2996 if (if_getmtu(ifp) > ETHERMTU)
2997 E1000_WRITE_REG(hw, E1000_PBA, 12);
2998 else
2999 E1000_WRITE_REG(hw, E1000_PBA, 26);
3000 break;
3001 case e1000_ich9lan:
3002 case e1000_ich10lan:
3003 if (if_getmtu(ifp) > ETHERMTU) {
3004 hw->fc.high_water = 0x2800;
3005 hw->fc.low_water = hw->fc.high_water - 8;
3006 break;
3007 }
3008 /* else fall thru */
3009 default:
3010 if (hw->mac.type == e1000_80003es2lan)
3011 hw->fc.pause_time = 0xFFFF;
3012 break;
3013 }
3014
3015 /* Issue a global reset */
3016 e1000_reset_hw(hw);
3017 E1000_WRITE_REG(hw, E1000_WUC, 0);
3018 em_disable_aspm(adapter);
3019 /* and a re-init */
3020 if (e1000_init_hw(hw) < 0) {
3021 device_printf(dev, "Hardware Initialization Failed\n");
3022 return;
3023 }
3024
3025 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3026 e1000_get_phy_info(hw);
3027 e1000_check_for_link(hw);
3028 return;
3029}
3030
3031/*********************************************************************
3032 *
3033 * Setup networking device structure and register an interface.
3034 *
3035 **********************************************************************/
3036static int
3037em_setup_interface(device_t dev, struct adapter *adapter)
3038{
3039 if_t ifp;
3040
3041 INIT_DEBUGOUT("em_setup_interface: begin");
3042
3043 ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3044	if (ifp == NULL) {
3045 device_printf(dev, "can not allocate ifnet structure\n");
3046 return (-1);
3047 }
3048 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3049 if_setdev(ifp, dev);
3050 if_setinitfn(ifp, em_init);
3051 if_setsoftc(ifp, adapter);
3052 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3053 if_setioctlfn(ifp, em_ioctl);
3054 if_setgetcounterfn(ifp, em_get_counter);
3055 /* TSO parameters */
3056 ifp->if_hw_tsomax = IP_MAXPACKET;
3057 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
3058 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3059
3060#ifdef EM_MULTIQUEUE
3061 /* Multiqueue stack interface */
3062 if_settransmitfn(ifp, em_mq_start);
3063 if_setqflushfn(ifp, em_qflush);
3064#else
3065 if_setstartfn(ifp, em_start);
3066 if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3067 if_setsendqready(ifp);
3068#endif
3069
3070 ether_ifattach(ifp, adapter->hw.mac.addr);
3071
3072 if_setcapabilities(ifp, 0);
3073 if_setcapenable(ifp, 0);
3074
3075
3076 if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
3077 IFCAP_TSO4, 0);
3078 /*
3079 * Tell the upper layer(s) we
3080 * support full VLAN capability
3081 */
3082 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3083 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3084 IFCAP_VLAN_MTU, 0);
3085 if_setcapenable(ifp, if_getcapabilities(ifp));
3086
3087 /*
3088	** Don't turn this on by default: if vlans are
3089	** created on another pseudo device (e.g. lagg)
3090 ** then vlan events are not passed thru, breaking
3091 ** operation, but with HW FILTER off it works. If
3092 ** using vlans directly on the em driver you can
3093 ** enable this and get full hardware tag filtering.
3094 */
3095 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
3096
3097#ifdef DEVICE_POLLING
3098 if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
3099#endif
3100
3101 /* Enable only WOL MAGIC by default */
3102 if (adapter->wol) {
3103 if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3104 if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3105 }
3106
3107 /*
3108 * Specify the media types supported by this adapter and register
3109 * callbacks to update media and link information
3110 */
3111 ifmedia_init(&adapter->media, IFM_IMASK,
3112 em_media_change, em_media_status);
3113 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3114 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3115 u_char fiber_type = IFM_1000_SX; /* default type */
3116
3117 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3118 0, NULL);
3119 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3120 } else {
3121 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3122 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3123 0, NULL);
3124 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3125 0, NULL);
3126 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3127 0, NULL);
3128 if (adapter->hw.phy.type != e1000_phy_ife) {
3129 ifmedia_add(&adapter->media,
3130 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3131 ifmedia_add(&adapter->media,
3132 IFM_ETHER | IFM_1000_T, 0, NULL);
3133 }
3134 }
3135 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3136 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3137 return (0);
3138}
3139
3140
3141/*
3142 * Manage DMA'able memory.
3143 */
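/*
 * bus_dmamap_load() callback: the tag is created with nsegments = 1,
 * so a successful load yields exactly one segment; record its bus
 * address for the caller.
 */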
3144static void
3145em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3146{
3147 if (error)
3148 return;
3149 *(bus_addr_t *) arg = segs[0].ds_addr;
3150}
3151
3152static int
3153em_dma_malloc(struct adapter *adapter, bus_size_t size,
3154 struct em_dma_alloc *dma, int mapflags)
3155{
3156 int error;
3157
3158 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3159 EM_DBA_ALIGN, 0, /* alignment, bounds */
3160 BUS_SPACE_MAXADDR, /* lowaddr */
3161 BUS_SPACE_MAXADDR, /* highaddr */
3162 NULL, NULL, /* filter, filterarg */
3163 size, /* maxsize */
3164 1, /* nsegments */
3165 size, /* maxsegsize */
3166 0, /* flags */
3167 NULL, /* lockfunc */
3168 NULL, /* lockarg */
3169 &dma->dma_tag);
3170 if (error) {
3171 device_printf(adapter->dev,
3172 "%s: bus_dma_tag_create failed: %d\n",
3173 __func__, error);
3174 goto fail_0;
3175 }
3176
3177 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3178 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3179 if (error) {
3180 device_printf(adapter->dev,
3181 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3182 __func__, (uintmax_t)size, error);
3183 goto fail_2;
3184 }
3185
3186 dma->dma_paddr = 0;
3187 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3188 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3189 if (error || dma->dma_paddr == 0) {
3190 device_printf(adapter->dev,
3191 "%s: bus_dmamap_load failed: %d\n",
3192 __func__, error);
3193 goto fail_3;
3194 }
3195
3196 return (0);
3197
3198fail_3:
3199 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3200fail_2:
3201 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3202 bus_dma_tag_destroy(dma->dma_tag);
3203fail_0:
3204 dma->dma_tag = NULL;
3205
3206 return (error);
3207}
3208
3209static void
3210em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3211{
3212 if (dma->dma_tag == NULL)
3213 return;
3214 if (dma->dma_paddr != 0) {
3215 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3216 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3217 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3218 dma->dma_paddr = 0;
3219 }
3220 if (dma->dma_vaddr != NULL) {
3221 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3222 dma->dma_vaddr = NULL;
3223 }
3224 bus_dma_tag_destroy(dma->dma_tag);
3225 dma->dma_tag = NULL;
3226}
3227
3228
3229/*********************************************************************
3230 *
3231 * Allocate memory for the transmit and receive rings, and then
3232 * the descriptors associated with each, called only once at attach.
3233 *
3234 **********************************************************************/
3235static int
3236em_allocate_queues(struct adapter *adapter)
3237{
3238 device_t dev = adapter->dev;
3239 struct tx_ring *txr = NULL;
3240 struct rx_ring *rxr = NULL;
3241 int rsize, tsize, error = E1000_SUCCESS;
3242 int txconf = 0, rxconf = 0;
3243
3244
3245 /* Allocate the TX ring struct memory */
3246 if (!(adapter->tx_rings =
3247 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3248 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3249 device_printf(dev, "Unable to allocate TX ring memory\n");
3250 error = ENOMEM;
3251 goto fail;
3252 }
3253
3254 /* Now allocate the RX */
3255 if (!(adapter->rx_rings =
3256 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3257 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3258 device_printf(dev, "Unable to allocate RX ring memory\n");
3259 error = ENOMEM;
3260 goto rx_fail;
3261 }
3262
3263 tsize = roundup2(adapter->num_tx_desc *
3264 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3265 /*
3266 * Now set up the TX queues, txconf is needed to handle the
3267 * possibility that things fail midcourse and we need to
3268 * undo memory gracefully
3269 */
3270 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3271 /* Set up some basics */
3272 txr = &adapter->tx_rings[i];
3273 txr->adapter = adapter;
3274 txr->me = i;
3275
3276 /* Initialize the TX lock */
3277 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3278 device_get_nameunit(dev), txr->me);
3279 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3280
3281 if (em_dma_malloc(adapter, tsize,
3282 &txr->txdma, BUS_DMA_NOWAIT)) {
3283 device_printf(dev,
3284 "Unable to allocate TX Descriptor memory\n");
3285 error = ENOMEM;
3286 goto err_tx_desc;
3287 }
3288 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3289 bzero((void *)txr->tx_base, tsize);
3290
3291 if (em_allocate_transmit_buffers(txr)) {
3292 device_printf(dev,
3293 "Critical Failure setting up transmit buffers\n");
3294 error = ENOMEM;
3295 goto err_tx_desc;
3296 }
3297#if __FreeBSD_version >= 800000
3298 /* Allocate a buf ring */
3299 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3300 M_WAITOK, &txr->tx_mtx);
3301#endif
3302 }
3303
3304 /*
3305 * Next the RX queues...
3306 */
3307 rsize = roundup2(adapter->num_rx_desc *
3308 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3309 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3310 rxr = &adapter->rx_rings[i];
3311 rxr->adapter = adapter;
3312 rxr->me = i;
3313
3314 /* Initialize the RX lock */
3315 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3316		    device_get_nameunit(dev), rxr->me);
3317 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3318
3319 if (em_dma_malloc(adapter, rsize,
3320 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3321 device_printf(dev,
3322			    "Unable to allocate RX Descriptor memory\n");
3323 error = ENOMEM;
3324 goto err_rx_desc;
3325 }
3326 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3327 bzero((void *)rxr->rx_base, rsize);
3328
3329 /* Allocate receive buffers for the ring*/
3330 if (em_allocate_receive_buffers(rxr)) {
3331 device_printf(dev,
3332 "Critical Failure setting up receive buffers\n");
3333 error = ENOMEM;
3334 goto err_rx_desc;
3335 }
3336 }
3337
3338 return (0);
3339
3340err_rx_desc:
3341 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3342 em_dma_free(adapter, &rxr->rxdma);
3343err_tx_desc:
3344 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3345 em_dma_free(adapter, &txr->txdma);
3346 free(adapter->rx_rings, M_DEVBUF);
3347rx_fail:
3348#if __FreeBSD_version >= 800000
3349 buf_ring_free(txr->br, M_DEVBUF);
3350#endif
3351 free(adapter->tx_rings, M_DEVBUF);
3352fail:
3353 return (error);
3354}
3355
3356
3357/*********************************************************************
3358 *
3359 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3360 * the information needed to transmit a packet on the wire. This is
3361 *  called only once at attach; setup is done on every reset.
3362 *
3363 **********************************************************************/
3364static int
3365em_allocate_transmit_buffers(struct tx_ring *txr)
3366{
3367 struct adapter *adapter = txr->adapter;
3368 device_t dev = adapter->dev;
3369 struct em_txbuffer *txbuf;
3370 int error, i;
3371
3372 /*
3373 * Setup DMA descriptor areas.
3374 */
3375 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3376 1, 0, /* alignment, bounds */
3377 BUS_SPACE_MAXADDR, /* lowaddr */
3378 BUS_SPACE_MAXADDR, /* highaddr */
3379 NULL, NULL, /* filter, filterarg */
3380 EM_TSO_SIZE, /* maxsize */
3381 EM_MAX_SCATTER, /* nsegments */
3382 PAGE_SIZE, /* maxsegsize */
3383 0, /* flags */
3384 NULL, /* lockfunc */
3385 NULL, /* lockfuncarg */
3386 &txr->txtag))) {
3387 device_printf(dev,"Unable to allocate TX DMA tag\n");
3388 goto fail;
3389 }
3390
3391 if (!(txr->tx_buffers =
3392 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3393 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3394 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3395 error = ENOMEM;
3396 goto fail;
3397 }
3398
3399 /* Create the descriptor buffer dma maps */
3400 txbuf = txr->tx_buffers;
3401 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3402 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3403 if (error != 0) {
3404 device_printf(dev, "Unable to create TX DMA map\n");
3405 goto fail;
3406 }
3407 }
3408
3409 return 0;
3410fail:
3411	/* Free everything; this handles the case where setup failed partway */
3412 em_free_transmit_structures(adapter);
3413 return (error);
3414}
3415
3416/*********************************************************************
3417 *
3418 * Initialize a transmit ring.
3419 *
3420 **********************************************************************/
3421static void
3422em_setup_transmit_ring(struct tx_ring *txr)
3423{
3424 struct adapter *adapter = txr->adapter;
3425 struct em_txbuffer *txbuf;
3426 int i;
3427#ifdef DEV_NETMAP
3428 struct netmap_slot *slot;
3429 struct netmap_adapter *na = netmap_getna(adapter->ifp);
3430#endif /* DEV_NETMAP */
3431
3432 /* Clear the old descriptor contents */
3433 EM_TX_LOCK(txr);
3434#ifdef DEV_NETMAP
3435 slot = netmap_reset(na, NR_TX, txr->me, 0);
3436#endif /* DEV_NETMAP */
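	/*
	** When netmap owns this ring, netmap_reset() returns its
	** slot array; the loop below then points each descriptor
	** at netmap's own buffers instead of mbufs.
	*/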
3437
3438 bzero((void *)txr->tx_base,
3439 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3440 /* Reset indices */
3441 txr->next_avail_desc = 0;
3442 txr->next_to_clean = 0;
3443
3444 /* Free any existing tx buffers. */
3445 txbuf = txr->tx_buffers;
3446 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3447 if (txbuf->m_head != NULL) {
3448 bus_dmamap_sync(txr->txtag, txbuf->map,
3449 BUS_DMASYNC_POSTWRITE);
3450 bus_dmamap_unload(txr->txtag, txbuf->map);
3451 m_freem(txbuf->m_head);
3452 txbuf->m_head = NULL;
3453 }
3454#ifdef DEV_NETMAP
3455 if (slot) {
3456 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3457 uint64_t paddr;
3458 void *addr;
3459
3460 addr = PNMB(na, slot + si, &paddr);
3461 txr->tx_base[i].buffer_addr = htole64(paddr);
3462 /* reload the map for netmap mode */
3463 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3464 }
3465#endif /* DEV_NETMAP */
3466
3467 /* clear the watch index */
3468 txbuf->next_eop = -1;
3469 }
3470
3471 /* Set number of descriptors available */
3472 txr->tx_avail = adapter->num_tx_desc;
3473 txr->busy = EM_TX_IDLE;
3474
3475 /* Clear checksum offload context. */
3476 txr->last_hw_offload = 0;
3477 txr->last_hw_ipcss = 0;
3478 txr->last_hw_ipcso = 0;
3479 txr->last_hw_tucss = 0;
3480 txr->last_hw_tucso = 0;
3481
3482 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3483 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3484 EM_TX_UNLOCK(txr);
3485}
3486
3487/*********************************************************************
3488 *
3489 * Initialize all transmit rings.
3490 *
3491 **********************************************************************/
3492static void
3493em_setup_transmit_structures(struct adapter *adapter)
3494{
3495 struct tx_ring *txr = adapter->tx_rings;
3496
3497 for (int i = 0; i < adapter->num_queues; i++, txr++)
3498 em_setup_transmit_ring(txr);
3499
3500 return;
3501}
3502
3503/*********************************************************************
3504 *
3505 * Enable transmit unit.
3506 *
3507 **********************************************************************/
3508static void
3509em_initialize_transmit_unit(struct adapter *adapter)
3510{
3511 struct tx_ring *txr = adapter->tx_rings;
3512 struct e1000_hw *hw = &adapter->hw;
3513 u32 tctl, txdctl = 0, tarc, tipg = 0;
3514
3515 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3516
3517 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3518 u64 bus_addr = txr->txdma.dma_paddr;
3519 /* Base and Len of TX Ring */
3520 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3521 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3522 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3523 (u32)(bus_addr >> 32));
3524 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3525 (u32)bus_addr);
3526 /* Init the HEAD/TAIL indices */
3527 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3528 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3529
3530 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3531 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3532 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3533
3534 txr->busy = EM_TX_IDLE;
3535 txdctl = 0; /* clear txdctl */
3536 txdctl |= 0x1f; /* PTHRESH */
3537 txdctl |= 1 << 8; /* HTHRESH */
3538 txdctl |= 1 << 16;/* WTHRESH */
3539 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3540 txdctl |= E1000_TXDCTL_GRAN;
3541 txdctl |= 1 << 25; /* LWTHRESH */
3542
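		/*
		** Assuming the usual E1000_TXDCTL_GRAN of 1 << 24
		** (thresholds counted in descriptors), txdctl now
		** reads 0x0341011f: PTHRESH 31, HTHRESH 1, WTHRESH 1,
		** LWTHRESH 1, plus the required reserved bit 22.
		*/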
3543 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3544 }
3545
3546 /* Set the default values for the Tx Inter Packet Gap timer */
3547 switch (adapter->hw.mac.type) {
3548 case e1000_80003es2lan:
3549 tipg = DEFAULT_82543_TIPG_IPGR1;
3550 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3551 E1000_TIPG_IPGR2_SHIFT;
3552 break;
3553 default:
3554 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3555 (adapter->hw.phy.media_type ==
3556 e1000_media_type_internal_serdes))
3557 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3558 else
3559 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3560 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3561 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3562 }
3563
3564 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3565 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3566
3567	if (adapter->hw.mac.type >= e1000_82540)
3568 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3569 adapter->tx_abs_int_delay.value);
3570
3571 if ((adapter->hw.mac.type == e1000_82571) ||
3572 (adapter->hw.mac.type == e1000_82572)) {
3573 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3574 tarc |= TARC_SPEED_MODE_BIT;
3575 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3576 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3577 /* errata: program both queues to unweighted RR */
3578 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3579 tarc |= 1;
3580 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3581 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3582 tarc |= 1;
3583 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3584 } else if (adapter->hw.mac.type == e1000_82574) {
3585 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3586 tarc |= TARC_ERRATA_BIT;
3587		if (adapter->num_queues > 1) {
3588 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3589 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3590 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3591 } else
3592 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3593 }
3594
3595 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3596 if (adapter->tx_int_delay.value > 0)
3597 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3598
3599 /* Program the Transmit Control Register */
3600 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3601 tctl &= ~E1000_TCTL_CT;
3602 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3603 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3604
3605 if (adapter->hw.mac.type >= e1000_82571)
3606 tctl |= E1000_TCTL_MULR;
3607
3608 /* This write will effectively turn on the transmit unit. */
3609 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3610
3611}
3612
3613
3614/*********************************************************************
3615 *
3616 * Free all transmit rings.
3617 *
3618 **********************************************************************/
3619static void
3620em_free_transmit_structures(struct adapter *adapter)
3621{
3622 struct tx_ring *txr = adapter->tx_rings;
3623
3624 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3625 EM_TX_LOCK(txr);
3626 em_free_transmit_buffers(txr);
3627 em_dma_free(adapter, &txr->txdma);
3628 EM_TX_UNLOCK(txr);
3629 EM_TX_LOCK_DESTROY(txr);
3630 }
3631
3632 free(adapter->tx_rings, M_DEVBUF);
3633}
3634
3635/*********************************************************************
3636 *
3637 * Free transmit ring related data structures.
3638 *
3639 **********************************************************************/
3640static void
3641em_free_transmit_buffers(struct tx_ring *txr)
3642{
3643 struct adapter *adapter = txr->adapter;
3644 struct em_txbuffer *txbuf;
3645
3646 INIT_DEBUGOUT("free_transmit_ring: begin");
3647
3648 if (txr->tx_buffers == NULL)
3649 return;
3650
3651 for (int i = 0; i < adapter->num_tx_desc; i++) {
3652 txbuf = &txr->tx_buffers[i];
3653 if (txbuf->m_head != NULL) {
3654 bus_dmamap_sync(txr->txtag, txbuf->map,
3655 BUS_DMASYNC_POSTWRITE);
3656 bus_dmamap_unload(txr->txtag,
3657 txbuf->map);
3658 m_freem(txbuf->m_head);
3659 txbuf->m_head = NULL;
3660 if (txbuf->map != NULL) {
3661 bus_dmamap_destroy(txr->txtag,
3662 txbuf->map);
3663 txbuf->map = NULL;
3664 }
3665 } else if (txbuf->map != NULL) {
3666 bus_dmamap_unload(txr->txtag,
3667 txbuf->map);
3668 bus_dmamap_destroy(txr->txtag,
3669 txbuf->map);
3670 txbuf->map = NULL;
3671 }
3672 }
3673#if __FreeBSD_version >= 800000
3674 if (txr->br != NULL)
3675 buf_ring_free(txr->br, M_DEVBUF);
3676#endif
3677 if (txr->tx_buffers != NULL) {
3678 free(txr->tx_buffers, M_DEVBUF);
3679 txr->tx_buffers = NULL;
3680 }
3681 if (txr->txtag != NULL) {
3682 bus_dma_tag_destroy(txr->txtag);
3683 txr->txtag = NULL;
3684 }
3685 return;
3686}
3687
3688
3689/*********************************************************************
3690 * The offload context is protocol specific (TCP/UDP) and thus
3691 * only needs to be set when the protocol changes. A context
3692 * change can be a performance detriment, and offload might be
3693 * better just disabled. The reason arises in the way in which
3694 * the controller supports pipelined requests from the Tx data
3695 * DMA. Up to four requests can be pipelined, and they may
3696 * belong to the same packet or to multiple packets. However,
3697 * all requests for one packet are issued before a request is
3698 * issued for a subsequent packet, and if a request for the
3699 * next packet requires a context change, that request is
3700 * stalled until the previous request completes. Setting up a
3701 * new context thus effectively disables pipelined Tx data DMA,
3702 * which in turn greatly slows down performance when sending
3703 * small frames.
3704 **********************************************************************/
3705static void
3706em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3707 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3708{
3709 struct adapter *adapter = txr->adapter;
3710 struct e1000_context_desc *TXD = NULL;
3711 struct em_txbuffer *tx_buffer;
3712 int cur, hdr_len;
3713 u32 cmd = 0;
3714 u16 offload = 0;
3715 u8 ipcso, ipcss, tucso, tucss;
3716
3717 ipcss = ipcso = tucss = tucso = 0;
3718 hdr_len = ip_off + (ip->ip_hl << 2);
3719 cur = txr->next_avail_desc;
3720
3721 /* Setup of IP header checksum. */
3722 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3723 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3724 offload |= CSUM_IP;
3725 ipcss = ip_off;
3726 ipcso = ip_off + offsetof(struct ip, ip_sum);
3727 /*
3728 * Start offset for header checksum calculation.
3729 * End offset for header checksum calculation.
3730 * Offset of place to put the checksum.
3731 */
3732 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3733 TXD->lower_setup.ip_fields.ipcss = ipcss;
3734 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3735 TXD->lower_setup.ip_fields.ipcso = ipcso;
3736 cmd |= E1000_TXD_CMD_IP;
3737 }
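/*
 * Worked example for a standard untagged IPv4 frame: ip_off is
 * ETHER_HDR_LEN (14) and ip_hl is 5 words, so hdr_len = 34,
 * ipcss = 14, and since offsetof(struct ip, ip_sum) == 10,
 * ipcso = 24; the hardware checksums the 20-byte header and
 * stores the result at byte offset 24.
 */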
3738
3739 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3740 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3741 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3742 offload |= CSUM_TCP;
3743 tucss = hdr_len;
3744 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3745 /*
3746 * The 82574L can only remember the *last* context used
3747 * regardless of the queue it was used for. We cannot reuse
3748 * contexts on this hardware platform and must generate a new
3749 * context every time. 82574L hardware spec, section 7.2.6,
3750 * second note.
3751 */
3752 if (adapter->num_queues < 2) {
3753 /*
3754 * Setting up a new checksum offload context for every
3755 * frame takes a lot of processing time for the hardware.
3756 * This also reduces performance a lot for small sized
3757 * frames, so avoid it if the driver can reuse a previously
3758 * configured checksum offload context.
3759 */
3760 if (txr->last_hw_offload == offload) {
3761 if (offload & CSUM_IP) {
3762 if (txr->last_hw_ipcss == ipcss &&
3763 txr->last_hw_ipcso == ipcso &&
3764 txr->last_hw_tucss == tucss &&
3765 txr->last_hw_tucso == tucso)
3766 return;
3767 } else {
3768 if (txr->last_hw_tucss == tucss &&
3769 txr->last_hw_tucso == tucso)
3770 return;
3771 }
3772 }
3773 txr->last_hw_offload = offload;
3774 txr->last_hw_tucss = tucss;
3775 txr->last_hw_tucso = tucso;
3776 }
3777 /*
3778 * Start offset for payload checksum calculation.
3779 * End offset for payload checksum calculation.
3780 * Offset of place to put the checksum.
3781 */
3782 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3783 TXD->upper_setup.tcp_fields.tucss = hdr_len;
3784 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3785 TXD->upper_setup.tcp_fields.tucso = tucso;
3786 cmd |= E1000_TXD_CMD_TCP;
3787 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3788 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3789 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
offload |= CSUM_UDP;
3790 tucss = hdr_len;
3791 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3792 /*
3793 * The 82574L can only remember the *last* context used
3794 * regardless of the queue it was used for. We cannot reuse
3795 * contexts on this hardware platform and must generate a new
3796 * context every time. 82574L hardware spec, section 7.2.6,
3797 * second note.
3798 */
3799 if (adapter->num_queues < 2) {
3800 /*
3801 * Setting up a new checksum offload context for every
3802 * frame takes a lot of processing time for the hardware.
3803 * This also reduces performance a lot for small sized
3804 * frames, so avoid it if the driver can reuse a previously
3805 * configured checksum offload context.
3806 */
3807 if (txr->last_hw_offload == offload) {
3808 if (offload & CSUM_IP) {
3809 if (txr->last_hw_ipcss == ipcss &&
3810 txr->last_hw_ipcso == ipcso &&
3811 txr->last_hw_tucss == tucss &&
3812 txr->last_hw_tucso == tucso)
3813 return;
3814 } else {
3815 if (txr->last_hw_tucss == tucss &&
3816 txr->last_hw_tucso == tucso)
3817 return;
3818 }
3819 }
3820 txr->last_hw_offload = offload;
3821 txr->last_hw_tucss = tucss;
3822 txr->last_hw_tucso = tucso;
3823 }
3824 /*
3825 * Start offset for payload checksum calculation.
3826 * End offset for payload checksum calculation.
3827 * Offset of place to put the checksum.
3828 */
3829 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3830 TXD->upper_setup.tcp_fields.tucss = tucss;
3831 TXD->upper_setup.tcp_fields.tucse = htole16(0);
3832 TXD->upper_setup.tcp_fields.tucso = tucso;
3833 }
3834
3835 if (offload & CSUM_IP) {
3836 txr->last_hw_ipcss = ipcss;
3837 txr->last_hw_ipcso = ipcso;
3838 }
3839
3840 TXD->tcp_seg_setup.data = htole32(0);
3841 TXD->cmd_and_length =
3842 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3843 tx_buffer = &txr->tx_buffers[cur];
3844 tx_buffer->m_head = NULL;
3845 tx_buffer->next_eop = -1;
3846
3847 if (++cur == adapter->num_tx_desc)
3848 cur = 0;
3849
3850 txr->tx_avail--;
3851 txr->next_avail_desc = cur;
3852}
3853
3854
3855/**********************************************************************
3856 *
3857 * Setup work for hardware segmentation offload (TSO)
3858 *
3859 **********************************************************************/
3860static void
3861em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3862 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3863{
3864 struct adapter *adapter = txr->adapter;
3865 struct e1000_context_desc *TXD;
3866 struct em_txbuffer *tx_buffer;
3867 int cur, hdr_len;
3868
3869 /*
3870 * In theory we could reuse the same TSO context if and only
3871 * if the frame is the same type (IP/TCP) and has the same MSS.
3872 * However, checking whether a frame has the same IP/TCP
3873 * structure is hard to do, so just ignore that and always
3874 * establish a new TSO context.
3875 */
3876 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
3877 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
3878 E1000_TXD_DTYP_D | /* Data descr type */
3879 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
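/*
 * Worked example: for a plain TCP/IPv4 frame with no options,
 * ip_off = 14, ip_hl = 5 and th_off = 5, so hdr_len = 14 + 20 +
 * 20 = 54; everything past byte 53 is payload that the hardware
 * segments into MSS-sized (m_pkthdr.tso_segsz) chunks.
 */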
3880
3881 /* IP and/or TCP header checksum calculation and insertion. */
3882 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3883
3884 cur = txr->next_avail_desc;
3885 tx_buffer = &txr->tx_buffers[cur];
3886 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3887
3888 /*
3889 * Start offset for header checksum calculation.
3890 * End offset for header checksum calculation.
3891 * Offset of place to put the checksum.
3892 */
3893 TXD->lower_setup.ip_fields.ipcss = ip_off;
3894 TXD->lower_setup.ip_fields.ipcse =
3895 htole16(ip_off + (ip->ip_hl << 2) - 1);
3896 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3897 /*
3898 * Start offset for payload checksum calculation.
3899 * End offset for payload checksum calculation.
3900 * Offset of place to put the checksum.
3901 */
3902 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3903 TXD->upper_setup.tcp_fields.tucse = 0;
3904 TXD->upper_setup.tcp_fields.tucso =
3905 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3906 /*
3907 * Payload size per packet w/o any headers.
3908 * Length of all headers up to payload.
3909 */
3910 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3911 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3912
3913 TXD->cmd_and_length = htole32(adapter->txd_cmd |
3914 E1000_TXD_CMD_DEXT | /* Extended descr */
3915 E1000_TXD_CMD_TSE | /* TSE context */
3916 E1000_TXD_CMD_IP | /* Do IP csum */
3917 E1000_TXD_CMD_TCP | /* Do TCP checksum */
3918 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
3919
3920 tx_buffer->m_head = NULL;
3921 tx_buffer->next_eop = -1;
3922
3923 if (++cur == adapter->num_tx_desc)
3924 cur = 0;
3925
3926 txr->tx_avail--;
3927 txr->next_avail_desc = cur;
3928 txr->tx_tso = TRUE;
3929}
3930
3931
3932/**********************************************************************
3933 *
3934 * Examine each tx_buffer in the used queue. If the hardware is done
3935 * processing the packet then free associated resources. The
3936 * tx_buffer is put back on the free queue.
3937 *
3938 **********************************************************************/
3939static void
3940em_txeof(struct tx_ring *txr)
3941{
3942 struct adapter *adapter = txr->adapter;
3943 int first, last, done, processed;
3944 struct em_txbuffer *tx_buffer;
3945 struct e1000_tx_desc *tx_desc, *eop_desc;
3946 if_t ifp = adapter->ifp;
3947
3948 EM_TX_LOCK_ASSERT(txr);
3949#ifdef DEV_NETMAP
3950 if (netmap_tx_irq(ifp, txr->me))
3951 return;
3952#endif /* DEV_NETMAP */
3953
3954 /* No work, make sure hang detection is disabled */
3955 if (txr->tx_avail == adapter->num_tx_desc) {
3956 txr->busy = EM_TX_IDLE;
3957 return;
3958 }
3959
3960 processed = 0;
3961 first = txr->next_to_clean;
3962 tx_desc = &txr->tx_base[first];
3963 tx_buffer = &txr->tx_buffers[first];
3964 last = tx_buffer->next_eop;
3965 eop_desc = &txr->tx_base[last];
3966
3967 /*
3968 * Get the index of the first descriptor
3969 * AFTER the EOP of the first packet so
3970 * that we can do a simple comparison in
3971 * the inner while loop.
3972 */
3973 if (++last == adapter->num_tx_desc)
3974 last = 0;
3975 done = last;
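/*
 * Worked example: if the first packet occupies descriptors 10..13
 * (next_eop == 13), 'done' becomes 14 and the inner loop below
 * cleans 10 through 13, stopping once first == 14.
 */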
3976
3977 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3978 BUS_DMASYNC_POSTREAD);
3979
3980 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3981 /* We clean the range of the packet */
3982 while (first != done) {
3983 tx_desc->upper.data = 0;
3984 tx_desc->lower.data = 0;
3985 tx_desc->buffer_addr = 0;
3986 ++txr->tx_avail;
3987 ++processed;
3988
3989 if (tx_buffer->m_head) {
3990 bus_dmamap_sync(txr->txtag,
3991 tx_buffer->map,
3992 BUS_DMASYNC_POSTWRITE);
3993 bus_dmamap_unload(txr->txtag,
3994 tx_buffer->map);
3995 m_freem(tx_buffer->m_head);
3996 tx_buffer->m_head = NULL;
3997 }
3998 tx_buffer->next_eop = -1;
3999
4000 if (++first == adapter->num_tx_desc)
4001 first = 0;
4002
4003 tx_buffer = &txr->tx_buffers[first];
4004 tx_desc = &txr->tx_base[first];
4005 }
4006 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4007 /* See if we can continue to the next packet */
4008 last = tx_buffer->next_eop;
4009 if (last != -1) {
4010 eop_desc = &txr->tx_base[last];
4011 /* Get new done point */
4012 if (++last == adapter->num_tx_desc) last = 0;
4013 done = last;
4014 } else
4015 break;
4016 }
4017 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4018 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4019
4020 txr->next_to_clean = first;
4021
4022 /*
4023 ** Hang detection: we know there's work outstanding
4024 ** or the early return above would have been taken, so no
4025 ** descriptor processed here indicates a potential hang.
4026 ** The local timer will examine this and do a reset if needed.
4027 */
4028 if (processed == 0) {
4029 if (txr->busy != EM_TX_HUNG)
4030 ++txr->busy;
4031 } else /* At least one descriptor was cleaned */
4032 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4033
4034 /*
4035 * If we have a minimum free, clear IFF_DRV_OACTIVE
4036 * to tell the stack that it is OK to send packets.
4037 * Notice that all writes of OACTIVE happen under the
4038 * TX lock which, with a single queue, guarantees
4039 * sanity.
4040 */
4041 if (txr->tx_avail >= EM_MAX_SCATTER) {
4042 if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4043 }
4044
4045 /* Disable hang detection if all clean */
4046 if (txr->tx_avail == adapter->num_tx_desc)
4047 txr->busy = EM_TX_IDLE;
4048}
4049
4050/*********************************************************************
4051 *
4052 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4053 *
4054 **********************************************************************/
4055static void
4056em_refresh_mbufs(struct rx_ring *rxr, int limit)
4057{
4058 struct adapter *adapter = rxr->adapter;
4059 struct mbuf *m;
4060 bus_dma_segment_t segs;
4061 struct em_rxbuffer *rxbuf;
4062 int i, j, error, nsegs;
4063 bool cleaned = FALSE;
4064
4065 i = j = rxr->next_to_refresh;
4066 /*
4067 ** Get one descriptor beyond
4068 ** our work mark to control
4069 ** the loop.
4070 */
4071 if (++j == adapter->num_rx_desc)
4072 j = 0;
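/*
 * Example of the sentinel: with, say, 1024 descriptors and
 * next_to_refresh == 1023, i starts at 1023 and j wraps to 0;
 * the loop below then runs until j catches up with 'limit'.
 */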
4073
4074 while (j != limit) {
4075 rxbuf = &rxr->rx_buffers[i];
4076 if (rxbuf->m_head == NULL) {
4077 m = m_getjcl(M_NOWAIT, MT_DATA,
4078 M_PKTHDR, adapter->rx_mbuf_sz);
4079 /*
4080 ** If we have a temporary resource shortage
4081 ** that causes a failure, just abort refresh
4082 ** for now, we will return to this point when
4083 ** reinvoked from em_rxeof.
4084 */
4085 if (m == NULL)
4086 goto update;
4087 } else
4088 m = rxbuf->m_head;
4089
4090 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4091 m->m_flags |= M_PKTHDR;
4092 m->m_data = m->m_ext.ext_buf;
4093
4094 /* Use bus_dma machinery to setup the memory mapping */
4095 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4096 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4097 if (error != 0) {
4098 printf("Refresh mbufs: hdr dmamap load"
4099 " failure - %d\n", error);
4100 m_free(m);
4101 rxbuf->m_head = NULL;
4102 goto update;
4103 }
4104 rxbuf->m_head = m;
4105 rxbuf->paddr = segs.ds_addr;
4106 bus_dmamap_sync(rxr->rxtag,
4107 rxbuf->map, BUS_DMASYNC_PREREAD);
4108 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4109 cleaned = TRUE;
4110
4111 i = j; /* Next is precalculated for us */
4112 rxr->next_to_refresh = i;
4113 /* Calculate next controlling index */
4114 if (++j == adapter->num_rx_desc)
4115 j = 0;
4116 }
4117update:
4118 /*
4119 ** Update the tail pointer only if, and
4120 ** only as far as, we have refreshed.
4121 */
4122 if (cleaned)
4123 E1000_WRITE_REG(&adapter->hw,
4124 E1000_RDT(rxr->me), rxr->next_to_refresh);
4125
4126 return;
4127}
4128
4129
4130/*********************************************************************
4131 *
4132 * Allocate memory for rx_buffer structures. Since we use one
4133 * rx_buffer per received packet, the maximum number of rx_buffer's
4134 * that we'll need is equal to the number of receive descriptors
4135 * that we've allocated.
4136 *
4137 **********************************************************************/
4138static int
4139em_allocate_receive_buffers(struct rx_ring *rxr)
4140{
4141 struct adapter *adapter = rxr->adapter;
4142 device_t dev = adapter->dev;
4143 struct em_rxbuffer *rxbuf;
4144 int error;
4145
4146 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4147 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4148 if (rxr->rx_buffers == NULL) {
4149 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4150 return (ENOMEM);
4151 }
4152
4153 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4154 1, 0, /* alignment, bounds */
4155 BUS_SPACE_MAXADDR, /* lowaddr */
4156 BUS_SPACE_MAXADDR, /* highaddr */
4157 NULL, NULL, /* filter, filterarg */
4158 MJUM9BYTES, /* maxsize */
4159 1, /* nsegments */
4160 MJUM9BYTES, /* maxsegsize */
4161 0, /* flags */
4162 NULL, /* lockfunc */
4163 NULL, /* lockarg */
4164 &rxr->rxtag);
4165 if (error) {
4166 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4167 __func__, error);
4168 goto fail;
4169 }
4170
4171 /* Create a DMA map for each receive buffer */
4172 for (int i = 0; i < adapter->num_rx_desc; i++) {
4173 rxbuf = &rxr->rx_buffers[i];
4174 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4175 if (error) {
4176 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4177 __func__, error);
4178 goto fail;
4179 }
4180 }
4181
4182 return (0);
4183
4184fail:
4185 em_free_receive_structures(adapter);
4186 return (error);
4187}
4188
4189
4190/*********************************************************************
4191 *
4192 * Initialize a receive ring and its buffers.
4193 *
4194 **********************************************************************/
4195static int
4196em_setup_receive_ring(struct rx_ring *rxr)
4197{
4198 struct adapter *adapter = rxr->adapter;
4199 struct em_rxbuffer *rxbuf;
4200 bus_dma_segment_t seg[1];
4201 int rsize, nsegs, error = 0;
4202#ifdef DEV_NETMAP
4203 struct netmap_slot *slot;
4204 struct netmap_adapter *na = netmap_getna(adapter->ifp);
4205#endif
4206
4207
4208 /* Clear the ring contents */
4209 EM_RX_LOCK(rxr);
4210 rsize = roundup2(adapter->num_rx_desc *
4211 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4212 bzero((void *)rxr->rx_base, rsize);
4213#ifdef DEV_NETMAP
4214 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4215#endif
4216
4217 /*
4218 ** Free current RX buffer structs and their mbufs
4219 */
4220 for (int i = 0; i < adapter->num_rx_desc; i++) {
4221 rxbuf = &rxr->rx_buffers[i];
4222 if (rxbuf->m_head != NULL) {
4223 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4224 BUS_DMASYNC_POSTREAD);
4225 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4226 m_freem(rxbuf->m_head);
4227 rxbuf->m_head = NULL; /* mark as freed */
4228 }
4229 }
4230
4231 /* Now replenish the mbufs */
4232 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4233 rxbuf = &rxr->rx_buffers[j];
4234#ifdef DEV_NETMAP
4235 if (slot) {
4236 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4237 uint64_t paddr;
4238 void *addr;
4239
4240 addr = PNMB(na, slot + si, &paddr);
4241 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4242 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4243 continue;
4244 }
4245#endif /* DEV_NETMAP */
4246 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4247 M_PKTHDR, adapter->rx_mbuf_sz);
4248 if (rxbuf->m_head == NULL) {
4249 error = ENOBUFS;
4250 goto fail;
4251 }
4252 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4253 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4254 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4255
4256 /* Get the memory mapping */
4257 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4258 rxbuf->map, rxbuf->m_head, seg,
4259 &nsegs, BUS_DMA_NOWAIT);
4260 if (error != 0) {
4261 m_freem(rxbuf->m_head);
4262 rxbuf->m_head = NULL;
4263 goto fail;
4264 }
4265 bus_dmamap_sync(rxr->rxtag,
4266 rxbuf->map, BUS_DMASYNC_PREREAD);
4267
4268 rxbuf->paddr = seg[0].ds_addr;
4269 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4270 }
4271 rxr->next_to_check = 0;
4272 rxr->next_to_refresh = 0;
4273 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4274 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4275
4276fail:
4277 EM_RX_UNLOCK(rxr);
4278 return (error);
4279}
4280
4281/*********************************************************************
4282 *
4283 * Initialize all receive rings.
4284 *
4285 **********************************************************************/
4286static int
4287em_setup_receive_structures(struct adapter *adapter)
4288{
4289 struct rx_ring *rxr = adapter->rx_rings;
4290 int q;
4291
4292 for (q = 0; q < adapter->num_queues; q++, rxr++)
4293 if (em_setup_receive_ring(rxr))
4294 goto fail;
4295
4296 return (0);
4297fail:
4298 /*
4299 * Free the RX buffers allocated so far; we only handle
4300 * the rings that completed, as the failing case has
4301 * cleaned up after itself. 'q' failed, so it's the terminus.
4302 */
4303 for (int i = 0; i < q; ++i) {
4304 rxr = &adapter->rx_rings[i];
4305 for (int n = 0; n < adapter->num_rx_desc; n++) {
4306 struct em_rxbuffer *rxbuf;
4307 rxbuf = &rxr->rx_buffers[n];
4308 if (rxbuf->m_head != NULL) {
4309 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4310 BUS_DMASYNC_POSTREAD);
4311 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4312 m_freem(rxbuf->m_head);
4313 rxbuf->m_head = NULL;
4314 }
4315 }
4316 rxr->next_to_check = 0;
4317 rxr->next_to_refresh = 0;
4318 }
4319
4320 return (ENOBUFS);
4321}
4322
4323/*********************************************************************
4324 *
4325 * Free all receive rings.
4326 *
4327 **********************************************************************/
4328static void
4329em_free_receive_structures(struct adapter *adapter)
4330{
4331 struct rx_ring *rxr = adapter->rx_rings;
4332
4333 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4334 em_free_receive_buffers(rxr);
4335 /* Free the ring memory as well */
4336 em_dma_free(adapter, &rxr->rxdma);
4337 EM_RX_LOCK_DESTROY(rxr);
4338 }
4339
4340 free(adapter->rx_rings, M_DEVBUF);
4341}
4342
4343
4344/*********************************************************************
4345 *
4346 * Free receive ring data structures
4347 *
4348 **********************************************************************/
4349static void
4350em_free_receive_buffers(struct rx_ring *rxr)
4351{
4352 struct adapter *adapter = rxr->adapter;
4353 struct em_rxbuffer *rxbuf = NULL;
4354
4355 INIT_DEBUGOUT("free_receive_buffers: begin");
4356
4357 if (rxr->rx_buffers != NULL) {
4358 for (int i = 0; i < adapter->num_rx_desc; i++) {
4359 rxbuf = &rxr->rx_buffers[i];
4360 if (rxbuf->map != NULL) {
4361 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4362 BUS_DMASYNC_POSTREAD);
4363 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4364 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4365 }
4366 if (rxbuf->m_head != NULL) {
4367 m_freem(rxbuf->m_head);
4368 rxbuf->m_head = NULL;
4369 }
4370 }
4371 free(rxr->rx_buffers, M_DEVBUF);
4372 rxr->rx_buffers = NULL;
4373 rxr->next_to_check = 0;
4374 rxr->next_to_refresh = 0;
4375 }
4376
4377 if (rxr->rxtag != NULL) {
4378 bus_dma_tag_destroy(rxr->rxtag);
4379 rxr->rxtag = NULL;
4380 }
4381
4382 return;
4383}
4384
4385
4386/*********************************************************************
4387 *
4388 * Enable receive unit.
4389 *
4390 **********************************************************************/
4391
4392static void
4393em_initialize_receive_unit(struct adapter *adapter)
4394{
4395 struct rx_ring *rxr = adapter->rx_rings;
4396 if_t ifp = adapter->ifp;
4397 struct e1000_hw *hw = &adapter->hw;
4398 u32 rctl, rxcsum, rfctl;
4399
4400 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4401
4402 /*
4403 * Make sure receives are disabled while setting
4404 * up the descriptor ring
4405 */
4406 rctl = E1000_READ_REG(hw, E1000_RCTL);
4407 /* Do not disable if ever enabled on this hardware */
4408 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4409 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4410
4411 /* Setup the Receive Control Register */
4412 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4413 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4414 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4415 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4416
4417 /* Do not store bad packets */
4418 rctl &= ~E1000_RCTL_SBP;
4419
4420 /* Enable Long Packet receive */
4421 if (if_getmtu(ifp) > ETHERMTU)
4422 rctl |= E1000_RCTL_LPE;
4423 else
4424 rctl &= ~E1000_RCTL_LPE;
4425
4426 /* Strip the CRC */
4427 if (!em_disable_crc_stripping)
4428 rctl |= E1000_RCTL_SECRC;
4429
4430 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4431 adapter->rx_abs_int_delay.value);
4432
4433 E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4434 adapter->rx_int_delay.value);
4435 /*
4436 * Set the interrupt throttling rate. Value is calculated
4437 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4438 */
4439 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
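/*
 * Worked example, assuming the driver's default MAX_INTS_PER_SEC
 * of 8000: DEFAULT_ITR = 10^9 / (8000 * 256) == 488 units of
 * 256ns, i.e. a minimum inter-interrupt interval of about 125us.
 */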
4440
4441 /* Use extended rx descriptor formats */
4442 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4443 rfctl |= E1000_RFCTL_EXTEN;
4444 /*
4445 ** When using MSIX interrupts we need to throttle
4446 ** using the EITR register (82574 only)
4447 */
4448 if (hw->mac.type == e1000_82574) {
4449 for (int i = 0; i < 4; i++)
4450 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4451 DEFAULT_ITR);
4452 /* Disable accelerated acknowledge */
4453 rfctl |= E1000_RFCTL_ACK_DIS;
4454 }
4455 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4456
4457 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4458 if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4459#ifdef EM_MULTIQUEUE
4460 rxcsum |= E1000_RXCSUM_TUOFL |
4461 E1000_RXCSUM_IPOFL |
4462 E1000_RXCSUM_PCSD;
4463#else
4464 rxcsum |= E1000_RXCSUM_TUOFL;
4465#endif
4466 } else
4467 rxcsum &= ~E1000_RXCSUM_TUOFL;
4468
4469 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4470
4471#ifdef EM_MULTIQUEUE
4472#define RSSKEYLEN 10
4473 if (adapter->num_queues > 1) {
4474 uint8_t rss_key[4 * RSSKEYLEN];
4475 uint32_t reta = 0;
4476 int i;
4477
4478 /*
4479 * Configure RSS key
4480 */
4481 arc4rand(rss_key, sizeof(rss_key), 0);
4482 for (i = 0; i < RSSKEYLEN; ++i) {
4483 uint32_t rssrk = 0;
4484
4485 rssrk = EM_RSSRK_VAL(rss_key, i);
4486 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk);
4487 }
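/*
 * EM_RSSRK_VAL packs four consecutive key bytes into one 32-bit
 * RSSRK register, so the ten registers written above hold the
 * full 40-byte random RSS hash key generated by arc4rand().
 */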
4488
4489 /*
4490 * Configure RSS redirect table in following fashion:
4491 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4492 */
4493 for (i = 0; i < sizeof(reta); ++i) {
4494 uint32_t q;
4495
4496 q = (i % adapter->num_queues) << 7;
4497 reta |= q << (8 * i);
4498 }
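/*
 * Worked example with num_queues == 2: the bytes packed above
 * alternate queue 0 and queue 1, with the queue index encoded in
 * bit 7 of each byte (the << 7 above), giving reta == 0x80008000.
 * Replicating that into all 32 RETA registers below spreads flows
 * evenly across both queues.
 */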
4499
4500 for (i = 0; i < 32; ++i) {
4501 E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4502 }
4503
4504 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4505 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4506 E1000_MRQC_RSS_FIELD_IPV4 |
4507 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4508 E1000_MRQC_RSS_FIELD_IPV6_EX |
4509 E1000_MRQC_RSS_FIELD_IPV6);
4510 }
4511#endif
4512 /*
4513 ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4514 ** long latencies are observed, like Lenovo X60. This
4515 ** change eliminates the problem, but since having positive
4516 ** values in RDTR is a known source of problems on other
4517 ** platforms another solution is being sought.
4518 */
4519 if (hw->mac.type == e1000_82573)
4520 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4521
4522 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4523 /* Setup the Base and Length of the Rx Descriptor Ring */
4524 u64 bus_addr = rxr->rxdma.dma_paddr;
4525 u32 rdt = adapter->num_rx_desc - 1; /* default */
4526
4527 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4528 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4529 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4530 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4531 /* Setup the Head and Tail Descriptor Pointers */
4532 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4533#ifdef DEV_NETMAP
4534 /*
4535 * an init() while a netmap client is active must
4536 * preserve the rx buffers passed to userspace.
4537 */
4538 if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4539 struct netmap_adapter *na = netmap_getna(adapter->ifp);
4540 rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4541 }
4542#endif /* DEV_NETMAP */
4543 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4544 }
4545
4546 /*
4547 * Set PTHRESH for improved jumbo performance
4548 * According to 10.2.5.11 of Intel 82574 Datasheet,
4549 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4550 * Only write to RXDCTL(1) if there is a need for different
4551 * settings.
4552 */
4553 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4554 (adapter->hw.mac.type == e1000_pch2lan) ||
4555 (adapter->hw.mac.type == e1000_ich10lan)) &&
4556 (if_getmtu(ifp) > ETHERMTU)) {
4557 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4558 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4559 } else if (adapter->hw.mac.type == e1000_82574) {
4560 for (int i = 0; i < adapter->num_queues; i++) {
4561 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4562
4563 rxdctl |= 0x20; /* PTHRESH */
4564 rxdctl |= 4 << 8; /* HTHRESH */
4565 rxdctl |= 4 << 16; /* WTHRESH */
4566 rxdctl |= 1 << 24; /* Switch to granularity */
4567 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4568 }
4569 }
4570
4571 if (adapter->hw.mac.type >= e1000_pch2lan) {
4572 if (if_getmtu(ifp) > ETHERMTU)
4573 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4574 else
4575 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4576 }
4577
4578 /* Make sure VLAN Filters are off */
4579 rctl &= ~E1000_RCTL_VFE;
4580
4581 if (adapter->rx_mbuf_sz == MCLBYTES)
4582 rctl |= E1000_RCTL_SZ_2048;
4583 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4584 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4585 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4586 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4587
4588 /* Clear the DTYP bits to ensure we use a descriptor type of 00 */
4589 rctl &= ~0x00000C00;
4590 /* Write out the settings */
4591 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4592
4593 return;
4594}
4595
4596
4597/*********************************************************************
4598 *
4599 * This routine executes in interrupt context. It replenishes
4600 * the mbufs in the descriptor ring and sends data which has
4601 * been DMA'ed into host memory to the upper layer.
4602 *
4603 * We loop at most count times if count is > 0, or until done if
4604 * count < 0.
4605 *
4606 * For polling we also now return the number of cleaned packets
4607 *********************************************************************/
4608static bool
4609em_rxeof(struct rx_ring *rxr, int count, int *done)
4610{
4611 struct adapter *adapter = rxr->adapter;
4612 if_t ifp = adapter->ifp;
4613 struct mbuf *mp, *sendmp;
4614 u32 status = 0;
4615 u16 len;
4616 int i, processed, rxdone = 0;
4617 bool eop;
4618 union e1000_rx_desc_extended *cur;
4619
4620 EM_RX_LOCK(rxr);
4621
4622 /* Sync the ring */
4623 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4624 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4625
4626
4627#ifdef DEV_NETMAP
4628 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4629 EM_RX_UNLOCK(rxr);
4630 return (FALSE);
4631 }
4632#endif /* DEV_NETMAP */
4633
4634 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4635 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4636 break;
4637
4638 cur = &rxr->rx_base[i];
4639 status = le32toh(cur->wb.upper.status_error);
4640 mp = sendmp = NULL;
4641
4642 if ((status & E1000_RXD_STAT_DD) == 0)
4643 break;
4644
4645 len = le16toh(cur->wb.upper.length);
4646 eop = (status & E1000_RXD_STAT_EOP) != 0;
4647
4648 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4649 (rxr->discard == TRUE)) {
4650 adapter->dropped_pkts++;
4651 ++rxr->rx_discarded;
4652 if (!eop) /* Catch subsequent segs */
4653 rxr->discard = TRUE;
4654 else
4655 rxr->discard = FALSE;
4656 em_rx_discard(rxr, i);
4657 goto next_desc;
4658 }
4659 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4660
4661 /* Assign correct length to the current fragment */
4662 mp = rxr->rx_buffers[i].m_head;
4663 mp->m_len = len;
4664
4665 /* Trigger for refresh */
4666 rxr->rx_buffers[i].m_head = NULL;
4667
4668 /* First segment? */
4669 if (rxr->fmp == NULL) {
4670 mp->m_pkthdr.len = len;
4671 rxr->fmp = rxr->lmp = mp;
4672 } else {
4673 /* Chain mbuf's together */
4674 mp->m_flags &= ~M_PKTHDR;
4675 rxr->lmp->m_next = mp;
4676 rxr->lmp = mp;
4677 rxr->fmp->m_pkthdr.len += len;
4678 }
4679
4680 if (eop) {
4681 --count;
4682 sendmp = rxr->fmp;
4683 if_setrcvif(sendmp, ifp);
4684 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4685 em_receive_checksum(status, sendmp);
4686#ifndef __NO_STRICT_ALIGNMENT
4687 if (adapter->hw.mac.max_frame_size >
4688 (MCLBYTES - ETHER_ALIGN) &&
4689 em_fixup_rx(rxr) != 0)
4690 goto skip;
4691#endif
4692 if (status & E1000_RXD_STAT_VP) {
4693 if_setvtag(sendmp,
4694 le16toh(cur->wb.upper.vlan));
4695 sendmp->m_flags |= M_VLANTAG;
4696 }
4697#ifndef __NO_STRICT_ALIGNMENT
4698skip:
4699#endif
4700 rxr->fmp = rxr->lmp = NULL;
4701 }
4702next_desc:
4703 /* Sync the ring */
4704 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4705 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4706
4707 /* Zero out the receive descriptors status. */
4708 cur->wb.upper.status_error &= htole32(~0xFF);
4709 ++rxdone; /* cumulative for POLL */
4710 ++processed;
4711
4712 /* Advance our pointers to the next descriptor. */
4713 if (++i == adapter->num_rx_desc)
4714 i = 0;
4715
4716 /* Send to the stack */
4717 if (sendmp != NULL) {
4718 rxr->next_to_check = i;
4719 EM_RX_UNLOCK(rxr);
4720 if_input(ifp, sendmp);
4721 EM_RX_LOCK(rxr);
4722 i = rxr->next_to_check;
4723 }
4724
4725 /* Only refresh mbufs every 8 descriptors */
4726 if (processed == 8) {
4727 em_refresh_mbufs(rxr, i);
4728 processed = 0;
4729 }
4730 }
4731
4732 /* Catch any remaining refresh work */
4733 if (e1000_rx_unrefreshed(rxr))
4734 em_refresh_mbufs(rxr, i);
4735
4736 rxr->next_to_check = i;
4737 if (done != NULL)
4738 *done = rxdone;
4739 EM_RX_UNLOCK(rxr);
4740
4741 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4742}
4743
4744static __inline void
4745em_rx_discard(struct rx_ring *rxr, int i)
4746{
4747 struct em_rxbuffer *rbuf;
4748
4749 rbuf = &rxr->rx_buffers[i];
4750 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4751
4752 /* Free any previous pieces */
4753 if (rxr->fmp != NULL) {
4754 rxr->fmp->m_flags |= M_PKTHDR;
4755 m_freem(rxr->fmp);
4756 rxr->fmp = NULL;
4757 rxr->lmp = NULL;
4758 }
4759 /*
4760 ** Free buffer and allow em_refresh_mbufs()
4761 ** to clean up and recharge buffer.
4762 */
4763 if (rbuf->m_head) {
4764 m_free(rbuf->m_head);
4765 rbuf->m_head = NULL;
4766 }
4767 return;
4768}
4769
4770#ifndef __NO_STRICT_ALIGNMENT
4771/*
4772 * When jumbo frames are enabled we should realign the entire payload on
4773 * architectures with strict alignment. This is a serious design mistake of
4774 * the 8254x as it nullifies the benefit of DMA operations. The 8254x only
4775 * allows the RX buffer size to be 2048/4096/8192/16384; what we really want
4776 * is 2048 - ETHER_ALIGN so the payload is aligned. On architectures without
4777 * strict alignment restrictions the 8254x still performs unaligned memory
4778 * accesses, which reduce performance too. To avoid copying an entire frame
4779 * just to align it, we allocate a new mbuf, copy only the ethernet header
4780 * into it, and prepend the new mbuf to the existing mbuf chain.
4781 *
4782 * Be aware, best performance of the 8254x is achieved only when jumbo
4783 * frames are not used at all on architectures with strict alignment.
4784 */
4785static int
4786em_fixup_rx(struct rx_ring *rxr)
4787{
4788 struct adapter *adapter = rxr->adapter;
4789 struct mbuf *m, *n;
4790 int error;
4791
4792 error = 0;
4793 m = rxr->fmp;
4794 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4795 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4796 m->m_data += ETHER_HDR_LEN;
4797 } else {
4798 MGETHDR(n, M_NOWAIT, MT_DATA);
4799 if (n != NULL) {
4800 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4801 m->m_data += ETHER_HDR_LEN;
4802 m->m_len -= ETHER_HDR_LEN;
4803 n->m_len = ETHER_HDR_LEN;
4804 M_MOVE_PKTHDR(n, m);
4805 n->m_next = m;
4806 rxr->fmp = n;
4807 } else {
4808 adapter->dropped_pkts++;
4809 m_freem(rxr->fmp);
4810 rxr->fmp = NULL;
4811 error = ENOMEM;
4812 }
4813 }
4814
4815 return (error);
4816}
4817#endif
4818
4819static void
4820em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
4821{
4822 rxd->read.buffer_addr = htole64(rxbuf->paddr);
4823 /* DD bits must be cleared */
4824 rxd->wb.upper.status_error = 0;
4825}
4826
4827/*********************************************************************
4828 *
4829 * Verify that the hardware indicated that the checksum is valid.
4830 * Inform the stack about the status of checksum so that stack
4831 * doesn't spend time verifying the checksum.
4832 *
4833 *********************************************************************/
4834static void
4835em_receive_checksum(uint32_t status, struct mbuf *mp)
4836{
4837 mp->m_pkthdr.csum_flags = 0;
4838
4839 /* Ignore Checksum bit is set */
4840 if (status & E1000_RXD_STAT_IXSM)
4841 return;
4842
4843 /* If the IP checksum exists and there is no IP Checksum error */
4844 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
4845 E1000_RXD_STAT_IPCS) {
4846 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4847 }
4848
4849 /* TCP or UDP checksum */
4850 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
4851 E1000_RXD_STAT_TCPCS) {
4852 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4853 mp->m_pkthdr.csum_data = htons(0xffff);
4854 }
4855 if (status & E1000_RXD_STAT_UDPCS) {
4856 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4857 mp->m_pkthdr.csum_data = htons(0xffff);
4858 }
4859}
4860
4861/*
4862 * This routine is run via a vlan
4863 * config EVENT
4864 */
4865static void
4866em_register_vlan(void *arg, if_t ifp, u16 vtag)
4867{
4868 struct adapter *adapter = if_getsoftc(ifp);
4869 u32 index, bit;
4870
4871 if ((void*)adapter != arg) /* Not our event */
4872 return;
4873
4874 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
4875 return;
4876
4877 EM_CORE_LOCK(adapter);
4878 index = (vtag >> 5) & 0x7F;
4879 bit = vtag & 0x1F;
4880 adapter->shadow_vfta[index] |= (1 << bit);
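/*
 * Worked example: vtag 1234 gives index (1234 >> 5) & 0x7f == 38
 * and bit 1234 & 0x1f == 18, so bit 18 of shadow_vfta[38] marks
 * that VLAN in the 4096-bit filter table.
 */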
4881 ++adapter->num_vlans;
4882 /* Re-init to load the changes */
4883 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4884 em_init_locked(adapter);
4885 EM_CORE_UNLOCK(adapter);
4886}
4887
4888/*
4889 * This routine is run via a vlan
4890 * unconfig EVENT
4891 */
4892static void
4893em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
4894{
4895 struct adapter *adapter = if_getsoftc(ifp);
4896 u32 index, bit;
4897
4898 if (adapter != arg)
4899 return;
4900
4901 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4902 return;
4903
4904 EM_CORE_LOCK(adapter);
4905 index = (vtag >> 5) & 0x7F;
4906 bit = vtag & 0x1F;
4907 adapter->shadow_vfta[index] &= ~(1 << bit);
4908 --adapter->num_vlans;
4909 /* Re-init to load the changes */
4910 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
4911 em_init_locked(adapter);
4912 EM_CORE_UNLOCK(adapter);
4913}
4914
4915static void
4916em_setup_vlan_hw_support(struct adapter *adapter)
4917{
4918 struct e1000_hw *hw = &adapter->hw;
4919 u32 reg;
4920
4921 /*
4922 ** We get here thru init_locked, meaning
4923 ** a soft reset, this has already cleared
4924 ** the VFTA and other state, so if there
4925 ** have been no vlans registered, do nothing.
4926 */
4927 if (adapter->num_vlans == 0)
4928 return;
4929
4930 /*
4931 ** A soft reset zeroes out the VFTA, so
4932 ** we need to repopulate it now.
4933 */
4934 for (int i = 0; i < EM_VFTA_SIZE; i++)
4935 if (adapter->shadow_vfta[i] != 0)
4936 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4937 i, adapter->shadow_vfta[i]);
4938
4939 reg = E1000_READ_REG(hw, E1000_CTRL);
4940 reg |= E1000_CTRL_VME;
4941 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4942
4943 /* Enable the Filter Table */
4944 reg = E1000_READ_REG(hw, E1000_RCTL);
4945 reg &= ~E1000_RCTL_CFIEN;
4946 reg |= E1000_RCTL_VFE;
4947 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4948}
4949
4950static void
4951em_enable_intr(struct adapter *adapter)
4952{
4953 struct e1000_hw *hw = &adapter->hw;
4954 u32 ims_mask = IMS_ENABLE_MASK;
4955
4956 if (hw->mac.type == e1000_82574) {
4957 E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
4958 ims_mask |= adapter->ims;
4959 }
4960 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4961}
4962
4963static void
4964em_disable_intr(struct adapter *adapter)
4965{
4966 struct e1000_hw *hw = &adapter->hw;
4967
4968 if (hw->mac.type == e1000_82574)
4969 E1000_WRITE_REG(hw, EM_EIAC, 0);
4970 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4971}
4972
4973/*
4974 * Bit of a misnomer, what this really means is
4975 * to enable OS management of the system... aka
4976 * to disable special hardware management features
4977 */
4978static void
4979em_init_manageability(struct adapter *adapter)
4980{
4981 /* A shared code workaround */
4982#define E1000_82542_MANC2H E1000_MANC2H
4983 if (adapter->has_manage) {
4984 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4985 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4986
4987 /* disable hardware interception of ARP */
4988 manc &= ~(E1000_MANC_ARP_EN);
4989
4990 /* enable receiving management packets to the host */
4991 manc |= E1000_MANC_EN_MNG2HOST;
4992#define E1000_MNG2HOST_PORT_623 (1 << 5)
4993#define E1000_MNG2HOST_PORT_664 (1 << 6)
4994 manc2h |= E1000_MNG2HOST_PORT_623;
4995 manc2h |= E1000_MNG2HOST_PORT_664;
4996 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4997 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4998 }
4999}
5000
5001/*
5002 * Give control back to hardware management
5003 * controller if there is one.
5004 */
5005static void
5006em_release_manageability(struct adapter *adapter)
5007{
5008 if (adapter->has_manage) {
5009 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5010
5011 /* re-enable hardware interception of ARP */
5012 manc |= E1000_MANC_ARP_EN;
5013 manc &= ~E1000_MANC_EN_MNG2HOST;
5014
5015 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5016 }
5017}
5018
5019/*
5020 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5021 * For ASF and Pass Through versions of f/w this means
5022 * that the driver is loaded. For AMT version type f/w
5023 * this means that the network i/f is open.
5024 */
5025static void
5026em_get_hw_control(struct adapter *adapter)
5027{
5028 u32 ctrl_ext, swsm;
5029
5030 if (adapter->hw.mac.type == e1000_82573) {
5031 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5032 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5033 swsm | E1000_SWSM_DRV_LOAD);
5034 return;
5035 }
5036 /* else */
5037 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5038 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5039 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5040 return;
5041}
5042
5043/*
5044 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5045 * For ASF and Pass Through versions of f/w this means that
5046 * the driver is no longer loaded. For AMT versions of the
5047 * f/w this means that the network i/f is closed.
5048 */
5049static void
5050em_release_hw_control(struct adapter *adapter)
5051{
5052 u32 ctrl_ext, swsm;
5053
5054 if (!adapter->has_manage)
5055 return;
5056
5057 if (adapter->hw.mac.type == e1000_82573) {
5058 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5059 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5060 swsm & ~E1000_SWSM_DRV_LOAD);
5061 return;
5062 }
5063 /* else */
5064 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5065 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5066 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5067 return;
5068}
5069
5070static int
5071em_is_valid_ether_addr(u8 *addr)
5072{
5073 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5074
5075 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5076 return (FALSE);
5077 }
5078
5079 return (TRUE);
5080}
5081
5082/*
5083** Parse the interface capabilities with regard
5084** to both system management and wake-on-lan for
5085** later use.
5086*/
5087static void
5088em_get_wakeup(device_t dev)
5089{
5090 struct adapter *adapter = device_get_softc(dev);
5091 u16 eeprom_data = 0, device_id, apme_mask;
5092
5093 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5094 apme_mask = EM_EEPROM_APME;
5095
5096 switch (adapter->hw.mac.type) {
5097 case e1000_82573:
5098 case e1000_82583:
5099 adapter->has_amt = TRUE;
5100 /* Falls thru */
5101 case e1000_82571:
5102 case e1000_82572:
5103 case e1000_80003es2lan:
5104 if (adapter->hw.bus.func == 1) {
5105 e1000_read_nvm(&adapter->hw,
5106 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5107 break;
5108 } else
5109 e1000_read_nvm(&adapter->hw,
5110 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5111 break;
5112 case e1000_ich8lan:
5113 case e1000_ich9lan:
5114 case e1000_ich10lan:
5115 case e1000_pchlan:
5116 case e1000_pch2lan:
5117 apme_mask = E1000_WUC_APME;
5118 adapter->has_amt = TRUE;
5119 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5120 break;
5121 default:
5122 e1000_read_nvm(&adapter->hw,
5123 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5124 break;
5125 }
5126 if (eeprom_data & apme_mask)
5127 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5128 /*
5129 * We have the eeprom settings, now apply the special cases
5130 * where the eeprom may be wrong or the board won't support
5131 * wake on lan on a particular port
5132 */
5133 device_id = pci_get_device(dev);
5134 switch (device_id) {
5135 case E1000_DEV_ID_82571EB_FIBER:
5136 /* Wake events only supported on port A for dual fiber
5137 * regardless of eeprom setting */
5138 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5139 E1000_STATUS_FUNC_1)
5140 adapter->wol = 0;
5141 break;
5142 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5143 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5144 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5145 /* if quad port adapter, disable WoL on all but port A */
5146 if (global_quad_port_a != 0)
5147 adapter->wol = 0;
5148 /* Reset for multiple quad port adapters */
5149 if (++global_quad_port_a == 4)
5150 global_quad_port_a = 0;
5151 break;
5152 }
5153 return;
5154}
5155
5156
5157/*
5158 * Enable PCI Wake On Lan capability
5159 */
5160static void
5161em_enable_wakeup(device_t dev)
5162{
5163 struct adapter *adapter = device_get_softc(dev);
5164 if_t ifp = adapter->ifp;
5165 u32 pmc, ctrl, ctrl_ext, rctl;
5166 u16 status;
5167
5168 if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
5169 return;
5170
5171 /* Advertise the wakeup capability */
5172 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5173 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5174 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5175 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5176
5177 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5178 (adapter->hw.mac.type == e1000_pchlan) ||
5179 (adapter->hw.mac.type == e1000_ich9lan) ||
5180 (adapter->hw.mac.type == e1000_ich10lan))
5181 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5182
5183 /* Keep the laser running on Fiber adapters */
5184 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5185 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5186 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5187 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5188 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5189 }
5190
5191 /*
5192 ** Determine type of Wakeup: note that wol
5193 ** is set with all bits on by default.
5194 */
5195 if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
5196 adapter->wol &= ~E1000_WUFC_MAG;
5197
5198 if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
5199 adapter->wol &= ~E1000_WUFC_MC;
5200 else {
5201 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5202 rctl |= E1000_RCTL_MPE;
5203 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5204 }
5205
5206 if ((adapter->hw.mac.type == e1000_pchlan) ||
5207 (adapter->hw.mac.type == e1000_pch2lan)) {
5208 if (em_enable_phy_wakeup(adapter))
5209 return;
5210 } else {
5211 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5212 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5213 }
5214
5215 if (adapter->hw.phy.type == e1000_phy_igp_3)
5216 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5217
5218 /* Request PME */
5219 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5220 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5221 if (if_getcapenable(ifp) & IFCAP_WOL)
5222 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5223 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5224
5225 return;
5226}
5227
5228/*
5229** WOL in the newer chipset interfaces (pchlan)
5230** requires things to be copied into the PHY
5231*/
5232static int
5233em_enable_phy_wakeup(struct adapter *adapter)
5234{
5235 struct e1000_hw *hw = &adapter->hw;
5236 u32 mreg, ret = 0;
5237 u16 preg;
5238
5239 /* copy MAC RARs to PHY RARs */
5240 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5241
5242 /* copy MAC MTA to PHY MTA */
5243 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5244 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5245 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5246 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5247 (u16)((mreg >> 16) & 0xFFFF));
5248 }
5249
5250 /* configure PHY Rx Control register */
5251 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5252 mreg = E1000_READ_REG(hw, E1000_RCTL);
5253 if (mreg & E1000_RCTL_UPE)
5254 preg |= BM_RCTL_UPE;
5255 if (mreg & E1000_RCTL_MPE)
5256 preg |= BM_RCTL_MPE;
5257 preg &= ~(BM_RCTL_MO_MASK);
5258 if (mreg & E1000_RCTL_MO_3)
5259 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5260 << BM_RCTL_MO_SHIFT);
5261 if (mreg & E1000_RCTL_BAM)
5262 preg |= BM_RCTL_BAM;
5263 if (mreg & E1000_RCTL_PMCF)
5264 preg |= BM_RCTL_PMCF;
5265 mreg = E1000_READ_REG(hw, E1000_CTRL);
5266 if (mreg & E1000_CTRL_RFCE)
5267 preg |= BM_RCTL_RFCE;
5268 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5269
5270 /* enable PHY wakeup in MAC register */
5271 E1000_WRITE_REG(hw, E1000_WUC,
5272 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5273 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5274
5275 /* configure and enable PHY wakeup in PHY registers */
5276 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5277 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5278
5279 /* activate PHY wakeup */
5280 ret = hw->phy.ops.acquire(hw);
5281 if (ret) {
5282 printf("Could not acquire PHY\n");
5283 return ret;
5284 }
5285 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5286 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5287 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5288 if (ret) {
5289 printf("Could not read PHY page 769\n");
5290 goto out;
5291 }
5292 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5293 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5294 if (ret)
5295 printf("Could not set PHY Host Wakeup bit\n");
5296out:
5297 hw->phy.ops.release(hw);
5298
5299 return ret;
5300}
5301
5302static void
5303em_led_func(void *arg, int onoff)
5304{
5305 struct adapter *adapter = arg;
5306
5307 EM_CORE_LOCK(adapter);
5308 if (onoff) {
5309 e1000_setup_led(&adapter->hw);
5310 e1000_led_on(&adapter->hw);
5311 } else {
5312 e1000_led_off(&adapter->hw);
5313 e1000_cleanup_led(&adapter->hw);
5314 }
5315 EM_CORE_UNLOCK(adapter);
5316}
5317
5318/*
5319** Disable the L0S and L1 LINK states
5320*/
5321static void
5322em_disable_aspm(struct adapter *adapter)
5323{
5324 int base, reg;
5325 u16 link_cap, link_ctrl;
5326 device_t dev = adapter->dev;
5327
5328 switch (adapter->hw.mac.type) {
5329 case e1000_82573:
5330 case e1000_82574:
5331 case e1000_82583:
5332 break;
5333 default:
5334 return;
5335 }
5336 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5337 return;
5338 reg = base + PCIER_LINK_CAP;
5339 link_cap = pci_read_config(dev, reg, 2);
5340 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5341 return;
5342 reg = base + PCIER_LINK_CTL;
5343 link_ctrl = pci_read_config(dev, reg, 2);
5344 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5345 pci_write_config(dev, reg, link_ctrl, 2);
5346 return;
5347}
5348
5349/**********************************************************************
5350 *
5351 * Update the board statistics counters.
5352 *
5353 **********************************************************************/
5354static void
5355em_update_stats_counters(struct adapter *adapter)
5356{
5357
5358 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5359 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5360 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5361 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5362 }
5363 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5364 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5365 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5366 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5367
5368 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5369 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5370 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5371 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5372 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5373 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5374 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5375 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5376 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5377 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5378 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5379 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5380 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5381 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5382 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5383 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5384 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5385 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5386 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5387 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5388
5389 /* For the 64-bit byte counters the low dword must be read first. */
5390 /* Both registers clear on the read of the high dword */
5391
5392 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5393 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5394 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5395 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5396
5397 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5398 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5399 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5400 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5401 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5402
5403 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5404 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5405
5406 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5407 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5408 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5409 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5410 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5411 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5412 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5413 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5414 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5415 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5416
5417 /* Interrupt Counts */
5418
5419 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5420 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5421 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5422 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5423 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5424 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5425 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5426 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5427 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5428
5429 if (adapter->hw.mac.type >= e1000_82543) {
5430 adapter->stats.algnerrc +=
5431 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5432 adapter->stats.rxerrc +=
5433 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5434 adapter->stats.tncrs +=
5435 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5436 adapter->stats.cexterr +=
5437 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5438 adapter->stats.tsctc +=
5439 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5440 adapter->stats.tsctfc +=
5441 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5442 }
5443}

static uint64_t
em_get_counter(if_t ifp, ift_counter cnt)
{
    struct adapter *adapter;

    adapter = if_getsoftc(ifp);

    switch (cnt) {
    case IFCOUNTER_COLLISIONS:
        return (adapter->stats.colc);
    case IFCOUNTER_IERRORS:
        return (adapter->dropped_pkts + adapter->stats.rxerrc +
            adapter->stats.crcerrs + adapter->stats.algnerrc +
            adapter->stats.ruc + adapter->stats.roc +
            adapter->stats.mpc + adapter->stats.cexterr);
    case IFCOUNTER_OERRORS:
        return (adapter->stats.ecol + adapter->stats.latecol +
            adapter->watchdog_events);
    default:
        return (if_get_counter_default(ifp, cnt));
    }
}
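/*
 * These sums back the generic ifnet counters, so they are what shows up
 * in, e.g., the Ierrs and Oerrs columns of "netstat -i" for this
 * interface (consumer list illustrative, not exhaustive).
 */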

/* Export a single 32-bit register via a read-only sysctl. */
static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
    struct adapter *adapter;
    u_int val;

    adapter = oidp->oid_arg1;
    val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
    return (sysctl_handle_int(oidp, &val, 0, req));
}
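/*
 * The register offset arrives via oid_arg2 at registration time, so the
 * "device_control" node added below reads E1000_CTRL on each access.
 * Example (illustrative; the unit number is a placeholder):
 *   # sysctl dev.em.0.device_control
 */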

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
em_add_hw_stats(struct adapter *adapter)
{
    device_t dev = adapter->dev;

    struct tx_ring *txr = adapter->tx_rings;
    struct rx_ring *rxr = adapter->rx_rings;

    struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
    struct sysctl_oid *tree = device_get_sysctl_tree(dev);
    struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
    struct e1000_hw_stats *stats = &adapter->stats;

    struct sysctl_oid *stat_node, *queue_node, *int_node;
    struct sysctl_oid_list *stat_list, *queue_list, *int_list;

#define QUEUE_NAME_LEN 32
    char namebuf[QUEUE_NAME_LEN];

    /* Driver Statistics */
    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
        CTLFLAG_RD, &adapter->dropped_pkts,
        "Driver dropped packets");
    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
        CTLFLAG_RD, &adapter->link_irq,
        "Link MSI-X IRQ Handled");
    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
        CTLFLAG_RD, &adapter->mbuf_defrag_failed,
        "Defragmenting mbuf chain failed");
    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
        CTLFLAG_RD, &adapter->no_tx_dma_setup,
        "Driver TX DMA failure in xmit");
    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
        CTLFLAG_RD, &adapter->rx_overruns,
        "RX overruns");
    SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
        CTLFLAG_RD, &adapter->watchdog_events,
        "Watchdog timeouts");

    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
        CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
        em_sysctl_reg_handler, "IU",
        "Device Control Register");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
        CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
        em_sysctl_reg_handler, "IU",
        "Receiver Control Register");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
        CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
        "Flow Control High Watermark");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
        CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
        "Flow Control Low Watermark");

    for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
        snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
        queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
            CTLFLAG_RD, NULL, "TX Queue Statistics");
        queue_list = SYSCTL_CHILDREN(queue_node);

        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
            CTLTYPE_UINT | CTLFLAG_RD, adapter,
            E1000_TDH(txr->me),
            em_sysctl_reg_handler, "IU",
            "Transmit Descriptor Head");
        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
            CTLTYPE_UINT | CTLFLAG_RD, adapter,
            E1000_TDT(txr->me),
            em_sysctl_reg_handler, "IU",
            "Transmit Descriptor Tail");
        SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
            CTLFLAG_RD, &txr->tx_irq,
            "Queue MSI-X Transmit Interrupts");
        SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
            CTLFLAG_RD, &txr->no_desc_avail,
            "Queue No Descriptor Available");

        snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
        queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
            CTLFLAG_RD, NULL, "RX Queue Statistics");
        queue_list = SYSCTL_CHILDREN(queue_node);

        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
            CTLTYPE_UINT | CTLFLAG_RD, adapter,
            E1000_RDH(rxr->me),
            em_sysctl_reg_handler, "IU",
            "Receive Descriptor Head");
        SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
            CTLTYPE_UINT | CTLFLAG_RD, adapter,
            E1000_RDT(rxr->me),
            em_sysctl_reg_handler, "IU",
            "Receive Descriptor Tail");
        SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
            CTLFLAG_RD, &rxr->rx_irq,
            "Queue MSI-X Receive Interrupts");
    }
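    /*
     * The loop above yields per-queue subtrees such as (unit and queue
     * numbers are placeholders):
     *   dev.em.0.queue_tx_0.txd_head
     *   dev.em.0.queue_rx_0.rx_irq
     */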

    /* MAC stats get their own sub node */

    stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
        CTLFLAG_RD, NULL, "Statistics");
    stat_list = SYSCTL_CHILDREN(stat_node);

    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
        CTLFLAG_RD, &stats->ecol,
        "Excessive collisions");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
        CTLFLAG_RD, &stats->scc,
        "Single collisions");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
        CTLFLAG_RD, &stats->mcc,
        "Multiple collisions");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
        CTLFLAG_RD, &stats->latecol,
        "Late collisions");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
        CTLFLAG_RD, &stats->colc,
        "Collision Count");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
        CTLFLAG_RD, &stats->symerrs,
        "Symbol Errors");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
        CTLFLAG_RD, &stats->sec,
        "Sequence Errors");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
        CTLFLAG_RD, &stats->dc,
        "Defer Count");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
        CTLFLAG_RD, &stats->mpc,
        "Missed Packets");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
        CTLFLAG_RD, &stats->rnbc,
        "Receive No Buffers");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
        CTLFLAG_RD, &stats->ruc,
        "Receive Undersize");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
        CTLFLAG_RD, &stats->rfc,
        "Fragmented Packets Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
        CTLFLAG_RD, &stats->roc,
        "Oversized Packets Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
        CTLFLAG_RD, &stats->rjc,
        "Received Jabber");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
        CTLFLAG_RD, &stats->rxerrc,
        "Receive Errors");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
        CTLFLAG_RD, &stats->crcerrs,
        "CRC errors");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
        CTLFLAG_RD, &stats->algnerrc,
        "Alignment Errors");
    /* On 82575 these are collision counts */
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
        CTLFLAG_RD, &stats->cexterr,
        "Collision/Carrier extension errors");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
        CTLFLAG_RD, &stats->xonrxc,
        "XON Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
        CTLFLAG_RD, &stats->xontxc,
        "XON Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
        CTLFLAG_RD, &stats->xoffrxc,
        "XOFF Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
        CTLFLAG_RD, &stats->xofftxc,
        "XOFF Transmitted");

    /* Packet Reception Stats */
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
        CTLFLAG_RD, &stats->tpr,
        "Total Packets Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
        CTLFLAG_RD, &stats->gprc,
        "Good Packets Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
        CTLFLAG_RD, &stats->bprc,
        "Broadcast Packets Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
        CTLFLAG_RD, &stats->mprc,
        "Multicast Packets Received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
        CTLFLAG_RD, &stats->prc64,
        "64 byte frames received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
        CTLFLAG_RD, &stats->prc127,
        "65-127 byte frames received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
        CTLFLAG_RD, &stats->prc255,
        "128-255 byte frames received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
        CTLFLAG_RD, &stats->prc511,
        "256-511 byte frames received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
        CTLFLAG_RD, &stats->prc1023,
        "512-1023 byte frames received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
        CTLFLAG_RD, &stats->prc1522,
        "1024-1522 byte frames received");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
        CTLFLAG_RD, &stats->gorc,
        "Good Octets Received");

    /* Packet Transmission Stats */
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
        CTLFLAG_RD, &stats->gotc,
        "Good Octets Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
        CTLFLAG_RD, &stats->tpt,
        "Total Packets Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
        CTLFLAG_RD, &stats->gptc,
        "Good Packets Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
        CTLFLAG_RD, &stats->bptc,
        "Broadcast Packets Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
        CTLFLAG_RD, &stats->mptc,
        "Multicast Packets Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
        CTLFLAG_RD, &stats->ptc64,
        "64 byte frames transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
        CTLFLAG_RD, &stats->ptc127,
        "65-127 byte frames transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
        CTLFLAG_RD, &stats->ptc255,
        "128-255 byte frames transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
        CTLFLAG_RD, &stats->ptc511,
        "256-511 byte frames transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
        CTLFLAG_RD, &stats->ptc1023,
        "512-1023 byte frames transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
        CTLFLAG_RD, &stats->ptc1522,
        "1024-1522 byte frames transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
        CTLFLAG_RD, &stats->tsctc,
        "TSO Contexts Transmitted");
    SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
        CTLFLAG_RD, &stats->tsctfc,
        "TSO Contexts Failed");

    /* Interrupt Stats */

    int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
        CTLFLAG_RD, NULL, "Interrupt Statistics");
    int_list = SYSCTL_CHILDREN(int_node);

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
        CTLFLAG_RD, &stats->iac,
        "Interrupt Assertion Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
        CTLFLAG_RD, &stats->icrxptc,
        "Interrupt Cause Rx Pkt Timer Expire Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
        CTLFLAG_RD, &stats->icrxatc,
        "Interrupt Cause Rx Abs Timer Expire Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
        CTLFLAG_RD, &stats->ictxptc,
        "Interrupt Cause Tx Pkt Timer Expire Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
        CTLFLAG_RD, &stats->ictxatc,
        "Interrupt Cause Tx Abs Timer Expire Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
        CTLFLAG_RD, &stats->ictxqec,
        "Interrupt Cause Tx Queue Empty Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
        CTLFLAG_RD, &stats->ictxqmtc,
        "Interrupt Cause Tx Queue Min Thresh Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
        CTLFLAG_RD, &stats->icrxdmtc,
        "Interrupt Cause Rx Desc Min Thresh Count");

    SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
        CTLFLAG_RD, &stats->icrxoc,
        "Interrupt Cause Receiver Overrun Count");
}
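/*
 * Example of browsing the resulting tree (illustrative; the unit number
 * is a placeholder):
 *   # sysctl dev.em.0.mac_stats
 *   # sysctl dev.em.0.interrupts.asserts
 */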

/**********************************************************************
 *
 * This routine provides a way to dump out the adapter EEPROM,
 * often a useful debug/service tool. Only the first 32 words are
 * dumped; the data that matters lives in that extent.
 *
 **********************************************************************/
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
    struct adapter *adapter = (struct adapter *)arg1;
    int error;
    int result;

    result = -1;
    error = sysctl_handle_int(oidp, &result, 0, req);

    if (error || !req->newptr)
        return (error);

    /*
     * This value will cause a hex dump of the
     * first 32 16-bit words of the EEPROM to
     * the screen.
     */
    if (result == 1)
        em_print_nvm_info(adapter);

    return (error);
}
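/*
 * Example (node name and unit are illustrative; the handler is attached
 * during driver setup): writing 1 triggers the dump to the console:
 *   # sysctl dev.em.0.nvm=1
 */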

static void
em_print_nvm_info(struct adapter *adapter)
{
    u16 eeprom_data;
    int i, j, row = 0;

    /* It's a bit crude, but it gets the job done */
    printf("\nInterface EEPROM Dump:\n");
    printf("Offset\n0x0000 ");
    for (i = 0, j = 0; i < 32; i++, j++) {
        if (j == 8) { /* Make the offset block */
            j = 0; ++row;
            printf("\n0x00%x0 ", row);
        }
        e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
        printf("%04x ", eeprom_data);
    }
    printf("\n");
}
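/*
 * The dump comes out as four rows of eight 16-bit words, labeled with
 * byte offsets, e.g. (word values illustrative):
 *
 * Interface EEPROM Dump:
 * Offset
 * 0x0000 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 * 0x0010 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 * ...
 */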

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
    struct em_int_delay_info *info;
    struct adapter *adapter;
    u32 regval;
    int error, usecs, ticks;

    info = (struct em_int_delay_info *)arg1;
    usecs = info->value;
    error = sysctl_handle_int(oidp, &usecs, 0, req);
    if (error != 0 || req->newptr == NULL)
        return (error);
    if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
        return (EINVAL);
    info->value = usecs;
    ticks = EM_USECS_TO_TICKS(usecs);
    if (info->offset == E1000_ITR) /* units are 256 ns here */
        ticks *= 4;

    adapter = info->adapter;

    EM_CORE_LOCK(adapter);
    regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
    regval = (regval & ~0xffff) | (ticks & 0xffff);
    /* Handle a few special cases. */
    switch (info->offset) {
    case E1000_RDTR:
        break;
    case E1000_TIDV:
        if (ticks == 0) {
            adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
            /* Don't write 0 into the TIDV register. */
            regval++;
        } else
            adapter->txd_cmd |= E1000_TXD_CMD_IDE;
        break;
    }
    E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
    EM_CORE_UNLOCK(adapter);
    return (0);
}
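/*
 * Worked example (assuming the 1.024 us tick implied by
 * EM_USECS_TO_TICKS): a request of 100 usecs becomes roughly 97 ticks,
 * which lands in the register's low 16 bits. For E1000_ITR, whose unit
 * is 256 ns rather than one tick, the value is scaled by a further
 * factor of 4 (1024 / 256).
 */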

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
    const char *description, struct em_int_delay_info *info,
    int offset, int value)
{
    info->adapter = adapter;
    info->offset = offset;
    info->value = value;
    SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
        SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
        OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
        info, 0, em_sysctl_int_delay, "I", description);
}
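/*
 * Sketch of a typical attach-time call (node name, info field, and
 * default value are illustrative):
 *
 *   em_add_int_delay_sysctl(adapter, "rx_int_delay",
 *       "receive interrupt delay in usecs", &adapter->rx_int_delay,
 *       E1000_REGISTER(&adapter->hw, E1000_RDTR), em_rx_int_delay_dflt);
 */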

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
    const char *description, int *limit, int value)
{
    *limit = value;
    SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
        SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
        OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
    int error, input;
    struct adapter *adapter = (struct adapter *)arg1;

    /* Report the current per-adapter mode and accept updates. */
    input = adapter->fc;
    error = sysctl_handle_int(oidp, &input, 0, req);

    if ((error) || (req->newptr == NULL))
        return (error);

    if (input == adapter->fc) /* no change? */
        return (error);

    switch (input) {
    case e1000_fc_rx_pause:
    case e1000_fc_tx_pause:
    case e1000_fc_full:
    case e1000_fc_none:
        adapter->hw.fc.requested_mode = input;
        adapter->fc = input;
        break;
    default:
        /* Do nothing */
        return (error);
    }

    adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
    e1000_force_mac_fc(&adapter->hw);
    return (error);
}
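/*
 * Example (node name and unit illustrative; the handler is attached at
 * driver setup): request full flow control with
 *   # sysctl dev.em.0.fc=3
 */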

/*
** Manage Energy Efficient Ethernet:
** Control values:
**      0 - EEE enabled
**      1 - EEE disabled
*/
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
    struct adapter *adapter = (struct adapter *)arg1;
    int error, value;

    value = adapter->hw.dev_spec.ich8lan.eee_disable;
    error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || req->newptr == NULL)
        return (error);
    EM_CORE_LOCK(adapter);
    adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
    em_init_locked(adapter);
    EM_CORE_UNLOCK(adapter);
    return (0);
}
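/*
 * Example (node name and unit illustrative): disable EEE and
 * reinitialize the interface with
 *   # sysctl dev.em.0.eee_control=1
 */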

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
    struct adapter *adapter;
    int error;
    int result;

    result = -1;
    error = sysctl_handle_int(oidp, &result, 0, req);

    if (error || !req->newptr)
        return (error);

    if (result == 1) {
        adapter = (struct adapter *)arg1;
        em_print_debug_info(adapter);
    }

    return (error);
}
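/*
 * Example (node name and unit illustrative): writing 1 dumps the queue
 * state printed by the routine below to the console:
 *   # sysctl dev.em.0.debug=1
 */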

/*
** This routine is meant to be fluid: add whatever is
** needed for debugging a problem. -jfv
*/
static void
em_print_debug_info(struct adapter *adapter)
{
    device_t dev = adapter->dev;
    struct tx_ring *txr = adapter->tx_rings;
    struct rx_ring *rxr = adapter->rx_rings;

    if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
        printf("Interface is RUNNING ");
    else
        printf("Interface is NOT RUNNING ");

    if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
        printf("and INACTIVE\n");
    else
        printf("and ACTIVE\n");

    for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
        device_printf(dev, "TX Queue %d ------\n", i);
        device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
            E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
        device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
        device_printf(dev, "TX descriptors avail = %d\n",
            txr->tx_avail);
        device_printf(dev, "Tx Descriptors avail failure = %lu\n",
            txr->no_desc_avail);
        device_printf(dev, "RX Queue %d ------\n", i);
        device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
            E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
            E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
        device_printf(dev, "RX discarded packets = %lu\n",
            rxr->rx_discarded);
        device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
        device_printf(dev, "RX Next to Refresh = %d\n",
            rxr->next_to_refresh);
    }
}

#ifdef EM_MULTIQUEUE
/*
 * 82574 only:
 * Write a new value to the EEPROM increasing the number of MSI-X
 * vectors from 3 to 5, for proper multiqueue support. The NVM field
 * is written to 4 so that the device reports 5 vectors.
 */
static void
em_enable_vectors_82574(struct adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;
    device_t dev = adapter->dev;
    u16 edata;

    e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
    device_printf(dev, "Current cap: %#06x\n", edata);
    if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
        device_printf(dev, "Writing to eeprom: increasing "
            "reported MSI-X vectors from 3 to 5...\n");
        edata &= ~(EM_NVM_MSIX_N_MASK);
        edata |= 4 << EM_NVM_MSIX_N_SHIFT;
        e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
        e1000_update_nvm_checksum(hw);
        device_printf(dev, "Writing to eeprom: done\n");
    }
}
#endif

#ifdef DDB
DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
{
    devclass_t dc;
    int max_em;

    dc = devclass_find("em");
    max_em = devclass_get_maxunit(dc);

    for (int index = 0; index < max_em; index++) {
        device_t dev;

        dev = devclass_get_device(dc, index);
        if (dev != NULL && device_get_driver(dev) == &em_driver) {
            struct adapter *adapter = device_get_softc(dev);

            EM_CORE_LOCK(adapter);
            em_init_locked(adapter);
            EM_CORE_UNLOCK(adapter);
        }
    }
}
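/*
 * Example from the in-kernel debugger prompt:
 *   db> em_reset_dev
 * reinitializes every attached em(4) interface, while "db> em_dump_queue"
 * below prints the per-queue debug state instead.
 */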
DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
{
    devclass_t dc;
    int max_em;

    dc = devclass_find("em");
    max_em = devclass_get_maxunit(dc);

    for (int index = 0; index < max_em; index++) {
        device_t dev;

        dev = devclass_get_device(dc, index);
        if (dev != NULL && device_get_driver(dev) == &em_driver)
            em_print_debug_info(device_get_softc(dev));
    }
}
#endif